1
0
Fork 0
mirror of https://github.com/iiab/iiab.git synced 2025-03-09 15:40:17 +00:00
iiab/roles/xsce-admin/files/cmdsrv/scripts/get_kiwix_lib
2017-05-27 11:09:50 -07:00

138 lines
4.4 KiB
Python

#!/usr/bin/python
# Not currently used
import xml.etree.ElementTree as ET
import json
import csv
import operator
import base64
import os.path
import sys
import urllib2
# Make the XSCE configuration directory importable; presumably this is
# where the xsce_env module imported right after lives.
XSCE_PATH = '/etc/xsce'
if XSCE_PATH not in sys.path:
    sys.path.append(XSCE_PATH)
from xsce_env import get_xsce_env
# Accumulators filled while walking the Kiwix library and dumped to JSON
# at the end of the script.
zimLangCodes = []  # distinct language codes, sorted before being written
zimGroups = {}     # language code -> creator -> list of zim ids
zimUrls = {}       # zim id -> download_url / file_ref / perma_ref / size
zims = {}          # zim id -> full attribute dict of the library entry
zimCount = 0       # number of entries accepted into the catalog

# Language codes that are folded into another code before grouping.
langGroups = {"en": "eng"}
# Catalog URLs that are looked up here instead of being rewritten into a
# portable download URL, mapped to the download URL to use.
# As of Mar 21, 2015.
problemUrlMap = {
    "http://download.kiwix.org/zim/0.9/wikipedia_en_ray_charles_03_2013.zim.meta4":
        "http://download.kiwix.org/portable/wikipedia/kiwix-0.9+wikipedia_en_ray_charles_03_2013.zip",
    "http://download.kiwix.org/zim/0.9/wikipedia_en_wp1_0.8_45000+_12_2010.zim.meta4":
        "http://download.kiwix.org/portable/wikipedia/kiwix-0.9+wikipedia_en_wp1_0.8_45000+_12_2010.zip",
    "http://download.kiwix.org/zim/0.9/wikipedia_es_venezuela_11_2012.zim.meta4":
        "http://download.kiwix.org/portable/wikipedia/kiwix-0.9+wikipedia_es_venezuela_11_2012.zip",
    "http://download.kiwix.org/zim/wikispecies/wikispecies_en_all_nopic_2015-03.zim.meta4":
        "http://download.kiwix.org/portable/wikispecies/kiwix-+wikispecies_en_all_nopic_2015-03.zip",
    "http://download.kiwix.org/zim/wikispecies/wikispecies_en_all_2015-03.zim.meta4":
        "http://download.kiwix.org/portable/wikispecies/kiwix-+wikispecies_en_all_2015-03.zip",
}
# Value of the 'WWWROOT' setting as returned by get_xsce_env, and the
# directories derived from it.
WWWROOT = get_xsce_env('WWWROOT')
consolePath = WWWROOT + "/devel/"
assetsPath, imgPath = consolePath + "assets/", consolePath + "images/"
# Directory that receives the generated JSON files.
jsonPath = "/etc/xsce/"

# Input: the library catalog is downloaded from kiwixLibUrl.
# (Disabled alternative: xmlfile = assetsPath + "library.xml")
kiwixLibUrl = 'http://www.kiwix.org/library.xml'
zimKiwixUrl = "http://download.kiwix.org/portable/"
zimKiwixPrefix = "kiwix-0.9+"

# Output files, all located in jsonPath.
zim_langs, zim_groups, zim_catalog, zim_urls = (
    jsonPath + fileName
    for fileName in (
        "zim_langs.json",
        "zim_groups.json",
        "zim_catalog.json",
        "zim_urls.json",
    )
)
# Download and parse the Kiwix library catalog.  A failure is reported on
# stdout as "GET-ZIM-LIB ERROR - <reason>" and the script stops.
# NOTE(review): the exit status stays 0 on failure, as in the original;
# presumably the caller inspects the stdout message - confirm before
# changing it.
try:
    xmlsrc = urllib2.urlopen(kiwixLibUrl)
    try:
        tree = ET.parse(xmlsrc)
    finally:
        # Release the connection even when the document cannot be parsed.
        xmlsrc.close()
except urllib2.URLError as exc:
    sys.stdout.write("GET-ZIM-LIB ERROR - " + str(exc.reason))
    sys.stdout.flush()
    sys.exit(0)
except ET.ParseError as exc:
    # A truncated or malformed catalog is reported like a download failure
    # instead of ending in an unhandled traceback.
    sys.stdout.write("GET-ZIM-LIB ERROR - " + str(exc))
    sys.stdout.flush()
    sys.exit(0)

#tree = ET.parse("assets/test.xml")
root = tree.getroot()
# Attributes a library entry must carry to be catalogued.  Entries missing
# any of them are skipped, so the lookups in the loop below cannot raise
# KeyError.
_requiredAttrs = ('id', 'language', 'creator', 'url', 'size')


def _portable_url(url, url_end):
    """Return the portable-package download URL for catalog URL `url`.

    `url_end` is the last path segment of `url`.  URLs listed in
    problemUrlMap use the mapped value; every other URL is moved from the
    /zim/ tree to the /portable/ tree, gets zimKiwixPrefix in front of its
    file name and has ".zim.meta4" replaced by ".zip".
    """
    if url in problemUrlMap:
        return problemUrlMap[url]
    rewritten = url.replace("/zim/", "/portable/")
    rewritten = rewritten.replace(url_end, zimKiwixPrefix + url_end)
    return rewritten.replace(".zim.meta4", ".zip")


def _perma_ref(url_end, fallback):
    """Return `url_end` without its last '_' segment and all-digit segments.

    Example: "wikipedia_es_venezuela_11_2012.zim.meta4" gives
    "wikipedia_es_venezuela".  The first segment is always kept.  When
    `url_end` contains no '_' there is nothing left after dropping the
    last segment, so `fallback` is returned instead of failing.
    """
    parts = url_end.split('_')[:-1]
    if not parts:
        return fallback
    kept = parts[:1] + [part for part in parts[1:] if not part.isdigit()]
    return "_".join(kept)


# Walk every entry of the library and fill the catalog structures.
for child in root:
    attributes = child.attrib
    if not all(key in attributes for key in _requiredAttrs):
        continue

    zimId = attributes['id']
    lang = attributes['language']
    category = attributes['creator']
    url = attributes['url']

    # allow for grouping of language codes
    lang = langGroups.get(lang, lang)

    if lang not in zimLangCodes:
        zimLangCodes.append(lang)

    # language -> creator -> list of zim ids
    zimGroups.setdefault(lang, {}).setdefault(category, []).append(zimId)

    urlEnd = url.split('/')[-1]  # last path element, i.e. the file name
    # NOTE(review): everything after the FIRST dot is dropped, so a name
    # such as "..._wp1_0.8_..." is cut at "0" - confirm this is intended.
    fileRef = urlEnd.split('.')[0]
    permaRef = _perma_ref(urlEnd, fileRef)

    # Favicon extraction (decoding the base64 'favicon' attribute into an
    # image file under imgPath) is not performed.

    # The catalog entry is the element's own attribute dict plus perma_ref.
    attributes['perma_ref'] = permaRef
    zims[zimId] = attributes
    zimCount += 1

    zimUrls[zimId] = {
        'download_url': _portable_url(url, urlEnd),
        'file_ref': fileRef,
        'perma_ref': permaRef,
        'size': attributes['size'],
    }
# Write the collected data as JSON, then report success on stdout.
zimLangCodes.sort()

for outPath, payload in (
    (zim_groups, zimGroups),
    (zim_langs, zimLangCodes),
    (zim_catalog, zims),
    (zim_urls, zimUrls),
):
    with open(outPath, 'w') as fp:
        json.dump(payload, fp)

sys.stdout.write("SUCCESS")
sys.stdout.flush()
sys.exit(0)