mirror of
https://github.com/iiab/iiab.git
synced 2025-03-09 15:40:17 +00:00
138 lines
4.4 KiB
Python
138 lines
4.4 KiB
Python
#!/usr/bin/python
|
|
|
|
# Not currently used
|
|
|
|
import xml.etree.ElementTree as ET
|
|
import json
|
|
import csv
|
|
import operator
|
|
import base64
|
|
import os.path
|
|
import sys
|
|
import urllib2
|
|
# Directory that holds the XSCE configuration files.
XSCE_PATH = '/etc/xsce'

# Put XSCE_PATH on the module search path (once) before importing xsce_env;
# presumably that module is installed there - confirm.
if XSCE_PATH not in sys.path:
    sys.path.append(XSCE_PATH)
from xsce_env import get_xsce_env
|
|
|
|
# Accumulators filled while the Kiwix library is walked further down.
zimLangCodes = []   # language codes encountered (after langGroups mapping)
zimGroups = {}      # language code -> creator -> list of zim ids
zimUrls = {}        # zim id -> download_url / file_ref / perma_ref / size
zims = {}           # zim id -> attribute dict of the library entry
zimCount = 0        # number of entries stored in zims

# Language codes that are replaced by another code before grouping.
langGroups = {"en": "eng"}

# Library URLs whose download URL is looked up here instead of being derived
# from the library URL.
# as of Mar 21, 2015
problemUrlMap = {
    "http://download.kiwix.org/zim/0.9/wikipedia_en_ray_charles_03_2013.zim.meta4":
        "http://download.kiwix.org/portable/wikipedia/kiwix-0.9+wikipedia_en_ray_charles_03_2013.zip",
    "http://download.kiwix.org/zim/0.9/wikipedia_en_wp1_0.8_45000+_12_2010.zim.meta4":
        "http://download.kiwix.org/portable/wikipedia/kiwix-0.9+wikipedia_en_wp1_0.8_45000+_12_2010.zip",
    "http://download.kiwix.org/zim/0.9/wikipedia_es_venezuela_11_2012.zim.meta4":
        "http://download.kiwix.org/portable/wikipedia/kiwix-0.9+wikipedia_es_venezuela_11_2012.zip",
    "http://download.kiwix.org/zim/wikispecies/wikispecies_en_all_nopic_2015-03.zim.meta4":
        "http://download.kiwix.org/portable/wikispecies/kiwix-+wikispecies_en_all_nopic_2015-03.zip",
    "http://download.kiwix.org/zim/wikispecies/wikispecies_en_all_2015-03.zim.meta4":
        "http://download.kiwix.org/portable/wikispecies/kiwix-+wikispecies_en_all_2015-03.zip",
}
|
|
|
|
# Filesystem locations. WWWROOT comes from the XSCE environment (presumably
# the web server document root - confirm); the console paths hang off it.
WWWROOT = get_xsce_env('WWWROOT')
consolePath = WWWROOT + "/devel/"
assetsPath = consolePath + "assets/"
imgPath = consolePath + "images/"
jsonPath = "/etc/xsce/"

# input file
# (a local copy could be used instead of the remote library)
#xmlfile = assetsPath + "library.xml"
kiwixLibUrl = 'http://www.kiwix.org/library.xml'
zimKiwixUrl = "http://download.kiwix.org/portable/"
zimKiwixPrefix = "kiwix-0.9+"   # prepended to the file name in download URLs

# output files, all written below jsonPath
zim_langs = jsonPath + "zim_langs.json"
zim_groups = jsonPath + "zim_groups.json"
zim_catalog = jsonPath + "zim_catalog.json"
zim_urls = jsonPath + "zim_urls.json"
|
|
|
|
# Download the Kiwix library catalogue and parse it into an element tree.
# Any failure is reported on stdout with the "GET-ZIM-LIB ERROR - " prefix and
# ends the script.
try:
    xmlsrc = urllib2.urlopen(kiwixLibUrl)
    try:
        tree = ET.parse(xmlsrc)
    finally:
        # Release the connection even when the document cannot be parsed.
        xmlsrc.close()
except (urllib2.URLError, ET.ParseError) as exc:
    # URLError carries the cause in .reason; ParseError has no such attribute,
    # so fall back to the exception itself.
    sys.stdout.write("GET-ZIM-LIB ERROR - " + str(getattr(exc, 'reason', exc)))
    sys.stdout.flush()
    # NOTE(review): the exit status stays 0 on failure, as before; presumably
    # the caller inspects stdout rather than the status - confirm.
    sys.exit(0)
|
|
|
|
#tree = ET.parse("assets/test.xml")
|
|
|
|
def _perma_ref(file_name, fallback):
    """Return the name of a catalogue file with its numeric pieces removed.

    The file name is split on '_'; the last piece (for example
    "2013.zim.meta4") is dropped, the first piece is always kept, and any
    later piece consisting only of digits is left out.  When the file name
    contains no '_' at all there is nothing left to build from, so
    ``fallback`` is returned instead of failing.
    """
    parts = file_name.split('_')[:-1]
    if not parts:
        return fallback
    return "_".join(parts[:1] + [p for p in parts[1:] if not p.isdigit()])


def _download_url(url, file_name):
    """Return the download URL for the library entry at ``url``.

    URLs listed in problemUrlMap use the mapped value; every other URL is
    rewritten: "/zim/" becomes "/portable/", the file name gets the
    zimKiwixPrefix and ".zim.meta4" becomes ".zip".
    """
    if url in problemUrlMap:
        return problemUrlMap[url]
    rewritten = url.replace("/zim/", "/portable/")
    rewritten = rewritten.replace(file_name, zimKiwixPrefix + file_name)
    return rewritten.replace(".zim.meta4", ".zip")


root = tree.getroot()

# Attributes an entry must carry to be catalogued; all of them are read below.
_requiredAttrs = ('id', 'language', 'creator', 'url', 'size')

for child in root:
    attributes = child.attrib
    if not all(key in attributes for key in _requiredAttrs):
        continue

    url = attributes['url']
    urlEnd = url.split('/')[-1]  # last element
    if not urlEnd:
        # URL ends in '/': there is no file name to derive references from.
        continue

    zim_id = attributes['id']
    category = attributes['creator']
    lang = attributes['language']
    lang = langGroups.get(lang, lang)  # allow for grouping of language codes

    if lang not in zimLangCodes:
        zimLangCodes.append(lang)
    zimGroups.setdefault(lang, {}).setdefault(category, []).append(zim_id)

    fileRef = urlEnd.split('.')[0]
    permaRef = _perma_ref(urlEnd, fileRef)

    # The entry's own attribute dict is stored in the catalogue, extended
    # with the derived perma_ref.  Favicon extraction to imgPath is not
    # performed; a 'favicon' attribute is kept exactly as delivered.
    attributes['perma_ref'] = permaRef
    zims[zim_id] = attributes
    zimCount += 1

    zimUrls[zim_id] = {
        'download_url': _download_url(url, urlEnd),
        'file_ref': fileRef,
        'perma_ref': permaRef,
        'size': attributes['size'],
    }
|
|
|
|
# The language list is written in sorted order.
zimLangCodes.sort()

# Dump each collected structure to its JSON file, in the same order as before:
# groups, languages, catalogue, URLs.
for outPath, payload in (
        (zim_groups, zimGroups),
        (zim_langs, zimLangCodes),
        (zim_catalog, zims),
        (zim_urls, zimUrls)):
    with open(outPath, 'w') as fp:
        json.dump(payload, fp)

# Report completion on stdout and end the script.
sys.stdout.write("SUCCESS")
sys.stdout.flush()
sys.exit(0)
|
|
|
|
|