#!/usr/bin/python
# Not currently used
"""Build the XSCE ZIM catalog JSON files from the Kiwix library.xml feed.

The script downloads ``kiwixLibUrl``, walks every child element of the XML
root and writes four JSON files under ``jsonPath``:

* ``zim_groups.json``  - {language: {creator: [zim id, ...]}}
* ``zim_langs.json``   - sorted list of language codes
* ``zim_catalog.json`` - {zim id: all XML attributes plus ``perma_ref``}
* ``zim_urls.json``    - {zim id: download_url / file_ref / perma_ref / size}

It prints ``SUCCESS`` on stdout when done, or ``GET-ZIM-LIB ERROR - <reason>``
when the library cannot be fetched or parsed.
"""

import xml.etree.ElementTree as ET
import json
import csv
import operator
import base64
import os.path
import sys

try:
    from urllib2 import urlopen, URLError
except ImportError:
    # Python 3 split urllib2 into urllib.request and urllib.error.
    from urllib.request import urlopen
    from urllib.error import URLError

XSCE_PATH = '/etc/xsce'
if XSCE_PATH not in sys.path:
    sys.path.append(XSCE_PATH)

# Language codes that are folded into another code when grouping.
langGroups = {"en": "eng"}

# as of Mar 21, 2015
# Metalink URLs whose portable download does not follow the usual
# "/zim/" -> "/portable/" + prefix naming; mapped explicitly.
problemUrlMap = {
    "http://download.kiwix.org/zim/0.9/wikipedia_en_ray_charles_03_2013.zim.meta4":
        "http://download.kiwix.org/portable/wikipedia/kiwix-0.9+wikipedia_en_ray_charles_03_2013.zip",
    "http://download.kiwix.org/zim/0.9/wikipedia_en_wp1_0.8_45000+_12_2010.zim.meta4":
        "http://download.kiwix.org/portable/wikipedia/kiwix-0.9+wikipedia_en_wp1_0.8_45000+_12_2010.zip",
    "http://download.kiwix.org/zim/0.9/wikipedia_es_venezuela_11_2012.zim.meta4":
        "http://download.kiwix.org/portable/wikipedia/kiwix-0.9+wikipedia_es_venezuela_11_2012.zip",
    "http://download.kiwix.org/zim/wikispecies/wikispecies_en_all_nopic_2015-03.zim.meta4":
        "http://download.kiwix.org/portable/wikispecies/kiwix-+wikispecies_en_all_nopic_2015-03.zip",
    "http://download.kiwix.org/zim/wikispecies/wikispecies_en_all_2015-03.zim.meta4":
        "http://download.kiwix.org/portable/wikispecies/kiwix-+wikispecies_en_all_2015-03.zip",
}

jsonPath = "/etc/xsce/"

# input file
kiwixLibUrl = 'http://www.kiwix.org/library.xml'
zimKiwixUrl = "http://download.kiwix.org/portable/"
zimKiwixPrefix = "kiwix-0.9+"

# output files
zim_langs = jsonPath + "zim_langs.json"
zim_groups = jsonPath + "zim_groups.json"
zim_catalog = jsonPath + "zim_catalog.json"
zim_urls = jsonPath + "zim_urls.json"

# Attributes an entry must carry to be cataloged. 'url' and 'size' are read
# for every cataloged entry, so entries lacking them are skipped instead of
# aborting the whole run with a KeyError.
REQUIRED_ATTRS = ('id', 'language', 'creator', 'url', 'size')


def build_download_url(url):
    """Return the portable .zip download URL for a .zim.meta4 *url*.

    URLs listed in ``problemUrlMap`` are mapped explicitly. Any other URL has
    "/zim/" replaced by "/portable/", ``zimKiwixPrefix`` put in front of its
    last path segment, and ".zim.meta4" replaced by ".zip".
    """
    if url in problemUrlMap:
        return problemUrlMap[url]
    portable = url.replace("/zim/", "/portable/")
    # Prefix only the final path segment. A plain str.replace() of the file
    # name would also rewrite earlier occurrences, and with an empty file
    # name it would insert the prefix between every character.
    head, sep, tail = portable.rpartition('/')
    return (head + sep + zimKiwixPrefix + tail).replace(".zim.meta4", ".zip")


def build_perma_ref(url_end):
    """Return the date-independent reference derived from file name *url_end*.

    The name is split on "_"; the last part is dropped and, apart from the
    first part, purely numeric parts are dropped as well. A name without any
    underscore falls back to the text before its first "." rather than
    raising IndexError.
    """
    parts = url_end.split('_')[:-1]
    if not parts:
        return url_end.split('.')[0]
    kept = [parts[0]] + [part for part in parts[1:] if not part.isdigit()]
    return "_".join(kept)


def build_catalog(root):
    """Collect catalog data from the children of the XML element *root*.

    Returns a tuple ``(catalog, url_index, groups, lang_codes)``:

    * catalog    - {zim id: element attributes plus 'perma_ref'}
    * url_index  - {zim id: {'download_url', 'file_ref', 'perma_ref', 'size'}}
    * groups     - {language: {creator: [zim id, ...]}}
    * lang_codes - sorted list of the languages present in ``groups``

    Children missing any of ``REQUIRED_ATTRS`` are ignored. Each cataloged
    element's ``attrib`` dict is updated in place with 'perma_ref'.
    """
    catalog = {}
    url_index = {}
    groups = {}

    for child in root:
        attributes = child.attrib
        if not all(key in attributes for key in REQUIRED_ATTRS):
            continue

        zim_id = attributes['id']
        url = attributes['url']
        category = attributes['creator']
        # allow for grouping of language codes
        lang = langGroups.get(attributes['language'], attributes['language'])

        groups.setdefault(lang, {}).setdefault(category, []).append(zim_id)

        url_end = url.split('/')[-1]  # last element
        perma_ref = build_perma_ref(url_end)
        attributes['perma_ref'] = perma_ref

        # NOTE: exporting attributes['favicon'] as a PNG under the console
        # images directory is intentionally disabled.
        catalog[zim_id] = attributes
        url_index[zim_id] = {
            'download_url': build_download_url(url),
            'file_ref': url_end.split('.')[0],
            'perma_ref': perma_ref,
            'size': attributes['size'],
        }

    # Every language gets a key in groups, so its keys are exactly the
    # distinct language codes.
    return catalog, url_index, groups, sorted(groups)


def fetch_library(url):
    """Download *url* and return it parsed as an ElementTree.

    The HTTP response is closed even when parsing fails.
    """
    response = urlopen(url)
    try:
        return ET.parse(response)
    finally:
        response.close()


def main():
    """Fetch the Kiwix library, write the four JSON files, report on stdout."""
    # Imported here so the helpers above stay importable on machines where
    # the XSCE environment module is not installed.
    from xsce_env import get_xsce_env

    # These paths are only needed by the disabled favicon export; the lookup
    # is kept so a missing XSCE environment still surfaces here.
    WWWROOT = get_xsce_env('WWWROOT')
    consolePath = WWWROOT + "/devel/"
    assetsPath = consolePath + "assets/"
    imgPath = consolePath + "images/"

    try:
        tree = fetch_library(kiwixLibUrl)
    except (URLError, ET.ParseError) as exc:
        # URLError carries .reason; ParseError does not, so fall back to the
        # exception itself.
        sys.stdout.write("GET-ZIM-LIB ERROR - " + str(getattr(exc, 'reason', exc)))
        sys.stdout.flush()
        # NOTE(review): exit status is 0 even on failure; presumably the
        # caller parses stdout - confirm before changing.
        sys.exit(0)

    catalog, url_index, groups, lang_codes = build_catalog(tree.getroot())

    outputs = (
        (zim_groups, groups),
        (zim_langs, lang_codes),
        (zim_catalog, catalog),
        (zim_urls, url_index),
    )
    for path, payload in outputs:
        with open(path, 'w') as fp:
            json.dump(payload, fp)

    sys.stdout.write("SUCCESS")
    sys.stdout.flush()
    sys.exit(0)


if __name__ == "__main__":
    main()