#!/usr/bin/python

"""

   Creates temp library.xml file for kiwix from contents of /zims/content and index
   Updated to handle incremental additions and deletions

   Author: Tim Moody
   Contributors: Jerry Vonau

"""

import os, sys, syslog
import pwd, grp
import time
from datetime import date, datetime
import json
import yaml
import re
import subprocess
import shlex
import ConfigParser
import xml.etree.ElementTree as ET
import argparse
import fnmatch

IIAB_PATH='/etc/iiab'
if IIAB_PATH not in sys.path:
    sys.path.append(IIAB_PATH)
from iiab_env import get_iiab_env
KIWIX_CAT = IIAB_PATH + '/kiwix_catalog.json'

# Config Files
# iiab_ini_file should be in {{ iiab_env_file }} (/etc/iiab/iiab.env) ?
#iiab_ini_file = "{{ iiab_ini_file }}" # nominally /etc/iiab/iiab.ini
iiab_ini_file = "/etc/iiab/iiab.ini" # comment out after testing

IIAB_INI = get_iiab_env('IIAB_INI') # future
if IIAB_INI:
    iiab_ini_file = IIAB_INI

# Variables that should be read from config file
# All of these variables will be read from config files and recomputed in init()
zim_path = "/library/zims"

iiab_base_path = "/opt/iiab"
kiwix_manage = iiab_base_path + "/kiwix/bin/kiwix-manage"
doc_root = get_iiab_env('WWWROOT')
zim_version_idx_dir = doc_root + "/common/assets/"
zim_version_idx_file = "zim_version_idx.json"
#zim_version_idx_file = "zim_version_idx_test.json"
menuDefs = doc_root + "/js-menu/menu-files/menu-defs/"
menuImages = doc_root + "/js-menu/menu-files/images/"
menuJsonPath = doc_root + "/home/menu.json"

old_zim_map = {"bad.zim" : "unparseable name"}

# Working variables
# zim_files - list of zims and possible index from file system
# path_to_id_map - list of zims in current library.xml with id (for delete)
zim_versions = {} # map of zim's generic name to version installed, e.g. wikipedia_es_all to wikipedia_es_all_2017-01

def main():
    """Bring library.xml in line with the zim files on disk, then write the version index.

    Books whose file is no longer present are removed from library.xml, zims
    found on disk but not yet listed are added, and finally the zim version
    index json is rewritten.  Always ends by calling sys.exit().
    """
    global kiwix_library_xml
    global zim_path
    global zim_version_idx_dir
    global zim_version_idx_file

    init()
    args = parse_args()
    if args.device: # allow override of path
        zim_path = args.device + zim_path
        zim_version_idx_dir = args.device + zim_version_idx_dir
        # NOTE(review): assumed to be part of the --device override, because
        # init() has already loaded the configured library path - confirm.
        kiwix_library_xml = zim_path + "/library.xml"

    if not args.no_tmp: # don't append .tmp
        kiwix_library_xml += ".tmp"

    # remove existing file if force
    if args.force:
        try:
            os.remove(kiwix_library_xml)
        except OSError:
            pass # nothing to remove
        zims_installed = {}
        path_to_id_map = {}
    else:
        zims_installed, path_to_id_map = read_library_xml(kiwix_library_xml)

    zim_files = get_zim_list(zim_path)

    # Remove zims not in file system from library.xml
    for item in path_to_id_map:
        if item not in zim_files:
            rem_libr_xml(path_to_id_map[item])

    # Add zims from file system that are not in library.xml
    for item in zim_files:
        if item not in path_to_id_map:
            add_libr_xml(kiwix_library_xml, zim_path, item, zim_files[item])

    print("Writing zim_versions_idx")
    write_zim_versions_idx()
    sys.exit()

def _generic_zim_name(filename):
    """Return the generic name for a zim file name given without its .zim suffix.

    Names listed in old_zim_map are translated directly.  Otherwise the name is
    cut at its last underscore, or at the second to last underscore when the
    name contains no '-' at all.
    """
    if filename in old_zim_map: # handle old names that don't parse
        return old_zim_map[filename]
    ulpos = filename.rfind("_")
    # but old gutenberg and some other names are not canonical
    if filename.rfind("-") < 0: # non-canonical name
        ulpos = filename[:ulpos].rfind("_")
    return filename[:ulpos]

def get_zim_list(path):
    """Scan path/content for zim files.

    Returns a dict mapping "content/<name>.zim" to "index/<name>.zim.idx", or
    to None when no such index directory exists under path.  As a side effect
    zim_versions is filled with one entry per generic zim name holding the
    'file_name' found (without the .zim suffix).

    Raises OSError if path/content cannot be listed.
    """
    files_processed = {}
    flist = os.listdir(path + "/content/")
    flist.sort()
    for filename in flist:
        zimpos = filename.find(".zim")
        if zimpos == -1:
            continue
        # everything from ".zim" onwards is dropped, so names that only differ
        # after that point collapse to one entry
        filename = filename[:zimpos]
        zimname = "content/" + filename + ".zim"
        if zimname in files_processed:
            continue
        zimidx = "index/" + filename + ".zim.idx"
        if not os.path.isdir(path + "/" + zimidx): # only declare index if exists (could be embedded)
            zimidx = None
        files_processed[zimname] = zimidx
        # the list is sorted, so if there are multiples, last should win
        zim_versions[_generic_zim_name(filename)] = {'file_name': filename}
    return files_processed

def read_library_xml(lib_xml_file, kiwix_exclude_attr=None): # duplicated from iiab-cmdsrv
    """Parse a kiwix library.xml file.

    lib_xml_file       -- path of the library.xml to read
    kiwix_exclude_attr -- optional iterable of attribute names to leave out of
                          the result; 'id' and 'favicon' are always left out.
                          The iterable passed in is not modified.

    Returns (zims_installed, path_to_id_map): the first maps book id to a dict
    of the remaining attributes, the second maps the 'path' attribute to the
    book id.  Both are empty when the file cannot be opened.  Records without
    an id are reported and skipped.
    """
    # A fresh set per call; a shared default list would grow on every call.
    excluded = set(kiwix_exclude_attr or ())
    excluded.add("id") # don't include id
    excluded.add("favicon") # don't include large favicon
    zims_installed = {}
    path_to_id_map = {}
    try:
        tree = ET.parse(lib_xml_file)
    except IOError:
        return zims_installed, path_to_id_map
    for child in tree.getroot():
        if 'id' not in child.attrib:
            # a record without a book id cannot be indexed or removed
            print("xml record missing Book Id")
            continue
        book_id = child.attrib['id']
        attributes = {}
        for attr in child.attrib:
            if attr not in excluded:
                attributes[attr] = child.attrib[attr] # copy if not id or in exclusion list
        zims_installed[book_id] = attributes
        path_to_id_map[child.attrib['path']] = book_id
    return zims_installed, path_to_id_map

def rem_libr_xml(id):
    """Remove the book with the given id from kiwix_library_xml using kiwix-manage."""
    # Pass an argument list so paths containing spaces stay in one piece.
    args = [kiwix_manage, kiwix_library_xml, "remove", id]
    try:
        subprocess.check_output(args)
    except subprocess.CalledProcessError as e:
        if e.returncode != 2: # skip bogus file open error in kiwix-manage
            # the captured output lives on the exception; nothing was returned
            print(e.output)

def add_libr_xml(kiwix_library_xml, zim_path, zimname, zimidx):
    """Add zim_path/zimname to kiwix_library_xml using kiwix-manage.

    zimidx is the index location relative to zim_path, or None when there is
    no separate index.  A zim that kiwix-manage cannot add is skipped.
    """
    # Pass an argument list so paths containing spaces stay in one piece.
    args = [kiwix_manage, kiwix_library_xml, "add", zim_path + "/" + zimname]
    if zimidx:
        args += ["-i", zim_path + "/" + zimidx]
    try:
        subprocess.check_output(args)
    except (subprocess.CalledProcessError, OSError):
        # skip things that don't work
        pass

def init():
    """Load paths from iiab_ini_file into the module level settings."""
    global iiab_base_path
    global zim_path
    global kiwix_library_xml
    global kiwix_manage

    config = ConfigParser.SafeConfigParser()
    config.read(iiab_ini_file)
    iiab_base_path = config.get('location','iiab_base')
    zim_path = config.get('kiwix','iiab_zim_path')
    kiwix_library_xml = config.get('kiwix','kiwix_library_xml')
    kiwix_manage = iiab_base_path + "/kiwix/bin/kiwix-manage"

def parse_args():
    """Parse and return the command line arguments."""
    parser = argparse.ArgumentParser(description="Create library.xml for Kiwix.")
    parser.add_argument("--device", help="no trailing /. change the target device from internal storage to something else like /media/usb0")
    parser.add_argument("--no_tmp", help="don't append .tmp to the library.xml name", action="store_true")
    parser.add_argument("-f", "--force", help="force complete rebuild of library.xml", action="store_true")
    parser.add_argument("-v", "--verbose", help="Print messages.", action="store_true")
    return parser.parse_args()

def write_zim_versions_idx():
    """Complete each zim_versions entry from library.xml and the menu defs, then write it as json.

    The file is written to zim_version_idx_dir + zim_version_idx_file; when
    that directory does not exist a message is printed and nothing is written.
    """
    zims_installed, path_to_id_map = read_library_xml(kiwix_library_xml)
    for perma_ref in zim_versions:
        entry = zim_versions[perma_ref]
        entry['menu_item'] = find_menuitem_from_zimname(perma_ref)
        articlecount, mediacount, size, tags, lang, zim_date = \
            get_substitution_data(perma_ref, zims_installed, path_to_id_map)
        entry['article_count'] = articlecount
        entry['media_count'] = mediacount
        entry['size'] = human_readable(size)
        entry['tags'] = tags
        entry['language'] = lang
        entry['zim_date'] = zim_date

    # Write Version Map
    if os.path.isdir(zim_version_idx_dir):
        # the with statement closes the file
        with open(zim_version_idx_dir + zim_version_idx_file, 'w') as fp:
            fp.write(json.dumps(zim_versions,indent=2 ))
    else:
        print(zim_version_idx_dir + " not found.")

def get_substitution_data(perma_ref,zims_installed, path_to_id_map):
    """Look up the library.xml attributes recorded for one zim.

    perma_ref       -- key into zim_versions
    zims_installed  -- book id -> attribute dict
    path_to_id_map  -- library path ("content/<file>.zim") -> book id

    Returns the tuple (articlecount, mediacount, size, tags, lang, date), with
    lang cut to its first two characters.  When the zim is not in the library,
    or its attribute dict is empty, every field is '0'.
    """
    unknown = ('0','0','0','0','0','0')
    #reconstruct the path in the id map
    path = 'content/' + zim_versions[perma_ref]['file_name'] + '.zim'
    # A zim that could not be added to library.xml has no id; report it as
    # unknown rather than failing the whole index write.
    book_id = path_to_id_map.get(path)
    if book_id is None:
        return unknown
    item = zims_installed.get(book_id)
    if item is None:
        return unknown
    if len(item) == 0 and perma_ref != 'test':
        return unknown
    mediacount = item.get('mediaCount','')
    articlecount = item.get('articleCount','')
    size = item.get('size','')
    tags = item.get('tags','')
    lang = item.get('language','')
    if len(lang) > 2:
        lang = lang[:2]
    zim_date = item.get('date','')
    return (articlecount,mediacount,size,tags,lang,zim_date)

def get_menu_def_zimnames(intended_use='zim'):
    """Map zim_name to menu-def file path for the json files in menuDefs.

    Only menu defs whose 'intended_use' equals intended_use and that carry a
    non-empty 'zim_name' are included.  Files that cannot be read or parsed
    are reported and skipped.  The working directory is left unchanged.

    Raises OSError if menuDefs cannot be listed.
    """
    menu_def_dict = {}
    for filename in os.listdir(menuDefs):
        if not fnmatch.fnmatch(filename, '*.json'):
            continue
        def_path = os.path.join(menuDefs, filename)
        readstr = ''
        try:
            with open(def_path,'r') as json_file:
                readstr = json_file.read()
            data = json.loads(readstr)
        except (IOError, OSError, ValueError):
            print("failed to parse %s"%filename)
            print(readstr)
            continue # no usable data for this file
        if not isinstance(data, dict):
            continue
        if data.get('intended_use','') != intended_use:
            continue
        zimname = data.get('zim_name','')
        if zimname != '':
            menu_def_dict[zimname] = def_path
    return menu_def_dict

def find_menuitem_from_zimname(zimname):
    """Return the 'menu_item_name' of the menu def for zimname, or '' if there is none."""
    defs_filename = get_menu_def_zimnames().get(zimname,'')
    if defs_filename == '':
        return ''
    #print("reading menu-def:%s"%defs_filename)
    with open(defs_filename,'r') as json_file:
        data = json.loads(json_file.read())
    return data.get('menu_item_name','')

def get_kiwix_catalog_item(perma_ref):
    """Return the entry in the KIWIX_CAT catalog whose 'perma_ref' matches, or {} if none does."""
    # Read the kiwix catalog
    with open(KIWIX_CAT, 'r') as kiwix_cat:
        download = json.loads(kiwix_cat.read())
    zims = download['zims']
    for uuid in zims:
        #print("%s %s"%(zims[uuid]['perma_ref'],perma_ref,))
        if zims[uuid]['perma_ref'] == perma_ref:
            return zims[uuid]
    return {}

def human_readable(num):
    """Format num with three significant digits and a unit suffix ('', K, M, G, T).

    num may be a number or a numeric string.  Anything that cannot be
    converted, such as '' or None, is formatted as zero.  Values beyond the
    largest unit stay in that unit.
    """
    try:
        value = float(num)
    except (TypeError, ValueError):
        value = 0.0 # missing size attribute
    units = ['', 'K', 'M', 'G', 'T']
    last = len(units) - 1
    for idx, unit in enumerate(units):
        if value < 10.0:
            return "%.2f%s"%(value, unit)
        if value < 100.0:
            return "%.1f%s"%(value, unit)
        if value < 1000.0 or idx == last:
            return "%.0f%s"%(value, unit)
        value /= 1000.0

# Now start the application
if __name__ == "__main__":

    # Run the main routine
    main()