2017-05-27 18:09:50 +00:00
|
|
|
#!/usr/bin/python
|
|
|
|
|
|
|
|
"""
|
|
|
|
|
2018-07-23 19:18:31 +00:00
|
|
|
Creates temp library.xml file for kiwix from contents of /zims/content and index
|
2018-09-04 17:49:26 +00:00
|
|
|
Updated to handle incremental additions and deletions
|
2017-05-27 18:09:50 +00:00
|
|
|
|
|
|
|
Author: Tim Moody <tim(at)timmoody(dot)com>
|
2017-07-04 20:05:10 +00:00
|
|
|
Contributors: Jerry Vonau <jvonau3(at)gmail.com>
|
2017-05-27 18:09:50 +00:00
|
|
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
import os, sys, syslog
|
|
|
|
import pwd, grp
|
|
|
|
import time
|
|
|
|
from datetime import date, datetime
|
|
|
|
import json
|
|
|
|
import yaml
|
|
|
|
import re
|
|
|
|
import subprocess
|
|
|
|
import shlex
|
|
|
|
import ConfigParser
|
2018-09-04 17:49:26 +00:00
|
|
|
import xml.etree.ElementTree as ET
|
|
|
|
import argparse
|
2019-01-10 22:26:47 +00:00
|
|
|
import fnmatch
|
2018-09-04 17:49:26 +00:00
|
|
|
|
2017-07-04 20:05:10 +00:00
|
|
|
IIAB_PATH = '/etc/iiab'
# Make IIAB_PATH importable before pulling in iiab_env from it.
if IIAB_PATH not in sys.path:
    sys.path.append(IIAB_PATH)
from iiab_env import get_iiab_env
KIWIX_CAT = IIAB_PATH + '/kiwix_catalog.json'

# Config Files
# iiab_ini_file should be in {{ iiab_env_file }} (/etc/iiab/iiab.env) ?
# Templated form, nominally /etc/iiab/iiab.ini:
#iiab_ini_file = "{{ iiab_ini_file }}"
# The hard-coded path is used unless the environment supplies IIAB_INI (future).
IIAB_INI = get_iiab_env('IIAB_INI')
iiab_ini_file = IIAB_INI if IIAB_INI else "/etc/iiab/iiab.ini"

# Variables that should be read from config file
# All of these variables will be read from config files and recomputed in init()
zim_path = "/library/zims"

iiab_base_path = "/opt/iiab"
kiwix_manage = iiab_base_path + "/kiwix/bin/kiwix-manage"
doc_root = get_iiab_env('WWWROOT')
zim_version_idx_dir = doc_root + "/common/assets/"
zim_version_idx_file = "zim_version_idx.json"
# Alternative index name for testing:
#zim_version_idx_file = "zim_version_idx_test.json"
menuDefs = doc_root + "/js-menu/menu-files/menu-defs/"
menuImages = doc_root + "/js-menu/menu-files/images/"
menuJsonPath = doc_root + "/home/menu.json"

# Old zim names that cannot be parsed, mapped to the generic name to use instead
old_zim_map = {"bad.zim" : "unparseable name"}

# Working variables
# zim_files - list of zims and possible index from file system
# path_to_id_map - list of zims in current library.xml with id (for delete)
# zim_versions - map of zim's generic name to version installed,
#                e.g. wikipedia_es_all to wikipedia_es_all_2017-01
zim_versions = {}
|
|
|
|
|
2017-05-27 18:09:50 +00:00
|
|
|
def main():
    """Server routine.

    Loads the configuration, works out which library.xml to maintain
    (optionally on another device and, unless --no_tmp is given, with a
    ".tmp" suffix), then brings that library in line with the zim files
    found on disk: entries whose file is gone are removed, files that are
    not yet listed are added.  Finally the zim version index is written
    and the process exits.
    """
    global kiwix_library_xml
    global zim_path
    global zim_version_idx_dir
    global zim_version_idx_file

    init()
    args = parse_args()

    # allow override of path: prefix both locations with the device root
    if args.device:
        zim_path = args.device + zim_path
        zim_version_idx_dir = args.device + zim_version_idx_dir

    # ".tmp" is appended unless the caller asked for the real file name
    suffix = "" if args.no_tmp else ".tmp"
    kiwix_library_xml = zim_path + "/library.xml" + suffix

    if args.force:
        # forced rebuild: start from an empty library
        try:
            os.remove(kiwix_library_xml)
        except OSError:
            pass
        zims_installed, path_to_id_map = {}, {}
    else:
        zims_installed, path_to_id_map = read_library_xml(kiwix_library_xml)

    zim_files = get_zim_list(zim_path)

    # Remove zims not in file system from library.xml
    stale_ids = [path_to_id_map[path] for path in path_to_id_map
                 if path not in zim_files]
    for stale_id in stale_ids:
        rem_libr_xml(stale_id)

    # Add zims from file system that are not in library.xml
    for zimname in zim_files:
        if zimname in path_to_id_map:
            continue
        add_libr_xml(kiwix_library_xml, zim_path, zimname, zim_files[zimname])

    print("Writing zim_versions_idx")
    write_zim_versions_idx()
    sys.exit()
|
2017-05-27 18:09:50 +00:00
|
|
|
|
2018-09-04 17:49:26 +00:00
|
|
|
def _zim_generic_name(filename):
    """Strip the trailing version part from a zim base name.

    Canonical names end in "_YYYY-MM" (one underscore-separated part is
    dropped); names without any "-" are treated as non-canonical and lose
    their last two underscore-separated parts.  When an underscore to cut
    at cannot be found the name is left as long as possible instead of
    being truncated by one character.
    """
    ulpos = filename.rfind("_")
    if ulpos < 0:
        # nothing to strip; slicing with -1 would drop the last character
        return filename
    # but old gutenberg and some other names are not canonical
    if "-" not in filename:
        earlier = filename.rfind("_", 0, ulpos)
        if earlier >= 0:
            ulpos = earlier
    return filename[:ulpos]


def get_zim_list(path):
    """Scan <path>/content for zim files.

    Any directory entry whose name contains ".zim" is considered; the part
    of the name before ".zim" is the zim's base name, so several entries
    sharing a base name are recorded once.

    Side effect: for every zim found, the module-level zim_versions map
    gets an entry generic_name -> {'file_name': base_name}.  Entries are
    visited in sorted order, so if there are multiples the last one wins.

    Returns a dict mapping "content/<base>.zim" to "index/<base>.zim.idx"
    when that index directory exists under path, otherwise to None.
    Raises OSError if <path>/content cannot be listed.
    """
    files_processed = {}
    content = path + "/content/"

    for entry in sorted(os.listdir(content)):
        zimpos = entry.find(".zim")
        if zimpos == -1:
            continue

        filename = entry[:zimpos]
        zimname = "content/" + filename + ".zim"
        if zimname in files_processed:
            continue

        zimidx = "index/" + filename + ".zim.idx"
        # only declare index if exists (could be embedded)
        if not os.path.isdir(path + "/" + zimidx):
            zimidx = None
        files_processed[zimname] = zimidx

        if filename in old_zim_map: # handle old names that don't parse
            wiki_name = old_zim_map[filename]
        else:
            wiki_name = _zim_generic_name(filename)

        zim_versions[wiki_name] = {'file_name': filename}

    return files_processed
|
|
|
|
|
|
|
|
def read_library_xml(lib_xml_file, kiwix_exclude_attr=None): # duplicated from iiab-cmdsrv
    """Read a kiwix library.xml file.

    lib_xml_file       -- path of the library file to parse
    kiwix_exclude_attr -- optional iterable of attribute names to leave out
                          of the result; "id" and "favicon" are always left
                          out.  The argument itself is never modified.

    Returns (zims_installed, path_to_id_map):
      zims_installed  -- book id -> dict of the record's remaining attributes
      path_to_id_map  -- the record's 'path' attribute -> book id

    Both are empty when the file cannot be opened.  Records without an id
    are reported and skipped.  Raises KeyError for a record that has an id
    but no 'path' attribute.
    """
    # Work on a private copy: the previous shared default list grew by two
    # entries on every call and callers' lists were modified in place.
    excluded = set(kiwix_exclude_attr or [])
    excluded.add("id")       # don't include id
    excluded.add("favicon")  # don't include large favicon

    zims_installed = {}
    path_to_id_map = {}

    try:
        tree = ET.parse(lib_xml_file)
    except IOError:
        return zims_installed, path_to_id_map

    for child in tree.getroot():
        book_id = child.attrib.get('id')
        if book_id is None:
            # without an id the record can be neither indexed nor removed
            print("xml record missing Book Id")
            continue

        attributes = {}
        for attr in child.attrib:
            if attr not in excluded:
                attributes[attr] = child.attrib[attr]

        zims_installed[book_id] = attributes
        path_to_id_map[child.attrib['path']] = book_id

    return zims_installed, path_to_id_map
|
2018-09-04 17:49:26 +00:00
|
|
|
|
2018-11-08 22:30:38 +00:00
|
|
|
def rem_libr_xml(id):
    """Remove the book with the given id from the current library.xml.

    Runs "<kiwix_manage> <kiwix_library_xml> remove <id>" using the
    module-level kiwix_manage and kiwix_library_xml values.  A non-zero
    exit status is not fatal: the command's output is printed, except for
    status 2 which is ignored.  Raises OSError if the program cannot be
    started.
    """
    # Pass the arguments as a list so that paths containing spaces or
    # quotes reach kiwix-manage unchanged.
    args = [kiwix_manage, kiwix_library_xml, "remove", id]
    try:
        subprocess.check_output(args)
    except subprocess.CalledProcessError as e:
        if e.returncode != 2: # skip bogus file open error in kiwix-manage
            # the captured output lives on the exception; no local was
            # ever assigned when check_output raised
            print(e.output)
|
2018-09-04 17:49:26 +00:00
|
|
|
|
|
|
|
def add_libr_xml(kiwix_library_xml, zim_path, zimname, zimidx):
    """Add one zim to a library.xml file via kiwix-manage.

    kiwix_library_xml -- library file to update
    zim_path          -- directory that zimname and zimidx are relative to
    zimname           -- zim file, relative to zim_path
    zimidx            -- index directory relative to zim_path, or a false
                         value when there is no separate index

    Runs "<kiwix_manage> <library> add <zim> [-i <index>]" using the
    module-level kiwix_manage value.  This is best effort: a zim that
    kiwix-manage rejects, or a program that cannot be started, is skipped
    silently.
    """
    # Pass the arguments as a list so that paths containing spaces or
    # quotes reach kiwix-manage unchanged.
    args = [kiwix_manage, kiwix_library_xml, "add", zim_path + "/" + zimname]
    if zimidx:
        args += ["-i", zim_path + "/" + zimidx]

    try:
        subprocess.check_output(args)
    except (subprocess.CalledProcessError, OSError):
        # skip things that don't work, but let KeyboardInterrupt and
        # programming errors propagate
        pass
|
2017-05-27 18:09:50 +00:00
|
|
|
|
|
|
|
def init():
    """Load path settings from the ini file named by iiab_ini_file.

    Rebinds the module-level iiab_base_path, zim_path, kiwix_library_xml
    and kiwix_manage.  The config parser raises if a required section or
    option is missing.
    """
    global iiab_base_path, zim_path, kiwix_library_xml, kiwix_manage

    ini = ConfigParser.SafeConfigParser()
    ini.read(iiab_ini_file)

    iiab_base_path = ini.get('location', 'iiab_base')
    zim_path = ini.get('kiwix', 'iiab_zim_path')
    kiwix_library_xml = ini.get('kiwix', 'kiwix_library_xml')

    # kiwix-manage is located relative to the configured base path
    kiwix_manage = iiab_base_path + "/kiwix/bin/kiwix-manage"
|
2017-05-27 18:09:50 +00:00
|
|
|
|
2018-09-04 17:49:26 +00:00
|
|
|
def parse_args():
    """Parse the command line and return the argparse namespace.

    Options: --device (path prefix), --no_tmp, -f/--force, -v/--verbose
    (the last three are boolean flags).
    """
    # (flags, keyword arguments) for each option, in registration order
    option_table = (
        (("--device",),
         {"help": "no trailing /. change the target device from internal storage to something else like /media/usb0"}),
        (("--no_tmp",),
         {"help": "don't append .tmp to the library.xml name", "action": "store_true"}),
        (("-f", "--force"),
         {"help": "force complete rebuild of library.xml", "action": "store_true"}),
        (("-v", "--verbose"),
         {"help": "Print messages.", "action": "store_true"}),
    )

    parser = argparse.ArgumentParser(description="Create library.xml for Kiwix.")
    for flags, options in option_table:
        parser.add_argument(*flags, **options)
    return parser.parse_args()
|
|
|
|
|
2019-01-10 22:26:47 +00:00
|
|
|
def write_zim_versions_idx():
    """Fill in zim_versions from library.xml and write it out as JSON.

    Each entry of the module-level zim_versions map gains the keys
    menu_item, article_count, media_count, size (human readable), tags,
    language and zim_date.  The map is then written to
    zim_version_idx_dir + zim_version_idx_file; if that directory does not
    exist a message is printed and nothing is written.
    """
    global zim_versions

    zims_installed, path_to_id_map = read_library_xml(kiwix_library_xml)

    for perma_ref in zim_versions:
        entry = zim_versions[perma_ref]
        entry['menu_item'] = find_menuitem_from_zimname(perma_ref)

        details = get_substitution_data(perma_ref, zims_installed, path_to_id_map)
        articlecount, mediacount, size, tags, lang, zim_date = details

        entry['article_count'] = articlecount
        entry['media_count'] = mediacount
        entry['size'] = human_readable(size)
        entry['tags'] = tags
        entry['language'] = lang
        entry['zim_date'] = zim_date

    # Write Version Map
    if not os.path.isdir(zim_version_idx_dir):
        print(zim_version_idx_dir + " not found.")
        return

    with open(zim_version_idx_dir + zim_version_idx_file, 'w') as fp:
        json.dump(zim_versions, fp, indent=2)
|
|
|
|
|
|
|
|
def get_substitution_data(perma_ref, zims_installed, path_to_id_map):
    """Look up the library.xml details of one installed zim.

    perma_ref       -- key into the module-level zim_versions map
    zims_installed  -- book id -> attribute dict
    path_to_id_map  -- library path ("content/<file>.zim") -> book id

    Returns the tuple (articlecount, mediacount, size, tags, lang, date)
    of strings, where lang is cut to its first two characters.  Attributes
    missing from the record come back as ''.  When the zim is not in the
    library at all (for example because adding it failed), or its record
    is empty, every field is '0'.
    """
    not_available = ('0', '0', '0', '0', '0', '0')

    # reconstruct the path in the id map
    path = 'content/' + zim_versions[perma_ref]['file_name'] + '.zim'

    # A zim on disk is not necessarily in the library, so a missing path
    # or id must not abort the whole index.
    book_id = path_to_id_map.get(path)
    if book_id is None or book_id not in zims_installed:
        return not_available

    item = zims_installed[book_id]
    if len(item) != 0 or perma_ref == 'test':
        mediacount = item.get('mediaCount', '')
        articlecount = item.get('articleCount', '')
        size = item.get('size', '')
        tags = item.get('tags', '')
        lang = item.get('language', '')[:2]
        zim_date = item.get('date', '')
        return (articlecount, mediacount, size, tags, lang, zim_date)

    return not_available
|
|
|
|
|
|
|
|
def get_menu_def_zimnames(intended_use='zim'):
    """Map zim names to the menu definition files that refer to them.

    Reads every *.json file in the menuDefs directory and keeps those whose
    'intended_use' equals the intended_use argument (default 'zim') and
    whose 'zim_name' is not empty.

    Returns a dict zim_name -> menuDefs + file name.  Files that cannot be
    read or parsed are reported on stdout and skipped.  The process working
    directory is left untouched.  Raises OSError if menuDefs cannot be
    listed.
    """
    menu_def_dict = {}

    for filename in os.listdir(menuDefs):
        if not fnmatch.fnmatch(filename, '*.json'):
            continue

        readstr = ''
        try:
            with open(os.path.join(menuDefs, filename), 'r') as json_file:
                readstr = json_file.read()
            data = json.loads(readstr)
        except (IOError, OSError, ValueError):
            print("failed to parse %s"%filename)
            print(readstr)
            # never fall through with the previous file's data
            continue

        if not isinstance(data, dict):
            continue
        if data.get('intended_use','') != intended_use:
            continue

        zimname = data.get('zim_name','')
        if zimname != '':
            menu_def_dict[zimname] = menuDefs + filename

    return menu_def_dict
|
|
|
|
|
|
|
|
def find_menuitem_from_zimname(zimname):
    """Return the menu item name defined for a zim, or '' if there is none.

    The menu definition file is found through get_menu_def_zimnames() and
    its 'menu_item_name' value is returned ('' when the key is absent).
    """
    defs_filename = get_menu_def_zimnames().get(zimname, '')
    if defs_filename == '':
        return ''

    with open(defs_filename, 'r') as json_file:
        data = json.load(json_file)
    return data.get('menu_item_name', '')
|
|
|
|
|
|
|
|
def get_kiwix_catalog_item(perma_ref):
    """Return the kiwix catalog entry whose 'perma_ref' matches.

    The catalog is the JSON file at KIWIX_CAT; its 'zims' member maps a
    uuid to an entry.  Returns the first matching entry, or {} when none
    matches.
    """
    # Read the kiwix catalog
    with open(KIWIX_CAT, 'r') as kiwix_cat:
        catalog = json.load(kiwix_cat)

    for entry in catalog['zims'].values():
        if entry['perma_ref'] == perma_ref:
            return entry
    return {}
|
|
|
|
|
|
|
|
def human_readable(num):
    """Format a number with 3 significant digits and a unit specifier.

    num is anything float() accepts.  The value is scaled down by 1000
    per step through the suffixes '', 'K', 'M', 'G'.  Values too large
    for 'G' to bring under 1000 are still reported in 'G' (with more
    digits) rather than returning None.  Raises ValueError/TypeError if
    num cannot be converted to float.
    """
    num = float(num)
    units = ['', 'K', 'M', 'G']
    last = len(units) - 1

    for i, unit in enumerate(units):
        if num < 10.0:
            return "%.2f%s" % (num, unit)
        if num < 100.0:
            return "%.1f%s" % (num, unit)
        # the largest unit absorbs everything that is left
        if num < 1000.0 or i == last:
            return "%.0f%s" % (num, unit)
        num /= 1000.0
|
2017-05-27 18:09:50 +00:00
|
|
|
|
2019-01-10 22:26:47 +00:00
|
|
|
# Script entry point: run the main routine only when executed directly,
# not when the module is imported.
if __name__ == "__main__":
    main()
|