1
0
Fork 0
mirror of https://github.com/iiab/iiab.git synced 2025-02-13 19:52:06 +00:00
iiab/roles/kiwix/templates/iiab-make-kiwix-lib.py
2019-11-28 05:39:24 -06:00

326 lines
11 KiB
Python
Executable file

#!/usr/bin/python3
"""
Creates temp library.xml file for kiwix from contents of /zims/content and index
Updated to handle incremental additions and deletions
Author: Tim Moody <tim(at)timmoody(dot)com>
Contributors: Jerry Vonau <jvonau3(at)gmail.com>
"""
import os, sys, syslog
import pwd, grp
import time
from datetime import date, datetime
import json
import yaml
import re
import subprocess
import shlex
#import ConfigParser
import configparser
import xml.etree.ElementTree as ET
import argparse
import fnmatch
# Make the IIAB helper module importable before pulling get_iiab_env from it.
IIAB_PATH = '/etc/iiab'
if IIAB_PATH not in sys.path:
    sys.path.append(IIAB_PATH)

from iiab_env import get_iiab_env

KIWIX_CAT = IIAB_PATH + '/kiwix_catalog.json'

# Config Files
# iiab_ini_file should be in {{ iiab_env_file }} (/etc/iiab/iiab.env) ?
#iiab_ini_file = "{{ iiab_ini_file }}" # nominally /etc/iiab/iiab.ini
# The hard-coded path is the fallback used whenever the environment file does
# not supply IIAB_INI (comment out after testing).
IIAB_INI = get_iiab_env('IIAB_INI') # future
iiab_ini_file = IIAB_INI if IIAB_INI else "/etc/iiab/iiab.ini"

# Variables that should be read from config file
# All of these variables will be read from config files and recomputed in init()
zim_path = "/library/zims"
iiab_base_path = "/opt/iiab"
kiwix_manage = iiab_base_path + "/kiwix/bin/kiwix-manage"

# Locations derived from the web server document root
doc_root = get_iiab_env('WWWROOT')
zim_version_idx_dir = doc_root + "/common/assets/"
zim_version_idx_file = "zim_version_idx.json"
#zim_version_idx_file = "zim_version_idx_test.json"
menuDefs = doc_root + "/js-menu/menu-files/menu-defs/"
menuImages = doc_root + "/js-menu/menu-files/images/"
menuJsonPath = doc_root + "/home/menu.json"
assets_dir = doc_root + "/common/assets/"
lang_codes_path = assets_dir + "lang_codes.json"
lang_codes = {}  # filled in by read_lang_codes()

# File names whose generic name cannot be derived by parsing, mapped to the
# name that should be used instead.
old_zim_map = {"bad.zim" : "unparseable name"}

# Working variables
# zim_files - list of zims and possible index from file system
# path_to_id_map - list of zims in current library.xml with id (for delete)
zim_versions = {} # map of zim's generic name to version installed, e.g. wikipedia_es_all to wikipedia_es_all_2017-01
def main():
    """Bring kiwix's library.xml in line with the zim files on disk.

    Steps, in order:
      1. init() loads paths from the ini file and parse_args() reads the
         command line.
      2. --device prefixes the zim path and the version-index directory.
      3. Unless --no_tmp is given, ".tmp" is appended to the library file name.
      4. --force deletes the library file so it is rebuilt from scratch;
         otherwise the existing file is read to learn what is registered.
      5. Books whose zim file is gone are removed, zim files not yet
         registered are added.
      6. The zim version index is written and the process exits.

    The --verbose flag is parsed but not consulted here.
    """
    global kiwix_library_xml
    global zim_path
    global zim_version_idx_dir

    init()
    args = parse_args()

    if args.device:  # allow override of path
        zim_path = args.device + zim_path
        zim_version_idx_dir = args.device + zim_version_idx_dir
    # NOTE(review): the nesting of the next assignment was reconstructed from a
    # flattened listing -- confirm whether it belongs inside `if args.device:`.
    kiwix_library_xml = zim_path + "/library.xml"
    if not args.no_tmp:  # don't append .tmp
        kiwix_library_xml += ".tmp"

    if args.force:
        # remove existing file if force; a missing file is not an error
        try:
            os.remove(kiwix_library_xml)
        except OSError:
            pass
        path_to_id_map = {}
    else:
        _, path_to_id_map = read_library_xml(kiwix_library_xml)

    zim_files = get_zim_list(zim_path)

    # Remove zims not in file system from library.xml
    for lib_path, book_id in path_to_id_map.items():
        if lib_path not in zim_files:
            rem_libr_xml(book_id)

    # Add zims from file system that are not in library.xml
    for zim_name, zim_idx in zim_files.items():
        if zim_name not in path_to_id_map:
            add_libr_xml(kiwix_library_xml, zim_path, zim_name, zim_idx)

    print("Writing zim_versions_idx")
    write_zim_versions_idx()
    sys.exit()
def _zim_generic_name(stem):
    """Return the zim file stem with its trailing version segment removed.

    The text after the last underscore is dropped. When the stem contains no
    dash at all (non-canonical names such as old gutenberg files) one more
    underscore-separated segment is dropped as well. A segment is only
    removed when its underscore actually exists, so a stem without
    underscores is returned unchanged.
    """
    cut = stem.rfind("_")
    if cut < 0:
        return stem
    if "-" not in stem:  # non-canonical name
        inner = stem.rfind("_", 0, cut)
        if inner >= 0:
            cut = inner
    return stem[:cut]


def get_zim_list(path):
    """Scan <path>/content/ for zim files and record which version is installed.

    Every directory entry whose name contains ".zim" is reduced to the part
    before ".zim"; entries sharing that stem are recorded once (presumably
    the chunks of a split zim such as .zimaa/.zimab -- confirm).

    Side effect: updates the module-level ``zim_versions`` map, keyed by the
    generic name (from ``old_zim_map`` when listed there, otherwise derived
    from the stem) with ``{'file_name': stem}``. Entries are visited in
    sorted order, so if there are multiples the last one wins.

    Returns a dict mapping "content/<stem>.zim" to "index/<stem>.zim.idx"
    when that index directory exists under *path*, else to None (the index
    could be embedded).
    """
    files_processed = {}
    content_dir = path + "/content/"

    for entry in sorted(os.listdir(content_dir)):
        zimpos = entry.find(".zim")
        if zimpos == -1:
            continue

        stem = entry[:zimpos]
        zimname = "content/" + stem + ".zim"
        if zimname in files_processed:
            continue  # same stem already handled

        zimidx = "index/" + stem + ".zim.idx"
        if not os.path.isdir(path + "/" + zimidx):  # only declare index if exists
            zimidx = None
        files_processed[zimname] = zimidx

        if stem in old_zim_map:  # handle old names that don't parse
            wiki_name = old_zim_map[stem]
        else:
            wiki_name = _zim_generic_name(stem)
        zim_versions[wiki_name] = {'file_name': stem}

    return files_processed
def read_library_xml(lib_xml_file, kiwix_exclude_attr=None):  # duplicated from iiab-cmdsrv
    """Parse a kiwix library.xml file into lookup tables.

    Parameters:
        lib_xml_file: path of the library file to parse.
        kiwix_exclude_attr: optional iterable of attribute names to leave out
            of the per-book attribute dicts. "id" and the large "favicon"
            are always left out. The argument is never modified.

    Returns:
        (zims_installed, path_to_id_map) where zims_installed maps book id to
        a dict of the remaining attributes and path_to_id_map maps each
        book's "path" attribute to its id. Both are empty when the file
        cannot be opened.

    Records without an "id" attribute are reported on stdout and skipped;
    records without a "path" attribute are kept in zims_installed only.
    """
    # Build a fresh exclusion set on every call so neither a shared default
    # nor the caller's list is ever mutated.
    excluded = {"id", "favicon"}
    excluded.update([""] if kiwix_exclude_attr is None else kiwix_exclude_attr)

    zims_installed = {}
    path_to_id_map = {}
    try:
        tree = ET.parse(lib_xml_file)
    except OSError:
        return zims_installed, path_to_id_map

    for child in tree.getroot():
        book_id = child.attrib.get('id')
        if book_id is None:
            print ("xml record missing Book Id")
            continue
        zims_installed[book_id] = {
            attr: value for attr, value in child.attrib.items() if attr not in excluded
        }
        book_path = child.attrib.get('path')
        if book_path is not None:
            path_to_id_map[book_path] = book_id

    return zims_installed, path_to_id_map
def rem_libr_xml(id):
    """Remove one book from the current library file using kiwix-manage.

    Runs ``<kiwix_manage> <kiwix_library_xml> remove <id>`` using the
    module-level paths. The argument list is built directly rather than by
    splitting a command string, so paths containing spaces stay intact.

    A non-zero exit status is tolerated: status 2 is ignored (bogus file open
    error in kiwix-manage); for any other status the captured output of the
    command is printed.
    """
    args = [kiwix_manage, kiwix_library_xml, "remove", id]
    try:
        subprocess.check_output(args)
    except subprocess.CalledProcessError as e:
        if e.returncode != 2:  # skip bogus file open error in kiwix-manage
            # The output is only available on the exception: nothing is
            # returned by check_output when the command fails.
            print (e.output)
def add_libr_xml(kiwix_library_xml, zim_path, zimname, zimidx):
    """Register one zim file in a library file using kiwix-manage.

    Parameters:
        kiwix_library_xml: library file to update.
        zim_path: base directory that zimname and zimidx are relative to.
        zimname: zim file path relative to zim_path.
        zimidx: index path relative to zim_path, or a false value when there
            is no separate index; passed to kiwix-manage with ``-i``.

    This is best effort: when the command cannot be started or exits with a
    non-zero status, the zim is skipped and a note is written to stderr.
    """
    args = [kiwix_manage, kiwix_library_xml, "add", zim_path + "/" + zimname]
    if zimidx:
        args += ["-i", zim_path + "/" + zimidx]
    try:
        subprocess.check_output(args)
    except (subprocess.CalledProcessError, OSError) as err:
        # skip things that don't work, but leave a trace of what was skipped
        print("skipping " + zimname + ": " + str(err), file=sys.stderr)
def read_lang_codes():
    """Load the JSON file at lang_codes_path into the module-level lang_codes.

    The file is decoded as UTF-8 explicitly so that the result does not
    depend on the locale the script happens to run under.
    """
    global lang_codes
    with open(lang_codes_path, "r", encoding="utf-8") as f:
        lang_codes = json.load(f)
def kiwix_lang_to_iso2(zim_lang_code):
    """Return the 'iso2' value stored in lang_codes for the given language code.

    Raises KeyError when the code is not present in lang_codes.
    """
    entry = lang_codes[zim_lang_code]
    return entry['iso2']
def init():
    """Load run-time paths from the ini file and read the language codes.

    Overwrites the module-level iiab_base_path, zim_path, kiwix_library_xml
    and kiwix_manage with values taken from (or derived from) the
    'location' and 'kiwix' sections of iiab_ini_file, then calls
    read_lang_codes().
    """
    global iiab_base_path, zim_path, kiwix_library_xml, kiwix_manage

    # config = ConfigParser.SafeConfigParser()
    config = configparser.ConfigParser()
    config.read(iiab_ini_file)

    iiab_base_path = config.get('location', 'iiab_base')
    kiwix_manage = iiab_base_path + "/kiwix/bin/kiwix-manage"

    zim_path = config.get('kiwix', 'iiab_zim_path')
    kiwix_library_xml = config.get('kiwix', 'kiwix_library_xml')

    read_lang_codes()
def parse_args():
    """Parse the command line and return the resulting argparse namespace.

    Recognised options: --device (takes a value) and the boolean switches
    --no_tmp, -f/--force and -v/--verbose.
    """
    parser = argparse.ArgumentParser(description="Create library.xml for Kiwix.")
    parser.add_argument(
        "--device",
        help="no trailing /. change the target device from internal storage to something else like /media/usb0",
    )
    # The remaining options are plain on/off switches.
    switches = (
        (("--no_tmp",), "don't append .tmp to the library.xml name"),
        (("-f", "--force"), "force complete rebuild of library.xml"),
        (("-v", "--verbose"), "Print messages."),
    )
    for flags, help_text in switches:
        parser.add_argument(*flags, help=help_text, action="store_true")
    return parser.parse_args()
def write_zim_versions_idx():
    """Enrich zim_versions with library metadata and write it out as JSON.

    For every generic zim name in the module-level zim_versions map this adds
    the matching menu item plus article count, media count, human-readable
    size, tags, language and zim date taken from the current library file.
    The map is then written to zim_version_idx_dir + zim_version_idx_file;
    when that directory does not exist a message is printed instead.
    """
    zims_installed, path_to_id_map = read_library_xml(kiwix_library_xml)

    for perma_ref, info in zim_versions.items():
        info['menu_item'] = find_menuitem_from_zimname(perma_ref)
        articlecount, mediacount, size, tags, lang, zim_date = \
            get_substitution_data(perma_ref, zims_installed, path_to_id_map)
        info['article_count'] = articlecount
        info['media_count'] = mediacount
        # kiwix reports in K; a book without a size attribute yields '' and
        # is treated as 0 rather than aborting the whole index.
        info['size'] = human_readable(float(size or 0) * 1024)
        info['tags'] = tags
        info['language'] = lang
        info['zim_date'] = zim_date

    # Write Version Map
    if os.path.isdir(zim_version_idx_dir):
        with open(zim_version_idx_dir + zim_version_idx_file, 'w') as fp:
            fp.write(json.dumps(zim_versions, indent=2))
    else:
        print (zim_version_idx_dir + " not found.")
def get_substitution_data(perma_ref, zims_installed, path_to_id_map):
    """Collect the library metadata for one installed zim.

    Parameters:
        perma_ref: generic zim name, a key of the module-level zim_versions.
        zims_installed: book id -> attribute dict, as returned by
            read_library_xml().
        path_to_id_map: library path -> book id, as returned by
            read_library_xml().

    Returns:
        The tuple (articlecount, mediacount, size, tags, language, date)
        where language is the result of kiwix_lang_to_iso2() for the book's
        'language' attribute. Missing attributes come back as ''. When the
        zim is not present in the library, or its attribute dict is empty
        (except for perma_ref 'test'), a tuple of six '0' strings is
        returned.
    """
    no_data = ('0', '0', '0', '0', '0', '0')

    # reconstruct the path in the id map
    path = 'content/' + zim_versions[perma_ref]['file_name'] + '.zim'

    # A zim on disk is not necessarily in the library, so look it up
    # tolerantly and fall back to the placeholder values.
    book_id = path_to_id_map.get(path)
    if book_id is None:
        return no_data
    item = zims_installed.get(book_id)
    if item is None:
        return no_data

    if len(item) != 0 or perma_ref == 'test':
        mediacount = item.get('mediaCount', '')
        articlecount = item.get('articleCount', '')
        size = item.get('size', '')
        tags = item.get('tags', '')
        zim_lang = item.get('language')
        menu_def_lang = kiwix_lang_to_iso2(zim_lang)
        zim_date = item.get('date', '')
        return (articlecount, mediacount, size, tags, menu_def_lang, zim_date)
    return no_data
def get_menu_def_zimnames(intended_use='zim'):
    """Map zim names to the menu definition files that reference them.

    Reads every ``*.json`` file in the menuDefs directory and keeps those
    whose 'intended_use' equals *intended_use* and whose 'zim_name' is not
    empty.

    Files that cannot be read or parsed are reported on stdout and skipped.
    The process working directory is left untouched.

    Returns:
        dict mapping each 'zim_name' to menuDefs + file name.
    """
    menu_def_dict = {}
    for filename in os.listdir(menuDefs):
        if not fnmatch.fnmatch(filename, '*.json'):
            continue

        readstr = ''
        try:
            with open(menuDefs + filename, 'r', encoding='utf-8') as json_file:
                readstr = json_file.read()
            data = json.loads(readstr)
        except (OSError, ValueError):
            print("failed to parse %s"%filename)
            print(readstr)
            continue  # never fall through with a missing or stale definition

        if data.get('intended_use', '') != intended_use:
            continue
        zimname = data.get('zim_name', '')
        if zimname != '':
            menu_def_dict[zimname] = menuDefs + filename
    return menu_def_dict
def find_menuitem_from_zimname(zimname):
    """Return the 'menu_item_name' of the menu definition for a zim name.

    The definition file is located through get_menu_def_zimnames(). An empty
    string is returned when no definition references the zim or when the
    definition has no 'menu_item_name'.
    """
    defs_filename = get_menu_def_zimnames().get(zimname, '')
    if defs_filename == '':
        return ''

    #print("reading menu-def:%s"%defs_filename)
    with open(defs_filename, 'r') as json_file:
        data = json.loads(json_file.read())
    return data.get('menu_item_name', '')
def get_kiwix_catalog_item(perma_ref):
    """Return the first entry under 'zims' in the kiwix catalog with this perma_ref.

    The catalog is read from the file named by KIWIX_CAT on every call.
    An empty dict is returned when no entry matches.
    """
    # Read the kiwix catalog
    with open(KIWIX_CAT, 'r') as kiwix_cat:
        catalog = json.load(kiwix_cat)

    matches = (
        entry for entry in catalog['zims'].values()
        if entry['perma_ref'] == perma_ref
    )
    return next(matches, {})
def human_readable(num):
    """Format a number with about 3 significant digits and a unit specifier.

    The value is divided by 1024 (not 1000, to match similar calcs elsewhere;
    TFM 7/15/2019) until it drops below 1000, and the matching suffix from
    '', K, M, G, T, P is appended. Values below 10 get two decimals, below
    100 one decimal, otherwise none.

    Values too large even for the last unit are still expressed in that unit,
    so a string is returned for every input.
    """
    num = float(num)
    units = ['', 'K', 'M', 'G', 'T', 'P']
    for unit in units:
        if num < 10.0:
            return "%.2f%s" % (num, unit)
        if num < 100.0:
            return "%.1f%s" % (num, unit)
        if num < 1000.0:
            return "%.0f%s" % (num, unit)
        num /= 1024.0
    # Ran out of units: undo the last division and report in the largest one.
    return "%.0f%s" % (num * 1024.0, units[-1])
# Script entry point: main() runs only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()