1
0
Fork 0
mirror of https://github.com/iiab/iiab.git synced 2025-02-12 11:12:06 +00:00

Merge pull request #3835 from tim-moody/zimit

Only support versioned ZIMs if the filename ends in _YYYY-MM
This commit is contained in:
Tim Moody 2024-10-23 10:27:15 -04:00 committed by GitHub
commit 1d9c0199cb
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -6,6 +6,7 @@ import os
import json
import subprocess
import shlex
import re
import xml.etree.ElementTree as ET
import iiab.iiab_const as CONST
@ -46,11 +47,17 @@ def get_zim_list(path):
if filename in CONST.old_zim_map: # handle old names that don't parse
perma_ref = CONST.old_zim_map[filename]
else:
ulpos = filename.rfind("_")
# but old gutenberg and some other names are not canonical
if filename.rfind("-") < 0: # non-canonical name
ulpos = filename[:ulpos].rfind("_")
perma_ref = filename[:ulpos]
# handle various zim name patterns:
# 1. canonical zim ending in _YYYY-MM
# as of 10/16/2024 it looks like all Kiwix zims fit this pattern
# 2. otherwise assume no versioning and perma_ref = filename
match = re.search("_[0-5][0-9][0-5][0-9]-[0-5][0-9]$", filename)
if match:
perma_ref = filename[: match.span()[0]]
else:
perma_ref = filename
zim_info['file_name'] = filename
zim_versions[perma_ref] = zim_info # if there are multiples, last should win
return files_processed, zim_versions