mirror of
https://github.com/kiwix/libkiwix.git
synced 2025-08-03 10:16:03 -04:00

kiwix-resources preprocesses all resources rather than only templates. At this point this doesn't change anything since only (some) template resources contain KIWIXCACHEID placeholders. But this enhancement opens the door to the preprocessing of static/skin/index.js (after preprocessing is able to handle relative links, which comes in the next commit).
128 lines
4.5 KiB
Python
Executable File
128 lines
4.5 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
|
|
'''
|
|
Copyright 2022 Veloman Yunkan <veloman.yunkan@gmail.com>
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation; either version 3 of the License, or any
|
|
later version.
|
|
|
|
This program is distributed in the hope that it will be useful, but
|
|
WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, write to the Free Software
|
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
|
02110-1301, USA.
|
|
'''
|
|
|
|
import argparse
|
|
import hashlib
|
|
import os.path
|
|
import re
|
|
|
|
def read_resource_file(resource_file_path):
    """Return the list of resource paths named in the resource file.

    The file is read as text, one resource path per line. Leading and
    trailing whitespace (including the line terminator) is stripped from
    every line. Blank lines are skipped so that a stray empty line is not
    returned as a resource named ''.
    """
    with open(resource_file_path, 'r') as f:
        stripped_lines = (line.strip() for line in f)
        return [path for path in stripped_lines if path]
|
|
|
|
def list_resources(resource_file_path):
    """Print every resource path from the resource file, one per line."""
    listed_paths = read_resource_file(resource_file_path)
    for entry in listed_paths:
        print(entry)
|
|
|
|
def get_resource_revision(base_dir, resource_path):
    """Return the revision id of a resource.

    The id is the first 8 hexadecimal digits of the SHA-1 digest of the
    raw bytes of the file found at base_dir/resource_path.
    """
    full_path = os.path.join(base_dir, resource_path)
    with open(full_path, 'rb') as resource_file:
        content = resource_file.read()
    hex_digest = hashlib.sha1(content).hexdigest()
    return hex_digest[:8]
|
|
|
|
# Maps a resource path (as listed in the resource file) to its revision id.
# Filled by fill_resource_revisions() and looked up by set_cacheid().
resource_revisions = {}
|
|
|
|
def fill_resource_revisions(resource_file_path):
    """Record the revision id of every listed resource in resource_revisions.

    Resource paths are resolved relative to the directory that contains the
    (symlink-resolved) resource file.
    """
    resources_root = os.path.dirname(os.path.realpath(resource_file_path))
    # dict.update() consumes the pairs one at a time, so entries are stored
    # in listing order exactly as a plain loop would store them.
    resource_revisions.update(
        (name, get_resource_revision(resources_root, name))
        for name in read_resource_file(resource_file_path)
    )
|
|
|
|
# Matches a URL of the form <path>?KIWIXCACHEID<rest>.
#   group 1: the path part (one or more characters other than '"' and '?')
#   group 2: whatever follows the placeholder, up to the next '"' (may be empty)
RESOURCE_WITH_CACHEID_URL_PATTERN=r'([^"?]+)\?KIWIXCACHEID([^"]*)'
|
|
|
|
def set_cacheid(resource_matchobj):
    """Build the replacement text for one RESOURCE_WITH_CACHEID_URL_PATTERN match.

    The KIWIXCACHEID placeholder is replaced with 'cacheid=<revision>', where
    the revision is looked up in resource_revisions. A leading '{{root}}/' is
    ignored for the lookup but kept in the returned URL. Raises KeyError when
    the resource has no recorded revision.
    """
    url_path, trailing_query = resource_matchobj.group(1, 2)
    root_marker = '{{root}}/'
    if url_path.startswith(root_marker):
        lookup_key = url_path[len(root_marker):]
    else:
        lookup_key = url_path
    revision = resource_revisions[lookup_key]
    return url_path + '?' + 'cacheid=' + revision + trailing_query
|
|
|
|
def preprocess_line(line):
    """Return line with every '<url>?KIWIXCACHEID' placeholder resolved.

    Lines that do not contain the text 'KIWIXCACHEID' are returned unchanged.
    Otherwise each match of RESOURCE_WITH_CACHEID_URL_PATTERN is rewritten by
    set_cacheid().

    Raises AssertionError if 'KIWIXCACHEID' is still present after the
    substitution, i.e. the placeholder appeared somewhere the pattern does
    not match.
    """
    if 'KIWIXCACHEID' not in line:
        return line
    result = re.sub(RESOURCE_WITH_CACHEID_URL_PATTERN, set_cacheid, line)
    if 'KIWIXCACHEID' in result:
        # Raised explicitly rather than through an `assert` statement so the
        # check is not compiled away when Python runs with -O.
        raise AssertionError(
            f'Unresolved KIWIXCACHEID placeholder in line: {line!r}')
    return result
|
|
|
|
def get_preprocessed_resource(srcpath):
    """Preprocess the resource at srcpath line by line.

    Returns a (content, modified_line_count) pair: the preprocessed text and
    the number of lines that preprocess_line() changed. If the file cannot be
    decoded as text it is treated as a binary resource and (None, 0) is
    returned.
    """
    changed_count = 0
    output_chunks = []
    try:
        with open(srcpath, 'r') as source:
            for raw_line in source:
                cooked_line = preprocess_line(raw_line)
                output_chunks.append(cooked_line)
                if cooked_line != raw_line:
                    changed_count += 1
    except UnicodeDecodeError:
        # Not decodable as text: it was a binary resource
        return None, 0
    return "".join(output_chunks), changed_count
|
|
|
|
|
|
def symlink_resource(src, resource_path):
    """Make resource_path a symbolic link pointing to src.

    If resource_path is already a symlink whose target is exactly src,
    nothing is done. Any other existing entry at resource_path (a regular
    file, or a symlink with a different target) is removed first.
    """
    # lexists() rather than exists(): exists() follows symlinks and reports a
    # dangling link as absent, after which os.symlink() would fail with
    # FileExistsError.
    if os.path.lexists(resource_path):
        if os.path.islink(resource_path) and os.readlink(resource_path) == src:
            return
        os.remove(resource_path)
    os.symlink(src, resource_path)
|
|
|
|
def preprocess_resource(srcdir, resource_path, outdir):
    """Produce the preprocessed version of one resource under outdir.

    srcdir        -- directory that resource_path is relative to
    resource_path -- relative path of the resource
    outdir        -- directory receiving the result at the same relative path

    If preprocessing changes no line (this includes binary resources), the
    output is a symlink to the source file. Otherwise the preprocessed text
    is written to a regular file.
    """
    print(f'Preprocessing {resource_path}...')
    resource_dir = os.path.dirname(resource_path)
    if resource_dir != '':
        os.makedirs(os.path.join(outdir, resource_dir), exist_ok=True)
    srcpath = os.path.join(srcdir, resource_path)
    outpath = os.path.join(outdir, resource_path)
    preprocessed_content, modified_line_count = get_preprocessed_resource(srcpath)
    if modified_line_count == 0:
        symlink_resource(srcpath, outpath)
        return
    # An earlier run may have left a symlink to the source file here (from
    # when the resource contained no placeholder). Opening it for writing
    # would follow the link and overwrite the source, so drop the link first.
    if os.path.islink(outpath):
        os.remove(outpath)
    with open(outpath, 'w') as target:
        target.write(preprocessed_content)
|
|
|
|
|
|
def copy_file(src_path, dst_path):
    """Copy the bytes of src_path to dst_path, replacing any existing file.

    If both paths refer to the same file, nothing is done: opening the
    destination for writing would truncate the file before it is read and
    its content would be lost.
    """
    if os.path.exists(dst_path) and os.path.samefile(src_path, dst_path):
        return
    with open(src_path, 'rb') as src, open(dst_path, 'wb') as dst:
        dst.write(src.read())
|
|
|
|
def preprocess_resources(resource_file_path, outdir):
    """Preprocess every listed resource into outdir.

    Resources are resolved relative to the directory holding the
    (symlink-resolved) resource file. Once all of them are processed, the
    resource file itself is copied into outdir under its own file name.
    """
    source_root = os.path.dirname(os.path.realpath(resource_file_path))
    for entry in read_resource_file(resource_file_path):
        preprocess_resource(source_root, entry, outdir)
    listing_copy_path = os.path.join(outdir, os.path.basename(resource_file_path))
    copy_file(resource_file_path, listing_copy_path)
|
|
|
|
if __name__ == "__main__":
    # Command line: [--list-all | --preprocess --outdir DIR] resource_file
    parser = argparse.ArgumentParser()
    commands = parser.add_mutually_exclusive_group()
    commands.add_argument('--list-all', action='store_true')
    commands.add_argument('--preprocess', action='store_true')
    parser.add_argument('--outdir')
    parser.add_argument('resource_file')
    args = parser.parse_args()

    if args.list_all:
        list_resources(args.resource_file)
    elif args.preprocess:
        # Without an output directory the preprocessing step would fail later
        # with a TypeError from os.path.join(); report a usage error instead.
        if args.outdir is None:
            parser.error('--preprocess requires --outdir')
        fill_resource_revisions(args.resource_file)
        preprocess_resources(args.resource_file, args.outdir)
|