libkiwix/scripts/kiwix-resources
Veloman Yunkan 150851b33d kiwix-resources preprocesses all resources
kiwix-resources preprocesses all resources rather than only templates. At
this point this doesn't change anything since only (some) template resources
contain KIWIXCACHEID placeholders. But this enhancement opens the door
to the preprocessing of static/skin/index.js (after preprocessing is
able to handle relative links, which comes in the next commit).
2022-05-02 20:37:22 +04:00

128 lines
4.5 KiB
Python
Executable File

#!/usr/bin/env python3
'''
Copyright 2022 Veloman Yunkan <veloman.yunkan@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or any
later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.
'''
import argparse
import hashlib
import os.path
import re
def read_resource_file(resource_file_path):
    """Return the entries of the resource listing file as a list.

    Each line of the file yields exactly one entry, with leading and
    trailing whitespace (including the line terminator) removed.
    """
    with open(resource_file_path, 'r') as listing:
        entries = list(map(str.strip, listing))
    return entries
def list_resources(resource_file_path):
    """Print every entry of the resource listing file, one per line."""
    entries = read_resource_file(resource_file_path)
    for entry in entries:
        print(entry)
def get_resource_revision(base_dir, resource_path):
    """Return a short content-based revision id for a resource.

    The resource is read as raw bytes from ``base_dir/resource_path`` and
    the id is the first 8 hexadecimal digits of its SHA-1 digest.
    """
    full_path = os.path.join(base_dir, resource_path)
    with open(full_path, 'rb') as resource:
        content = resource.read()
    digest = hashlib.sha1(content).hexdigest()
    return digest[:8]
# Maps each resource path (as listed in the resource file) to its revision
# id as computed by get_resource_revision(). Filled in by
# fill_resource_revisions() and looked up by set_cacheid().
resource_revisions = {}
def fill_resource_revisions(resource_file_path):
    """Record the revision id of every listed resource.

    Resource paths are resolved relative to the directory holding the
    (symlink-resolved) resource listing file. Results are stored in the
    module-level ``resource_revisions`` dictionary, keyed by resource path.
    """
    resolved_listing = os.path.realpath(resource_file_path)
    resources_root = os.path.dirname(resolved_listing)
    for entry in read_resource_file(resource_file_path):
        revision = get_resource_revision(resources_root, entry)
        resource_revisions[entry] = revision
# Matches a URL whose query starts with the KIWIXCACHEID placeholder:
#   group 1 - the part before the '?' (one or more characters, none of
#             which is '"' or '?');
#   group 2 - whatever follows the placeholder, up to but not including
#             the next '"' (may be empty).
RESOURCE_WITH_CACHEID_URL_PATTERN=r'([^"?]+)\?KIWIXCACHEID([^"]*)'
def set_cacheid(resource_matchobj):
    """Build the replacement text for one KIWIXCACHEID URL match.

    ``resource_matchobj`` is a match whose group 1 is the URL path and
    whose group 2 is the remainder of the query string. The resource is
    looked up in ``resource_revisions`` by its path with a leading
    ``{{root}}/`` removed (if present); the returned URL keeps the path
    as it was matched and substitutes ``cacheid=<revision>`` for the
    placeholder.

    Raises KeyError when the resource has no recorded revision.
    """
    path, extra_query = resource_matchobj.group(1, 2)
    root_prefix = '{{root}}/'
    if path.startswith(root_prefix):
        resource = path[len(root_prefix):]
    else:
        resource = path
    revision = resource_revisions[resource]
    return '{}?cacheid={}{}'.format(path, revision, extra_query)
def preprocess_line(line):
    """Replace KIWIXCACHEID placeholders in a single line of text.

    Every URL matched by RESOURCE_WITH_CACHEID_URL_PATTERN is rewritten by
    set_cacheid(). Lines that do not mention KIWIXCACHEID are returned
    unchanged.

    Raises:
        ValueError: if a KIWIXCACHEID placeholder is still present after
            substitution (i.e. it did not appear in a form the pattern
            recognises).
        KeyError: propagated from set_cacheid() for a resource without a
            recorded revision.
    """
    if 'KIWIXCACHEID' not in line:
        return line

    processed = re.sub(RESOURCE_WITH_CACHEID_URL_PATTERN, set_cacheid, line)
    # Raise explicitly instead of using `assert`: assertions are stripped
    # when Python runs with -O, which would let an unresolved placeholder
    # slip silently into the generated resource.
    if 'KIWIXCACHEID' in processed:
        raise ValueError(
            f'Unresolved KIWIXCACHEID placeholder in line: {line!r}')
    return processed
def get_preprocessed_resource(srcpath):
    """Preprocess the text resource at ``srcpath`` line by line.

    Returns a ``(content, modified_line_count)`` pair, where ``content``
    is the concatenation of the preprocessed lines and
    ``modified_line_count`` is the number of lines that preprocess_line()
    changed. If the file cannot be decoded as text, it is treated as a
    binary resource and ``(None, 0)`` is returned.
    """
    chunks = []
    changed = 0
    try:
        with open(srcpath, 'r') as source:
            # Decoding happens lazily while iterating, so the loop has to
            # stay inside the try block.
            for original in source:
                rewritten = preprocess_line(original)
                changed += int(rewritten != original)
                chunks.append(rewritten)
    except UnicodeDecodeError:
        # It was a binary resource
        return None, 0
    return "".join(chunks), changed
def symlink_resource(src, resource_path):
    """Make ``resource_path`` a symbolic link pointing at ``src``.

    If ``resource_path`` is already a symlink to ``src`` nothing is done.
    Any other existing entry at ``resource_path`` (a regular file or a
    symlink with a different target, including a dangling one) is removed
    and replaced by the new link.
    """
    # lexists() rather than exists(): exists() follows symlinks and reports
    # False for a dangling link, in which case os.symlink() below would
    # fail with FileExistsError.
    if os.path.lexists(resource_path):
        if os.path.islink(resource_path) and os.readlink(resource_path) == src:
            return
        os.remove(resource_path)
    os.symlink(src, resource_path)
def preprocess_resource(srcdir, resource_path, outdir):
    """Produce the preprocessed version of one resource under ``outdir``.

    ``resource_path`` is relative to ``srcdir``; the same relative path is
    used under ``outdir`` (intermediate directories are created as
    needed). A resource in which preprocessing changed no line (this
    includes binary resources) is symlinked to its source; otherwise the
    preprocessed text is written to a regular file.
    """
    print(f'Preprocessing {resource_path}...')
    resource_dir = os.path.dirname(resource_path)
    if resource_dir != '':
        os.makedirs(os.path.join(outdir, resource_dir), exist_ok=True)
    srcpath = os.path.join(srcdir, resource_path)
    outpath = os.path.join(outdir, resource_path)
    preprocessed_content, modified_line_count = get_preprocessed_resource(srcpath)
    if modified_line_count == 0:
        symlink_resource(srcpath, outpath)
        return

    # An earlier run may have left a symlink to the source file here (from
    # when the resource did not need preprocessing). Opening it for writing
    # would follow the link and overwrite the source, so drop the link
    # first.
    if os.path.islink(outpath):
        os.remove(outpath)
    with open(outpath, 'w') as target:
        target.write(preprocessed_content)
def copy_file(src_path, dst_path):
    """Copy the bytes of ``src_path`` into ``dst_path``.

    The destination is created or truncated; the source is read in full
    and written out unchanged.
    """
    with open(src_path, 'rb') as infile, open(dst_path, 'wb') as outfile:
        payload = infile.read()
        outfile.write(payload)
def preprocess_resources(resource_file_path, outdir):
    """Preprocess all listed resources into ``outdir``.

    Resources are located relative to the directory of the
    (symlink-resolved) resource listing file. Once every resource has
    been processed, the listing file itself is copied into ``outdir``
    under its own file name.
    """
    resolved_listing = os.path.realpath(resource_file_path)
    resources_root = os.path.dirname(resolved_listing)
    for entry in read_resource_file(resource_file_path):
        preprocess_resource(resources_root, entry, outdir)
    listing_name = os.path.basename(resource_file_path)
    listing_copy = os.path.join(outdir, listing_name)
    copy_file(resource_file_path, listing_copy)
# Command-line entry point:
#   --list-all    print the resources named in the resource file
#   --preprocess  write preprocessed resources into --outdir
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    commands = parser.add_mutually_exclusive_group()
    commands.add_argument('--list-all', action='store_true',
                          help='print the listed resources, one per line')
    commands.add_argument('--preprocess', action='store_true',
                          help='preprocess the listed resources into --outdir')
    parser.add_argument('--outdir',
                        help='output directory (required with --preprocess)')
    parser.add_argument('resource_file',
                        help='file listing one resource path per line')
    args = parser.parse_args()

    if args.list_all:
        list_resources(args.resource_file)
    elif args.preprocess:
        # Without this check a missing --outdir only surfaces later as a
        # TypeError raised from os.path.join(None, ...).
        if args.outdir is None:
            parser.error('--preprocess requires --outdir')
        fill_resource_revisions(args.resource_file)
        preprocess_resources(args.resource_file, args.outdir)