mirror of
https://github.com/kiwix/libkiwix.git
synced 2025-08-03 10:16:03 -04:00

During static resource preprocessing and compilation, the cacheid values of static resources are embedded into libkiwix and can be accessed at runtime. If a static resource is requested without specifying any cacheid it is served as dynamic content (with short TTL and the library id used for the ETag, though using the cacheid for the ETag would be better). If a cacheid is supplied in the request it must match the cacheid of the resource (otherwise a 404 Not Found error is returned) whereupon the resource is served as immutable content. Known issues: - One issue is caused by the fact that some static resources don't get a cacheid; this is resolved in the next commit. - Interaction of this change with the support for dynamically customizing static resources (via KIWIX_SERVE_CUSTOMIZED_RESOURCES env var) was not addressed.
133 lines
4.7 KiB
Python
Executable File
133 lines
4.7 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
|
|
'''
|
|
Copyright 2022 Veloman Yunkan <veloman.yunkan@gmail.com>
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation; either version 3 of the License, or any
|
|
later version.
|
|
|
|
This program is distributed in the hope that it will be useful, but
|
|
WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, write to the Free Software
|
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
|
02110-1301, USA.
|
|
'''
|
|
|
|
import argparse
|
|
import hashlib
|
|
import os.path
|
|
import re
|
|
|
|
def read_resource_file(resource_file_path):
    """Return the resource paths listed in a resource list file.

    Every line of the file names one resource. Surrounding whitespace
    (including the line terminator) is stripped from each entry. Blank lines
    are skipped, so that a stray empty line (e.g. at the end of the file)
    does not yield an empty resource path.
    """
    with open(resource_file_path, 'r') as f:
        stripped_lines = (line.strip() for line in f)
        return [entry for entry in stripped_lines if entry]
|
|
|
|
def list_resources(resource_file_path):
    """Print each entry of the resource list file on its own line."""
    entries = read_resource_file(resource_file_path)
    for entry in entries:
        print(entry)
|
|
|
|
def compute_resource_revision(resource_path):
    """Return the revision id of the resource located under OUT_DIR.

    The revision is the first 8 hexadecimal digits of the SHA-1 digest of
    the file's raw bytes.
    """
    output_path = os.path.join(OUT_DIR, resource_path)
    with open(output_path, 'rb') as resource_file:
        data = resource_file.read()
    digest = hashlib.sha1(data).hexdigest()
    return digest[:8]
|
|
|
|
# Cache of computed revisions, keyed by resource path. It is filled in lazily
# by get_resource_revision() and read by copy_resource_list_file().
resource_revisions = {}
|
|
|
|
def get_resource_revision(res):
    """Return the revision of resource res, computing it on first request.

    On a cache miss the resource is preprocessed first, then the revision is
    computed and remembered in resource_revisions.
    """
    if res in resource_revisions:
        return resource_revisions[res]

    preprocess_resource(res)
    revision = compute_resource_revision(res)
    resource_revisions[res] = revision
    return revision
|
|
|
|
# Matches a URL of a resource under skin/ (preceded by a '/') whose query
# string begins with the KIWIXCACHEID placeholder. Named groups:
#   pre      - the text before the placeholder, up to and including the '?'
#   resource - the resource path, from 'skin/' up to (excluding) the '?'
#   post     - what follows the placeholder, up to (excluding) the next '"'
RESOURCE_WITH_CACHEID_URL_PATTERN=r'(?P<pre>.*/(?P<resource>skin/[^"?]+)\?)KIWIXCACHEID(?P<post>[^"]*)'
|
|
|
|
def set_cacheid(resource_matchobj):
    """Build the replacement text for one matched resource URL.

    resource_matchobj is a match object exposing the 'pre', 'resource' and
    'post' named groups. The result is the matched text with the placeholder
    swapped for 'cacheid=' followed by the revision of the resource.
    """
    prefix, resource, suffix = resource_matchobj.group('pre', 'resource', 'post')
    revision = get_resource_revision(resource)
    return prefix + 'cacheid=' + revision + suffix
|
|
|
|
def preprocess_text(s):
    """Replace the KIWIXCACHEID placeholders in s with actual cacheid values.

    Text that contains no placeholder is returned unchanged.

    Raises:
        ValueError: if a placeholder remains that the resource URL pattern
            cannot match (e.g. it is not part of a skin/ resource URL).
    """
    # The pattern begins with a greedy '.*', so a single re.sub() pass only
    # resolves the last placeholder of any given line. Keep substituting
    # until none is left; every successful pass removes at least one.
    while 'KIWIXCACHEID' in s:
        substituted = re.sub(RESOURCE_WITH_CACHEID_URL_PATTERN, set_cacheid, s)
        if substituted == s:
            # An explicit exception (rather than an assert) so that the check
            # is not stripped when Python runs with -O.
            unresolved = next(
                line for line in s.splitlines() if 'KIWIXCACHEID' in line
            )
            raise ValueError(
                'Unresolved KIWIXCACHEID placeholder in: ' + unresolved.strip()
            )
        s = substituted
    return s
|
|
|
|
def get_preprocessed_resource(srcpath):
    """Return the preprocessed content of a resource, or None.

    The file at srcpath is read as text and run through preprocess_text().
    If that changes the content, the changed content is returned. None is
    returned when preprocessing leaves the content as is, or when the file
    cannot be decoded as text.
    """
    try:
        with open(srcpath, 'r') as source:
            original = source.read()
            transformed = preprocess_text(original)
            if transformed == original:
                return None
            return transformed
    except UnicodeDecodeError:
        # Not decodable as text, hence treated as a binary resource
        return None
|
|
|
|
|
|
def symlink_resource(src, resource_path):
    """Make resource_path a symbolic link that points to src.

    If resource_path already is a symlink to src, nothing is done. Anything
    else found at resource_path is removed and replaced with the link.
    """
    # lexists() rather than exists(): exists() follows symlinks and reports
    # False for a dangling link, in which case os.symlink() would fail with
    # FileExistsError.
    if os.path.lexists(resource_path):
        if os.path.islink(resource_path) and os.readlink(resource_path) == src:
            return
        os.remove(resource_path)
    os.symlink(src, resource_path)
|
|
|
|
def preprocess_resource(resource_path):
    """Produce the preprocessed version of a resource under OUT_DIR.

    resource_path is relative to both BASE_DIR (where the source is read
    from) and OUT_DIR (where the result is placed). If preprocessing changes
    the content, the changed content is written to the output path;
    otherwise the output path becomes a symlink to the source file.
    """
    print('Preprocessing', resource_path, '...')
    resource_dir = os.path.dirname(resource_path)
    if resource_dir != '':
        os.makedirs(os.path.join(OUT_DIR, resource_dir), exist_ok=True)
    srcpath = os.path.join(BASE_DIR, resource_path)
    outpath = os.path.join(OUT_DIR, resource_path)
    # lexists() rather than exists(): a dangling symlink left over from an
    # earlier run must be removed too, otherwise open(outpath, 'w') below
    # would write through the link to its stale target.
    if os.path.lexists(outpath):
        os.remove(outpath)
    preprocessed_content = get_preprocessed_resource(srcpath)
    if preprocessed_content is None:
        symlink_resource(srcpath, outpath)
    else:
        with open(outpath, 'w') as target:
            target.write(preprocessed_content)
|
|
|
|
|
|
def copy_resource_list_file(src_path, dst_path):
    """Copy the resource list file, annotating entries with their revision.

    A line that starts with "skin/" and whose stripped form is a key of
    resource_revisions is written as "<resource> <revision>". All other
    lines are copied verbatim.
    """
    with open(src_path, 'r') as src, open(dst_path, 'w') as dst:
        for line in src:
            res = line.strip()
            if not line.startswith("skin/") or res not in resource_revisions:
                dst.write(line)
                continue
            dst.write(res + " " + resource_revisions[res] + "\n")
|
|
|
|
def preprocess_resources(resource_file_path):
    """Preprocess every listed resource, then copy the list into OUT_DIR.

    The copy of the list keeps the file name of resource_file_path and is
    produced by copy_resource_list_file().
    """
    for resource in read_resource_file(resource_file_path):
        preprocess_resource(resource)

    list_copy_path = os.path.join(OUT_DIR, os.path.basename(resource_file_path))
    copy_resource_list_file(resource_file_path, list_copy_path)
|
|
|
|
if __name__ == "__main__":
    # Command line: [--list-all | --preprocess] [--outdir DIR] resource_file
    parser = argparse.ArgumentParser()
    commands = parser.add_mutually_exclusive_group()
    commands.add_argument('--list-all', action='store_true')
    commands.add_argument('--preprocess', action='store_true')
    parser.add_argument('--outdir')
    parser.add_argument('resource_file')
    args = parser.parse_args()

    # Preprocessing writes under the output directory; report a usage error
    # instead of failing later with a TypeError on os.path.join(None, ...).
    if args.preprocess and args.outdir is None:
        parser.error('--outdir is required with --preprocess')

    # Module-level settings read by the functions above: resources are looked
    # up relative to the directory of the resource list file and the results
    # are placed under the output directory.
    BASE_DIR = os.path.dirname(os.path.realpath(args.resource_file))
    OUT_DIR = args.outdir

    if args.list_all:
        list_resources(args.resource_file)
    elif args.preprocess:
        preprocess_resources(args.resource_file)
|