mirror of
https://github.com/openzim/zimit.git
synced 2025-09-22 11:22:23 -04:00
new crawler folder structure
This commit is contained in:
parent
03abf6050a
commit
2e9c129523
@ -4,6 +4,8 @@
|
||||
- Allows setting combinations of values for waitUntil param
|
||||
- Updated warc2zim to 1.3.5
|
||||
- Updated browsertrix-crawler to 0.3.1
|
||||
- WARC files passed to warc2zim are now written to `{temp_root_dir}/collections/capture-*/archive/` where
|
||||
`capture-*` is dynamic and includes the datetime (this naming comes from browsertrix-crawler).
|
||||
|
||||
# 1.1.3
|
||||
|
||||
|
2
zimit.py
2
zimit.py
@ -279,7 +279,7 @@ def zimit(args=None):
|
||||
print(f"Running browsertrix-crawler crawl: {cmd_line}", flush=True)
|
||||
subprocess.run(cmd_args, check=True)
|
||||
|
||||
warc_files = temp_root_dir / "collections" / "capture" / "archive"
|
||||
warc_files = list(temp_root_dir.rglob("collections/capture-*/archive/"))[-1]
|
||||
warc2zim_args.append(str(warc_files))
|
||||
|
||||
num_files = sum(1 for e in warc_files.iterdir())
|
||||
|
Loading…
x
Reference in New Issue
Block a user