mirror of
https://github.com/openzim/zimit.git
synced 2025-09-28 07:24:13 -04:00
Keep temporary folder when crawler or warc2zim fails, even if not asked for
This commit is contained in:
parent
bc73193ce0
commit
ee82837aaa
@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
### Changed
|
||||
|
||||
- Change solution to report partial ZIM to the Zimfarm and other clients (#304)
|
||||
- Keep temporary folder when crawler or warc2zim fails, even if not asked for (#468)
|
||||
|
||||
### Fixed
|
||||
|
||||
|
@ -33,6 +33,8 @@ from zimit.constants import (
|
||||
)
|
||||
from zimit.utils import download_file
|
||||
|
||||
temp_root_dir: Path | None = None
|
||||
|
||||
|
||||
class ProgressFileWatcher:
|
||||
def __init__(self, output_dir: Path, stats_path: Path):
|
||||
@ -103,6 +105,24 @@ class ProgressFileWatcher:
|
||||
json.dump(out, ofh)
|
||||
|
||||
|
||||
def cleanup():
    """Delete the temporary root directory of the current run, if any.

    Reads the module-level ``temp_root_dir``. When it is unset, only a
    warning is logged and nothing is removed; otherwise the removal is
    announced in the log and the whole directory tree is deleted.
    """
    if temp_root_dir:
        # Blank line and separator make the cleanup step stand out in the log.
        for message in (
            "",
            "----------",
            f"Cleanup, removing temp dir: {temp_root_dir}",
        ):
            logger.info(message)
        shutil.rmtree(temp_root_dir)
        return

    logger.warning("Temporary root dir not already set, cannot clean this up")
|
||||
|
||||
|
||||
def cancel_cleanup():
    """Keep the temporary files: tell the user where they are and disable cleanup.

    Logs the location held in the module-level ``temp_root_dir`` and then
    removes ``cleanup`` from the ``atexit`` handlers so the directory is not
    deleted at interpreter exit.
    """
    kept_notice = (
        f"Temporary files have been kept in {temp_root_dir}, please clean them"
        " up manually once you don't need them anymore"
    )
    logger.info(kept_notice)
    # Unregistering a function that was never registered is silently ignored.
    atexit.unregister(cleanup)
|
||||
|
||||
|
||||
def run(raw_args):
|
||||
parser = ArgumentParser(
|
||||
description="Run a browser-based crawl on the specified URL and convert to ZIM"
|
||||
@ -427,19 +447,13 @@ def run(raw_args):
|
||||
return EXIT_CODE_WARC2ZIM_CHECK_FAILED
|
||||
|
||||
# make temp dir for this crawl
|
||||
global temp_root_dir # noqa: PLW0603
|
||||
if zimit_args.build:
|
||||
temp_root_dir = Path(tempfile.mkdtemp(dir=zimit_args.build, prefix=".tmp"))
|
||||
else:
|
||||
temp_root_dir = Path(tempfile.mkdtemp(dir=zimit_args.output, prefix=".tmp"))
|
||||
|
||||
if not zimit_args.keep:
|
||||
|
||||
def cleanup():
|
||||
logger.info("")
|
||||
logger.info("----------")
|
||||
logger.info(f"Cleanup, removing temp dir: {temp_root_dir}")
|
||||
shutil.rmtree(temp_root_dir)
|
||||
|
||||
atexit.register(cleanup)
|
||||
|
||||
# copy / download custom behaviors to one single folder and configure crawler
|
||||
@ -599,6 +613,7 @@ def run(raw_args):
|
||||
logger.error(
|
||||
f"Crawl returned an error: {crawl.returncode}, scraper exiting"
|
||||
)
|
||||
cancel_cleanup()
|
||||
return crawl.returncode
|
||||
|
||||
if zimit_args.collection:
|
||||
@ -642,6 +657,11 @@ def run(raw_args):
|
||||
stats_content["partialZim"] = partial_zim
|
||||
stats.write_text(json.dumps(stats_content))
|
||||
|
||||
# also call cancel_cleanup when --keep, even though cleanup has not been registered in that case,
|
||||
# so that we display the temporary files location just like in other situations
|
||||
if warc2zim_exit_code or zimit_args.keep:
|
||||
cancel_cleanup()
|
||||
|
||||
return warc2zim_exit_code
|
||||
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user