Merge pull request #246 from openzim/fix_zero_arg

Fix zero arg + crawler 0.12.2
This commit is contained in:
rgaudin 2023-11-16 09:15:25 +00:00 committed by GitHub
commit 99ca5ca901
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 16 additions and 12 deletions

1
.gitignore vendored
View File

@ -5,3 +5,4 @@ __pycache__
collections/ collections/
node_modules/ node_modules/
output/ output/
venv

View File

@ -7,6 +7,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## Unreleased ## Unreleased
### Changed
- Using browsertrix-crawler 0.12.2
### Fixed
- Fix the logic for passing args to the crawler so that the value '0' is supported (#245)
## [1.6.1] - 2023-11-06 ## [1.6.1] - 2023-11-06
### Changed ### Changed

View File

@ -1,4 +1,4 @@
FROM webrecorder/browsertrix-crawler:0.12.1 FROM webrecorder/browsertrix-crawler:0.12.2
LABEL org.opencontainers.image.source https://github.com/openzim/zimit LABEL org.opencontainers.image.source https://github.com/openzim/zimit
RUN apt-get update \ RUN apt-get update \

View File

@ -151,18 +151,16 @@ def zimit(args=None):
"--extraHops", "--extraHops",
help="Number of extra 'hops' to follow, beyond the current scope", help="Number of extra 'hops' to follow, beyond the current scope",
type=int, type=int,
default=0,
) )
parser.add_argument( parser.add_argument(
"--limit", help="Limit crawl to this number of pages", type=int, default=0 "--limit", help="Limit crawl to this number of pages", type=int
) )
parser.add_argument( parser.add_argument(
"--maxPageLimit", "--maxPageLimit",
help="Maximum pages to crawl, overriding pageLimit if both are set", help="Maximum pages to crawl, overriding pageLimit if both are set",
type=int, type=int,
default=0,
) )
parser.add_argument( parser.add_argument(
@ -263,7 +261,6 @@ def zimit(args=None):
help="If >0, amount of time to sleep (in seconds) after behaviors " help="If >0, amount of time to sleep (in seconds) after behaviors "
"before moving on to next page", "before moving on to next page",
type=int, type=int,
default=0,
) )
parser.add_argument( parser.add_argument(
@ -276,7 +273,6 @@ def zimit(args=None):
"--sizeLimit", "--sizeLimit",
help="If set, save state and exit if size limit exceeds this value", help="If set, save state and exit if size limit exceeds this value",
type=int, type=int,
default=0,
) )
parser.add_argument( parser.add_argument(
@ -291,14 +287,12 @@ def zimit(args=None):
"--timeLimit", "--timeLimit",
help="If set, save state and exit after time limit, in seconds", help="If set, save state and exit after time limit, in seconds",
type=int, type=int,
default=0,
) )
parser.add_argument( parser.add_argument(
"--healthCheckPort", "--healthCheckPort",
help="port to run healthcheck on", help="port to run healthcheck on",
type=int, type=int,
default=0,
) )
parser.add_argument( parser.add_argument(
@ -522,10 +516,11 @@ def get_node_cmd_line(args):
"config", "config",
]: ]:
value = getattr(args, arg) value = getattr(args, arg)
if value: if value == None or (isinstance(value, bool) and value == False):
node_cmd.append("--" + arg) continue
if not isinstance(value, bool): node_cmd.append("--" + arg)
node_cmd.append(str(value)) if not isinstance(value, bool):
node_cmd.append(str(value))
return node_cmd return node_cmd