From 936666917c2ae4d4e3a9d8a0e63b97f85d1fed38 Mon Sep 17 00:00:00 2001 From: benoit74 Date: Mon, 3 Jun 2024 08:51:48 +0000 Subject: [PATCH] Strip user-agent leading whitespaces and ignore empty user agents --- src/zimit/zimit.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/zimit/zimit.py b/src/zimit/zimit.py index 7c2764a..d48ad7f 100755 --- a/src/zimit/zimit.py +++ b/src/zimit/zimit.py @@ -558,6 +558,16 @@ def get_node_cmd_line(args): "logging", ]: value = getattr(args, arg) + if arg == "userAgent": + # - strip leading whitespace which is not allowed on some websites + # - strip trailing whitespace which is either not allowed if no suffix is + # used, or duplicates the automatically added one if a suffix is there + # - value is None when userAgent is not passed + if value: + value = value.strip() + if not value: + # ignore empty userAgent arg and keep crawler default value if empty + continue if value is None or (isinstance(value, bool) and value is False): continue node_cmd.append("--" + arg)