increased check_url timeouts

This commit is contained in:
renaud gaudin 2022-07-25 08:41:08 +00:00
parent 857e044c84
commit ce68493087
2 changed files with 3 additions and 2 deletions

View File

@ -17,6 +17,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Using warc2zim version xxx ⚠️ use released warc2zim before releasing
- Using browsertrix-crawler 0.7 ⚠️ use non-beta before releasing
- Fixed `--allowHashUrls` being a boolean param
- Increased `check_url` timeouts (12.2s to connect, 27s to read) instead of 10s overall
## [1.2.0] - 2022-06-21

View File

@ -396,10 +396,10 @@ def check_url(url, scope=None):
url = urllib.parse.urlparse(url)
try:
resp = requests.head(
url.geturl(), stream=True, allow_redirects=True, timeout=10
url.geturl(), stream=True, allow_redirects=True, timeout=(12.2, 27)
)
except requests.exceptions.RequestException as exc:
print(f"failed to connect to {url}: {exc}", flush=True)
print(f"failed to connect to {url.geturl()}: {exc}", flush=True)
raise SystemExit(1)
actual_url = urllib.parse.urlparse(resp.url)