From ce68493087a0cb0d89995f21f30ceb3db7d5a54c Mon Sep 17 00:00:00 2001 From: renaud gaudin Date: Mon, 25 Jul 2022 08:41:08 +0000 Subject: [PATCH] increased check_url timeouts --- CHANGELOG.md | 1 + zimit.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 880402c..8bef2cf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Using warc2zim version xxx ⚠️ use released warc2zim before releasing - Using browsertrix-crawler 0.7 ⚠️ use non-beya before releasing - Fixed `--allowHashUrls` being a boolean param +- Increased `check_url` timeout (12.2s to connect, 27s to read) instead of 10s ## [1.2.0] - 2022-06-21 diff --git a/zimit.py b/zimit.py index 6dcc17c..af3ca53 100755 --- a/zimit.py +++ b/zimit.py @@ -396,10 +396,10 @@ def check_url(url, scope=None): url = urllib.parse.urlparse(url) try: resp = requests.head( - url.geturl(), stream=True, allow_redirects=True, timeout=10 + url.geturl(), stream=True, allow_redirects=True, timeout=(12.2, 27) ) except requests.exceptions.RequestException as exc: - print(f"failed to connect to {url}: {exc}", flush=True) + print(f"failed to connect to {url.geturl()}: {exc}", flush=True) raise SystemExit(1) actual_url = urllib.parse.urlparse(resp.url)