From ce68493087a0cb0d89995f21f30ceb3db7d5a54c Mon Sep 17 00:00:00 2001
From: renaud gaudin <reg@rskg.org>
Date: Mon, 25 Jul 2022 08:41:08 +0000
Subject: [PATCH] increased check_url timeouts

---
 CHANGELOG.md | 1 +
 zimit.py     | 4 ++--
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 880402c..8bef2cf 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -17,6 +17,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Using warc2zim version xxx ⚠️ use released warc2zim before releasing
 - Using browsertrix-crawler 0.7 ⚠️ use non-beya before releasing
 - Fixed `--allowHashUrls` being a boolean param
+- Increased `check_url` timeout (12.2s to connect, 27s to read) instead of 10s
 
 ## [1.2.0] - 2022-06-21
 
diff --git a/zimit.py b/zimit.py
index 6dcc17c..af3ca53 100755
--- a/zimit.py
+++ b/zimit.py
@@ -396,10 +396,10 @@ def check_url(url, scope=None):
     url = urllib.parse.urlparse(url)
     try:
         resp = requests.head(
-            url.geturl(), stream=True, allow_redirects=True, timeout=10
+            url.geturl(), stream=True, allow_redirects=True, timeout=(12.2, 27)
         )
     except requests.exceptions.RequestException as exc:
-        print(f"failed to connect to {url}: {exc}", flush=True)
+        print(f"failed to connect to {url.geturl()}: {exc}", flush=True)
         raise SystemExit(1)
     actual_url = urllib.parse.urlparse(resp.url)