increased check_url timeouts

This commit is contained in:
renaud gaudin 2022-07-25 08:41:08 +00:00
parent 857e044c84
commit ce68493087
2 changed files with 3 additions and 2 deletions

View File

@ -17,6 +17,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Using warc2zim version xxx ⚠️ use released warc2zim before releasing
- Using browsertrix-crawler 0.7 ⚠️ use non-beta before releasing
- Fixed `--allowHashUrls` being a boolean param
- Increased `check_url` timeouts (12.2s to connect, 27s to read) instead of 10s overall
## [1.2.0] - 2022-06-21

View File

@ -396,10 +396,10 @@ def check_url(url, scope=None):
url = urllib.parse.urlparse(url)
try:
resp = requests.head(
url.geturl(), stream=True, allow_redirects=True, timeout=10
url.geturl(), stream=True, allow_redirects=True, timeout=(12.2, 27)
)
except requests.exceptions.RequestException as exc:
print(f"failed to connect to {url}: {exc}", flush=True)
print(f"failed to connect to {url.geturl()}: {exc}", flush=True)
raise SystemExit(1)
actual_url = urllib.parse.urlparse(resp.url)