# Source code for atomistic_cookbook_utils._download (extracted from rendered docs)
"""HTTP download with retries on transient errors."""
from __future__ import annotations
from pathlib import Path
from typing import Tuple, Union
import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
# Filesystem-path argument type accepted throughout this module.
PathLike = Union[str, Path]
# [docs]  (Sphinx viewcode link marker — extraction artifact, not part of the module)
def download_with_retry(
    url: str,
    destination: PathLike,
    *,
    retries: int = 5,
    backoff_factor: float = 1.0,
    status_forcelist: Tuple[int, ...] = (429, 500, 502, 503, 504),
    overwrite: bool = False,
    chunk_size: int = 1 << 16,
    timeout: float = 30.0,
) -> Path:
    """Download ``url`` to ``destination`` with automatic retries.

    The destination's parent directories are created as needed. If the
    file already exists and ``overwrite`` is false (the default), the
    download is skipped and the existing path is returned.

    Retries follow urllib3's :class:`~urllib3.util.retry.Retry` with an
    exponential backoff. With the defaults (``retries=5``,
    ``backoff_factor=1``) the waits between attempts are roughly
    1, 2, 4, 8, 16 seconds.

    Parameters
    ----------
    url
        URL to fetch.
    destination
        Where to save the file.
    retries
        Maximum number of retry attempts on the status codes listed in
        ``status_forcelist`` (and on connect/read errors).
    backoff_factor
        Exponential backoff factor passed to urllib3 ``Retry``.
    status_forcelist
        HTTP status codes that trigger a retry.
    overwrite
        If true, re-download even if the destination already exists.
    chunk_size
        Streaming chunk size in bytes.
    timeout
        Per-attempt connect/read timeout in seconds passed to
        :meth:`requests.Session.get`. Without this, requests would wait
        indefinitely on a hung server and the retry logic would never
        engage.

    Returns
    -------
    pathlib.Path
        The resolved destination path.
    """
    destination = Path(destination)
    # Fast path: nothing to do if the file is already present.
    if destination.exists() and not overwrite:
        return destination
    destination.parent.mkdir(parents=True, exist_ok=True)

    retry = Retry(
        total=retries,
        backoff_factor=backoff_factor,
        status_forcelist=list(status_forcelist),
        allowed_methods=frozenset(["GET", "HEAD"]),
    )
    adapter = HTTPAdapter(max_retries=retry)
    with requests.Session() as session:
        # Mount the retrying adapter for both schemes so redirects
        # between http and https keep the same retry behavior.
        session.mount("http://", adapter)
        session.mount("https://", adapter)
        # BUG FIX: a timeout is now supplied; requests' default is to
        # block forever, which made a stalled connection hang the caller.
        with session.get(url, stream=True, timeout=timeout) as response:
            response.raise_for_status()
            # Stream into a temporary ".part" file and atomically rename
            # on success, so a partial download never masquerades as a
            # complete file at ``destination``.
            tmp_path = destination.with_suffix(destination.suffix + ".part")
            try:
                with open(tmp_path, "wb") as fh:
                    for chunk in response.iter_content(chunk_size=chunk_size):
                        if chunk:  # skip keep-alive chunks
                            fh.write(chunk)
                tmp_path.replace(destination)
            except BaseException:
                # Clean up the partial file on ANY interruption,
                # including KeyboardInterrupt, then re-raise.
                tmp_path.unlink(missing_ok=True)
                raise
    return destination