From e43cbccfbe03dc6690a6306ef2030c4b64475371 Mon Sep 17 00:00:00 2001
From: Sam <87801374+DeathEaterSam@users.noreply.github.com>
Date: Thu, 16 Nov 2023 04:42:25 +0500
Subject: [PATCH] Improved Downloading Functionality

Used context manager to open the file and download the file in chunks in
a session. Added a progress bar to show the progress of the download.
---
 pranaam/utils.py | 9 +++++++--
 requirements.txt | 1 +
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/pranaam/utils.py b/pranaam/utils.py
index 3b224e8..d3ed8dc 100644
--- a/pranaam/utils.py
+++ b/pranaam/utils.py
@@ -4,6 +4,7 @@ import tarfile
 import requests
 
 from .logging import get_logger
+from tqdm.auto import tqdm
 
 logger = get_logger()
 
@@ -16,8 +17,12 @@ def download_file(url, target, file_name):
     try:
         print("Download models from dataverse...")
         # download the file
-        r = requests.get(url, allow_redirects=True)
-        open(file_path, "wb").write(r.content)
+        with requests.Session() as s:
+            r = s.get(url, stream=True, allow_redirects=True)
+            with tqdm(total=int(r.headers.get('Content-Length', 0)) or None, unit='iB', unit_scale=True, desc=file_name, initial=0, miniters=1, ascii=True, colour='cyan', leave=True) as pbar:
+                with open(file_path, 'wb') as fd:
+                    for chunk in r.iter_content(chunk_size=1024**2):
+                        pbar.update(fd.write(chunk))
         # untar
         with tarfile.open(file_path, "r:gz") as tar_ref:
             def is_within_directory(directory, target):
diff --git a/requirements.txt b/requirements.txt
index b8ac2dd..2c61bec 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,3 +5,4 @@ pandas
 numpy
 setuptools>=65.5.1 # not directly required, pinned by Snyk to avoid a vulnerability
 wheel>=0.38.0 # not directly required, pinned by Snyk to avoid a vulnerability
+tqdm