download a file, retry if interrupted

master
JoYo 2020-09-02 17:17:06 -04:00
parent 5d096cd67d
commit cae6899008
1 changed files with 59 additions and 0 deletions

59
file_download.py Normal file
View File

@ -0,0 +1,59 @@
from _io import BufferedWriter
from argparse import ArgumentParser
from datetime import datetime, timedelta
from pathlib import Path
from shutil import copyfileobj
from time import sleep
from zipfile import ZipFile
import logging
import requests
def file_download(
    *, url: str, file_open: BufferedWriter, retry_delay: float = 1
) -> None:
    """Download ``url`` into ``file_open``, resuming from its current offset.

    The total size is read once from a HEAD request. Each attempt then asks
    the server for the bytes from ``file_open.tell()`` to the end of the
    resource and appends them to ``file_open``. Attempts repeat until the
    file position reaches the advertised size.

    Args:
        url: Address of the resource to fetch.
        file_open: Writable binary file positioned where the download should
            continue (position 0 for a fresh download).
        retry_delay: Seconds to wait between attempts.

    Raises:
        requests.HTTPError: The server answered with a 4xx/5xx status; this
            is treated as a permanent failure rather than an interruption.
        KeyError: The HEAD response carries no ``content-length`` header.
    """
    # requests.head() does not follow redirects unless asked to; follow them
    # so the length describes the same resource the GET below ends up reading.
    head = requests.head(url, allow_redirects=True)
    head.raise_for_status()
    content_length = int(head.headers["content-length"])

    # Loop instead of recursing: a flaky link must not exhaust the call stack.
    while True:
        file_end = file_open.tell()
        if file_end >= content_length:
            # Nothing left to fetch; requesting an empty range would only
            # produce a "416 Range Not Satisfiable" error body.
            return

        # Open-ended range: the end position of a Range header is inclusive,
        # so "N-<content_length>" would point one byte past the resource.
        headers = {"range": f"bytes={file_end}-"}
        logging.info(headers)
        try:
            with requests.get(url, headers=headers, stream=True) as response:
                # Never write an error page into the output file.
                response.raise_for_status()
                if file_end and response.status_code != 206:
                    # The server ignored the range and is sending the whole
                    # resource again, so discard the partial data first.
                    logging.warning("range request ignored, restarting download")
                    file_open.seek(0)
                    file_open.truncate()
                copyfileobj(response.raw, file_open)
        except requests.HTTPError:
            raise
        except Exception as error:
            # Interrupted transfer: log it and resume from the new offset.
            logging.error(error)

        # Also covers a transfer that ended early without raising.
        if file_open.tell() < content_length:
            sleep(retry_delay)
def zipfile_download() -> None:
    """Command-line entry point: download a zipfile and check it is not empty.

    Parses ``--url`` and ``--path`` (plus ``-v`` for debug logging) from the
    command line, downloads the URL to the path with ``file_download``, then
    opens the result as a zip archive and lists its members.

    Raises:
        Exception: The downloaded archive contains no members.
    """
    description = """
    Download large zipfile from URL, retry at offset if interrupted, check zip.
    """
    parser = ArgumentParser(description=description)
    parser.add_argument("-v", "--verbose", action="count")
    parser.add_argument("-url", "--url", required=True, help="URL to zipfile")
    parser.add_argument("-o", "--path", required=True, help="path to write zipfile")
    args = parser.parse_args()

    level = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(level=level)
    logging.info(args)
    today = datetime.utcnow().strftime("%Y%m%d")
    logging.info(today)

    path = Path(args.path)
    if path.exists():
        logging.info(f'output path exists, resuming: "{path}"')
    # Append mode keeps any bytes already on disk and positions the file at
    # its end, so file_download() continues from that offset. Mode "wb" would
    # truncate the file and silently throw the partial download away.
    with path.open("ab") as file_open:
        file_download(url=args.url, file_open=file_open)

    # Opening the archive and listing its members is the integrity check.
    with ZipFile(path) as zip_file:
        file_list = zip_file.namelist()
    if not file_list:
        raise Exception(f'zipfile_empty "{args.url}"')
# Run the command-line downloader only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    zipfile_download()