Download large file from URL using Python

I have a task to download around 16K+ files (max size 1 GB each) from given URLs to a target location. The files are of different formats, like pdf, ppt, doc, docx, zip, jpg, iso, etc. So I wrote the piece of code below, which:

  1. sometimes downloads the file, but sometimes only a 26 KB file is downloaded.
  2. sometimes fails with the error "[Errno 10054] An existing connection was forcibly closed by the remote host".
import requests
from openpyxl import load_workbook

wb = load_workbook('downloads.xlsx')  # workbook holding the URLs (name illustrative)
sheet = wb.active

def download_file(s):
    for row in sheet.iter_rows(min_row=2):
        try:
            url = row[6].value  # URL is read from the seventh column of the sheet
            save_path = url.split('/')[-1]  # local filename derived from the URL
            # Send GET request to the URL
            response = s.get(url)
            if response.status_code == 200:
                with open(save_path, 'wb') as file:
                    file.write(response.content)
        except Exception as e:
            print(f"Error: {e}")


if __name__ == "__main__":
    with requests.Session() as s:
        res = s.post(login_url, data=login_data)  # login_url/login_data defined elsewhere
        download_file(s)
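
As a quick diagnostic (not part of the original code), inspecting the response before writing it to disk can show whether those 26 KB files are the requested documents or a saved HTML error/login page; a minimal helper sketch, to be called right after s.get(url):

def check_response(response, url):
    # A text/html Content-Type for a .zip or .pdf URL usually means an
    # error or login page came back instead of the actual file.
    print(url,
          response.status_code,
          response.headers.get('Content-Type'),
          response.headers.get('Content-Length'))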

I also tried an alternative approach using shutil, and downloading in chunks, but the issue is still observed. I referred to the solutions from here and here:

import shutil

# Attempt 1: stream the raw socket straight to disk
with requests.get(url, stream=True) as r:
    with open(local_filename, 'wb') as f:
        shutil.copyfileobj(r.raw, f)

# Attempt 2: iterate over the response in 2 MB chunks
response = requests.get(url, stream=True)
with open(book_name, 'wb') as f:
    for chunk in response.iter_content(1024 * 1024 * 2):
        f.write(chunk)
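
A caveat with the shutil.copyfileobj(r.raw, f) pattern (possibly not the root cause here, but worth ruling out): r.raw bypasses requests' automatic content decoding, so responses sent with gzip or deflate transfer encoding are written to disk still compressed unless decoding is enabled explicitly, e.g.:

import shutil
import requests

url = "https://example.com/largefile.zip"  # illustrative URL and filename
local_filename = "largefile.zip"

with requests.get(url, stream=True) as r:
    r.raise_for_status()
    # Ask urllib3 to decode gzip/deflate on the fly; otherwise r.raw
    # yields the bytes exactly as they were sent on the wire.
    r.raw.decode_content = True
    with open(local_filename, 'wb') as f:
        shutil.copyfileobj(r.raw, f)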
asked Nov 17 '25 by user166013

1 Answer

Streaming the download and adding retry logic should resolve the issues you are facing; refer to the following code sample:

import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

def download_file(url, local_filename, session):
    try:
        with session.get(url, stream=True, timeout=10) as response:
            response.raise_for_status()  # Raise an error on bad status codes
            with open(local_filename, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):  # 8KB chunks
                    if chunk:  # filter out keep-alive new chunks
                        f.write(chunk)
        print(f"Downloaded {local_filename} successfully.")
    except Exception as e:
        print(f"Error downloading {local_filename}: {e}")

def create_session_with_retries():
    session = requests.Session()
    # Configure retries: 5 attempts with exponential backoff
    retries = Retry(
        total=5,
        backoff_factor=0.3,
        status_forcelist=[500, 502, 503, 504],
        allowed_methods=["GET", "POST"]
    )
    adapter = HTTPAdapter(max_retries=retries)
    session.mount('http://', adapter)
    session.mount('https://', adapter)
    return session

if __name__ == "__main__":
    # Example URL and filename
    url = "https://example.com/largefile.zip"
    local_filename = "largefile.zip"
    
    # Create a session with retries enabled
    session = create_session_with_retries()
    
    # Download the file
    download_file(url, local_filename, session)
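
To tie this back to the original script, the example __main__ block above could be replaced by something like the following sketch (assuming the same openpyxl sheet layout and login flow as in the question; the workbook name and the filename derivation are illustrative):

from openpyxl import load_workbook

if __name__ == "__main__":
    wb = load_workbook('downloads.xlsx')  # workbook name assumed
    sheet = wb.active

    session = create_session_with_retries()
    session.post(login_url, data=login_data)  # login_url/login_data as in the question

    for row in sheet.iter_rows(min_row=2):
        url = row[6].value  # URL in the seventh column, as in the question
        if not url:
            continue
        local_filename = url.split('/')[-1]  # naive filename derivation; adjust as needed
        download_file(url, local_filename, session)

Note that the timeout=10 passed to session.get bounds the connection time and the gaps between reads, not the total download time, so a slow but otherwise healthy 1 GB download will not be cut off by it.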
answered Nov 18 '25 by htrehrthtr


