I have a task to download 16K+ files (each up to 1 GB in size) from given URLs to a local directory. The files come in different formats such as pdf, ppt, doc, docx, zip, jpg, iso, etc. So I wrote the following piece of code:
def download_file(s):
    """Download the URL found in each spreadsheet row, streaming it to disk.

    Iterates over ``sheet`` starting at row 2 and reads the URL from the
    seventh cell of each row. The response body is written in chunks, so
    memory use stays bounded regardless of the file size.

    Args:
        s: A ``requests`` session (already logged in by the caller) used to
            issue the GET requests.

    Errors for a single row are printed and do not stop the remaining rows.
    """
    for row in sheet.iter_rows(min_row=2):
        try:
            url = row[6].value  # reading from excel
            if not url:
                # Blank cell (e.g. trailing empty rows): nothing to download.
                continue
            # stream=True defers the body download so it can be written in
            # chunks; the timeout keeps a stalled server from hanging the
            # whole batch.
            with s.get(url, stream=True, timeout=30) as response:
                if response.status_code != 200:
                    print(f"Error: {url} returned HTTP {response.status_code}")
                    continue
                # NOTE(review): save_path is not defined in this function; it
                # presumably has to be derived per row, otherwise every
                # download overwrites the same file -- confirm.
                with open(save_path, 'wb') as file:
                    for chunk in response.iter_content(chunk_size=1024 * 1024):
                        file.write(chunk)
        except Exception as e:
            print(f"Error: {e}")
if __name__ == "__main__":
    # One session for the whole run so the login cookies are reused by
    # every download request.
    with requests.session() as s:
        res = s.post(login_url, data=login_data)
        # Stop immediately if the login request itself failed, instead of
        # attempting every download without being authenticated.
        res.raise_for_status()
        download_file(s)
I also tried an alternative approach using shutil and downloading in chunks, but the issue is still observed. I referred to the solutions from here and here:
import shutil
# Stream the response body straight to disk without holding it in memory.
with requests.get(url, stream=True, timeout=30) as r:
    # Fail here rather than saving an HTTP error page as the downloaded file.
    r.raise_for_status()
    # r.raw is copied as it arrived on the wire; ask urllib3 to undo any
    # gzip/deflate Content-Encoding so the saved bytes are the real file.
    r.raw.decode_content = True
    with open(local_filename, 'wb') as f:
        shutil.copyfileobj(r.raw, f)
# The context manager releases the connection even if writing fails part-way.
with requests.get(url, stream=True, timeout=30) as response:
    # Fail here rather than saving an HTTP error page as the downloaded file.
    response.raise_for_status()
    with open(book_name, 'wb') as f:
        # Write the body in 2 MiB pieces so memory use stays bounded.
        for chunk in response.iter_content(1024 * 1024 * 2):
            f.write(chunk)
Streaming the download and adding retry logic should resolve the issue you are facing; refer to the following code sample:
import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
def download_file(url, local_filename, session, *, chunk_size=8192, timeout=10):
    """Stream ``url`` to ``local_filename`` without loading it into memory.

    The body is first written to ``<local_filename>.part`` and moved to its
    final name only after the whole response has been received. A failed
    transfer therefore never leaves a truncated file under the final name
    and never destroys a previously downloaded copy.

    Args:
        url: Address to fetch with ``session.get``.
        local_filename: Destination path of the completed download.
        session: Object providing a ``requests``-style ``get`` method.
        chunk_size: Number of bytes requested per ``iter_content`` chunk.
        timeout: Value forwarded to ``session.get`` as ``timeout``.

    Errors are reported with ``print`` and are not re-raised, so a batch of
    downloads can continue after one failure.
    """
    import os  # local import: the module's import block does not include os

    partial_filename = f"{local_filename}.part"
    try:
        with session.get(url, stream=True, timeout=timeout) as response:
            response.raise_for_status()  # Raise an error on bad status codes
            with open(partial_filename, 'wb') as f:
                for chunk in response.iter_content(chunk_size=chunk_size):
                    if chunk:  # filter out keep-alive new chunks
                        f.write(chunk)
        # Publish the file only once it is complete.
        os.replace(partial_filename, local_filename)
        print(f"Downloaded {local_filename} successfully.")
    except Exception as e:
        print(f"Error downloading {local_filename}: {e}")
        # Best-effort cleanup; the partial file may not exist if the request
        # failed before anything was written.
        try:
            os.remove(partial_filename)
        except OSError:
            pass
def create_session_with_retries():
    """Return a ``requests.Session`` that retries failed requests.

    The same retrying adapter is mounted for both ``http://`` and
    ``https://`` URLs. GET and POST requests answered with status 500, 502,
    503 or 504 are retried, with up to 5 retries in total and a backoff
    factor of 0.3 between attempts.
    """
    # Retry policy: 5 attempts with exponential backoff
    retry_policy = Retry(
        total=5,
        backoff_factor=0.3,
        status_forcelist=[500, 502, 503, 504],
        allowed_methods=["GET", "POST"],
    )
    retrying_adapter = HTTPAdapter(max_retries=retry_policy)

    session = requests.Session()
    for scheme in ('http://', 'https://'):
        session.mount(scheme, retrying_adapter)
    return session
if __name__ == "__main__":
    # Example URL and filename
    url = "https://example.com/largefile.zip"
    local_filename = "largefile.zip"
    # Create a session with retries enabled; the ``with`` block closes the
    # session and its pooled connections once the download is finished.
    with create_session_with_retries() as session:
        # Download the file
        download_file(url, local_filename, session)
If you like our work, you can donate to us via PayPal or buy us a coffee so we can maintain and grow. Thank you!
Donate Us With