I'm trying to download a large folder containing 50,000 images from my Google Drive to a local server using Python. The following code fails with a file-limit error. Are there any alternative solutions?
import gdown
# Shareable link to the Drive folder (the folder ID shown here is a fake token).
url = 'https://drive.google.com/drive/folders/135hTTURfjn43fo4f?usp=sharing'
# Ask gdown to fetch the whole folder; this is the call that raises the error.
gdown.download_folder(url=url)
Failed to retrieve folder contents:
The gdrive folder with url: https://drive.google.com/drive/folders/135hTTURfjn43fo4f?usp=sharing has at least 50 files, gdrive can't download more than this limit, if you are ok with this, please run again with --remaining-ok flag.
The download limit is set in ../gdown/download_folder.py
If you installed gdown in a virtual environment, simply edit the download_folder.py file located in .venv/lib/python3.*/site-packages/gdown/. Edit the line MAX_NUMBER_FILES = 50 and set the value to your new maximum.
As kite mentioned in the comments, call it with the remaining_ok flag.
# remaining_ok=True is the Python-API form of the --remaining-ok flag named in
# the error message: it accepts gdown's 50-file limit instead of aborting.
# NOTE(review): per the answer text, this does not appear to lift the limit.
gdown.download_folder(url, remaining_ok=True)
This isn't mentioned at https://pypi.org/project/gdown/, so it may cause some confusion.
No reference for remaining_ok is available aside from the warning and this GitHub code.
It seems that gdown is strictly limited to 50 files, and I haven't found a way of circumventing it.
If using something other than gdown is an option, see the code below.
import io
import os
import os.path
from googleapiclient.discovery import build
from googleapiclient.http import MediaIoBaseDownload
from google.oauth2 import service_account
# Service-account key, inlined as a dict.
# Create a service account and paste the contents of its JSON key file here:
# https://cloud.google.com/docs/authentication/getting-started#creating_a_service_account
# A credentials.json file has the following shape:
credential_json = {
    "type": "service_account",
    "project_id": "*********",
    "private_key_id": "*********",
    "private_key": "-----BEGIN PRIVATE KEY-----\n*********\n-----END PRIVATE KEY-----\n",
    "client_email": "service-account@*********.iam.gserviceaccount.com",
    "client_id": "*********",
    "auth_uri": "https://accounts.google.com/o/oauth2/auth",
    "token_uri": "https://oauth2.googleapis.com/token",
    "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
    "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/service-account%40*********.iam.gserviceaccount.com",
}

# Turn the key material into credentials, then build a Drive v3 client with them.
credentials = service_account.Credentials.from_service_account_info(
    credential_json
)
drive_service = build(serviceName='drive', version='v3', credentials=credentials)
# Replace the placeholder with the ID of the Drive folder to download.
folderId = '### Google Drive Folder ID ###'
# Local directory that receives the downloaded files.
outputFolder = 'output'

# Create the output directory if it does not exist yet. exist_ok=True replaces
# the separate isdir() check and removes the window between checking and
# creating in which another process could create the directory first.
os.makedirs(outputFolder, exist_ok=True)
# Collect the id and name of every file whose parent is folderId.
# files.list returns at most pageSize entries per call, so keep requesting
# pages until the response no longer carries a nextPageToken.
# NOTE(review): the query does not exclude trashed items or sub-folders; add
# "and trashed = false" to the query if those should be skipped.
items = []
pageToken = None  # None means "first page"; the client omits None parameters
while True:
    response = drive_service.files().list(
        q=f"'{folderId}' in parents",
        pageSize=1000,
        pageToken=pageToken,
        fields="nextPageToken, files(id, name)",
    ).execute()
    items.extend(response.get('files', []))
    pageToken = response.get('nextPageToken')
    if pageToken is None:
        break
# Download every listed file into outputFolder, one file at a time.
for file in items:
    file_id = file['id']
    file_name = file['name']
    request = drive_service.files().get_media(fileId=file_id)

    # Drive file names may contain path separators, which would otherwise be
    # treated as sub-directories of outputFolder; flatten them so every file
    # is saved directly under outputFolder.
    safe_name = file_name.replace('/', '_').replace(os.sep, '_')
    target_path = os.path.join(outputFolder, safe_name)

    # The context manager closes each file as soon as its download finishes,
    # so a run over tens of thousands of files does not accumulate open
    # file descriptors.
    with io.FileIO(target_path, 'wb') as fh:
        downloader = MediaIoBaseDownload(fh, request)
        done = False
        while not done:
            # next_chunk() fetches the next part and reports whether the
            # download is finished; the progress status is not used here.
            _status, done = downloader.next_chunk()

    print(f'{file_name} downloaded completely.')
If you love us, you can donate to us via PayPal or buy us a coffee so we can maintain and grow. Thank you!
Donate Us With