Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Python / Selenium downloading a PDF with Firefox

In my code, I am using Firefox as my browser to navigate to a site, run a search, and then click to view each PDF document. Once the document is open (it loads without prompting me to download it), I would like to download it. I found a Java version of how to do this, but it doesn't translate as easily to Python. Below I also try to select the download element from the Adobe viewer frame (to download the file directly), but once again the XPath lookup cannot find the element.

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import requests, time


# Start Firefox through geckodriver, open the site and maximize the window.
driver = webdriver.Firefox(executable_path="geckodriver")
driver.get("https://www.okcc.online/")
driver.maximize_window()
# NOTE(review): this options object is created after the driver above and is
# not passed to it anywhere in this script.
# NOTE(review): two of the preferences below are given the string "true"
# rather than the boolean True.
options = webdriver.FirefoxOptions()
options.set_preference("browser.download.folderList", 2)
options.set_preference("browser.download.dir", "/Users/username/Desktop/oklahoma/oklahoma_county")
options.set_preference("browser.download.useDownloadDir", "true")
options.set_preference("browser.helperApps.neverAsk.saveToDisk", "application/pdf")
options.set_preference("pdfjs.disabled", "true")

# Explicit wait (up to 10 seconds) used for the first three form interactions:
# click the menu button, type 'MTG' into the document-type input, then click
# the first matching entry under the 'ui-id-1' list.
wait = WebDriverWait(driver, 10)
wait.until(EC.element_to_be_clickable((By.XPATH, "//*[@id='rod-menu-button']"))).click()
wait.until(EC.element_to_be_clickable((By.XPATH, "//input[@id='rodDocTypeTxt']"))).send_keys('MTG')
wait.until(EC.element_to_be_clickable((By.XPATH, "//ul[@id='ui-id-1']//li//div"))).click()

# Click the date toggle, then fill in the from/to date inputs.
driver.find_element_by_xpath('//*[@id="rod-date-toggle"]').click()

driver.find_element_by_xpath('//*[@id="rodFromDateTxt"]').send_keys('4/1/2020')
driver.find_element_by_xpath('//*[@id="rodToDateTxt"]').send_keys('4/20/2020')

# Submit the search. search_button receives the return value of click() and is
# not used again in this script.
search_button = driver.find_element_by_xpath('//*[@id="rod-submit-search"]').click()

# Fixed two-second pause before collecting the PDF icons.
time.sleep(2)

pdf = driver.find_elements_by_css_selector(".icon.pdf-icon")

# For each PDF icon: click it, pause three seconds, click the element with id
# "download", then click the '.pdf-function-button.pdf-close' element.
for i in pdf:
    i.click()
    time.sleep(3)
    download_button = driver.find_element_by_xpath('//*[@id="download"]')
    download_button.click()
    close_button = driver.find_element_by_css_selector('.pdf-function-button.pdf-close')
    close_button.click()
like image 747
DesertDeveloper Avatar asked Nov 19 '25 05:11

DesertDeveloper


1 Answer

Probably you have already solved this problem, but your question helped me to solve the same problem that I was facing.

You just need to move the "driver = webdriver.Firefox" line so that it comes after the "options.set_preference" lines, and pass the options object as an argument to webdriver.Firefox.

Besides that, you need to change this part:

# Original version: the two boolean preferences are passed as the string "true".
options.set_preference("browser.download.useDownloadDir", "true")
options.set_preference("browser.helperApps.neverAsk.saveToDisk", "application/pdf")
options.set_preference("pdfjs.disabled", "true")

to:

# Corrected version: the two boolean preferences are passed as the Python boolean True.
options.set_preference("browser.download.useDownloadDir", True)
options.set_preference("browser.helperApps.neverAsk.saveToDisk", "application/pdf")
options.set_preference("pdfjs.disabled", True)

Below is my complete code.

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import requests, time



from selenium.common.exceptions import TimeoutException

# Build the Firefox options first and hand them to the driver when it is
# created, so the download preferences belong to the browser session.
options = webdriver.FirefoxOptions()
options.set_preference("browser.download.folderList", 2)
options.set_preference("browser.download.dir", "/Users/username/Desktop/oklahoma/oklahoma_county")
options.set_preference("browser.download.useDownloadDir", True)
options.set_preference("browser.helperApps.neverAsk.saveToDisk", "application/pdf")
options.set_preference("pdfjs.disabled", True)

# executable_path is omitted on purpose: "geckodriver" (looked up on PATH) is
# already the default, and the keyword no longer exists in recent Selenium 4
# releases, where passing it raises a TypeError.
driver = webdriver.Firefox(options=options)
driver.get("https://www.okcc.online/")
driver.maximize_window()

# One explicit wait (up to 10 seconds per condition) is reused for every step
# that depends on the page having caught up.
wait = WebDriverWait(driver, 10)
wait.until(EC.element_to_be_clickable((By.XPATH, "//*[@id='rod-menu-button']"))).click()
wait.until(EC.element_to_be_clickable((By.XPATH, "//input[@id='rodDocTypeTxt']"))).send_keys('MTG')
wait.until(EC.element_to_be_clickable((By.XPATH, "//ul[@id='ui-id-1']//li//div"))).click()

# find_element(By..., ...) is used instead of the find_element_by_* helpers,
# which were removed in Selenium 4; this form works on Selenium 3 as well.
driver.find_element(By.ID, "rod-date-toggle").click()
driver.find_element(By.ID, "rodFromDateTxt").send_keys('4/1/2020')
driver.find_element(By.ID, "rodToDateTxt").send_keys('4/20/2020')

# click() returns None, so its result is not stored.
driver.find_element(By.ID, "rod-submit-search").click()

# Wait for the result icons instead of sleeping a fixed time. A search that
# produces no PDF icons within the timeout is treated as "nothing to download"
# rather than an error, matching the empty-list behaviour of find_elements.
try:
    pdf_icons = wait.until(
        EC.presence_of_all_elements_located((By.CSS_SELECTOR, ".icon.pdf-icon"))
    )
except TimeoutException:
    pdf_icons = []

for pdf_icon in pdf_icons:
    pdf_icon.click()
    # Proceed as soon as the download button is usable rather than after a
    # fixed three-second pause.
    wait.until(EC.element_to_be_clickable((By.ID, "download"))).click()
    driver.find_element(By.CSS_SELECTOR, ".pdf-function-button.pdf-close").click()
like image 62
Evandro Lippert Avatar answered Nov 20 '25 18:11

Evandro Lippert



Donate For Us

If you love us, you can donate via PayPal or buy us a coffee so we can maintain and grow the site. Thank you!