I have tried using a CSS selector to send a value to the search box with the send_keys() function and then submit it, so that I get the table of doctors for a particular year, but I am getting the following error: "ElementNotInteractableException: Message: element not interactable".
Below is the code I have written:
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from bs4 import BeautifulSoup as bs
import time

url = "https://www.mciindia.org/CMS/information-desk/indian-medical-register"

driver = webdriver.Chrome(r"C:\chromedriver.exe")
driver.get(url)

# Switch to the "Year of Registration" search tab, fill in the year and submit.
driver.find_element_by_xpath("//a[contains(text(),'Year of Registration')]").click()
driver.find_elements_by_css_selector("input[type='text']")[-1].send_keys("2015")
driver.find_element_by_css_selector("input[value='Submit']").click()

next_page = True
while next_page:
    soup = bs(driver.page_source, 'html.parser')
    table1 = soup.find('table', {'id': 'doct_info2'})
    # Guard explicitly instead of the original bare `except: pass`, which hid
    # every failure inside the loop.
    if table1 is not None:
        for row in table1.find_all('tr'):
            data = row.find_all('td')
            if len(data) != 7:
                continue
            # BUG FIX: the original called `data[6].click()`, but data[6] is a
            # BeautifulSoup Tag and Tags have no .click() method — it raised
            # AttributeError on every row and the bare except swallowed it.
            # A parsed snapshot cannot be clicked; print the parsed cells
            # instead.  Opening the per-doctor popup requires clicking the
            # *live* element through Selenium.
            print(data)
    time.sleep(5)
    try:
        driver.find_element_by_xpath("//a[contains(text(),'Next')]").click()
    except NoSuchElementException:
        # No "Next" link on the page means we are on the last result page.
        print('No more pages')
        next_page = False

driver.close()
The code below fetches only the first 10 results.
You can change `length` in the request URL from 10 up to 42354 (the maximum for the year 2015), and out.csv will be generated almost instantly.
The second file, data.csv, will contain the per-doctor internal details.
The page is rendered via JavaScript, so I located the underlying XHR request in the Network tab of the browser's developer tools.
import pandas as pd
import csv
import re
import requests
def Table():
    """Fetch one page of the IMR "year of registration" listing and save it to out.csv.

    Hits the DataTables XHR endpoint directly (the page itself is rendered by
    JavaScript).  In the URL, ``length=10`` limits the number of rows (up to
    42354 for ``year=2015``) and ``year=2015`` selects the registration year.

    Returns:
        list[list[str]]: ``[doctorId, regdNoValue]`` pairs extracted from the
        ``openDoctorDetailsnew('<id>', '<regNo>')`` onclick handler embedded in
        the last column, for use by ``Details()``.
    """
    table = pd.read_json("https://mciindia.org/MCIRest/open/getPaginatedData?service=getPaginatedDoctor&draw=1&columns[0][data]=0&columns[0][name]=&columns[0][searchable]=true&columns[0][orderable]=true&columns[0][search][value]=&columns[0][search][regex]=false&columns[1][data]=1&columns[1][name]=&columns[1][searchable]=true&columns[1][orderable]=true&columns[1][search][value]=&columns[1][search][regex]=false&columns[2][data]=2&columns[2][name]=&columns[2][searchable]=true&columns[2][orderable]=true&columns[2][search][value]=&columns[2][search][regex]=false&columns[3][data]=3&columns[3][name]=&columns[3][searchable]=true&columns[3][orderable]=true&columns[3][search][value]=&columns[3][search][regex]=false&columns[4][data]=4&columns[4][name]=&columns[4][searchable]=true&columns[4][orderable]=true&columns[4][search][value]=&columns[4][search][regex]=false&columns[5][data]=5&columns[5][name]=&columns[5][searchable]=true&columns[5][orderable]=true&columns[5][search][value]=&columns[5][search][regex]=false&columns[6][data]=6&columns[6][name]=&columns[6][searchable]=true&columns[6][orderable]=true&columns[6][search][value]=&columns[6][search][regex]=false&order[0][column]=0&order[0][dir]=asc&start=0&length=10&search[value]=&search[regex]=false&year=2015&_=1577626804003")['data']
    data = []
    with open('out.csv', 'w', newline="") as f:
        writer = csv.writer(f)
        writer.writerow(
            ['Year Of The Info', 'Registration#', 'State Medical Councils', 'Name', 'FatherName'])
        for item in table:
            writer.writerow(item[1:6])
            # item[6] is HTML whose onclick handler carries the two IDs we
            # need.  Use the regex's own capture groups instead of the original
            # fragile `match.group().split("'")[1:4:2]` slicing, and skip rows
            # that lack the handler instead of crashing on a None match.
            match = re.search(
                r"openDoctorDetailsnew\('([^']*)', '([^']*)'", item[6])
            if match:
                data.append([match.group(1), match.group(2)])
    print("Data Saved Into out.csv")
    return data
def Details():
    """POST each (doctorId, regdNoValue) pair from Table() to the detail service.

    Returns:
        tuple[list, list]: ``(names, items)`` where ``names`` holds each
        distinct response-header layout (recorded once) and ``items`` holds
        one row of values per doctor, both consumed by ``Save()``.
    """
    names = []
    items = []
    for doc, val in Table():
        print(f"Extracting DoctorID# {doc}, RegValue# {val}")
        # Renamed from `json` to avoid shadowing the conventional module name.
        payload = {'doctorId': doc, 'regdNoValue': val}
        r = requests.post(
            "https://mciindia.org/MCIRest/open/getDataFromService?service=getDoctorDetailsByIdImr",
            json=payload).json()
        # Materialize the dict views into lists: `dict_keys not in list` worked
        # only via view equality, and lists are what csv.writer expects anyway.
        keys = list(r.keys())
        if keys not in names:
            names.append(keys)
        items.append(list(r.values()))
    print("Done")
    return names, items
def Save():
    """Dump the header rows and detail rows gathered by Details() into data.csv."""
    header_rows, detail_rows = Details()
    with open('data.csv', 'w', newline="") as out_file:
        csv_out = csv.writer(out_file)
        csv_out.writerows(header_rows)
        csv_out.writerows(detail_rows)


Save()
Check the sample output files out.csv and data.csv.
Note: take a look at concurrent.futures
in case you plan a huge scrape of the internal data.
To enter a value into the textbox, you need to induce WebDriverWait() with element_to_be_clickable() and then use send_keys().
To get the table, you need to induce WebDriverWait() with visibility_of_element_located() and wait for the table to become visible.
Code:
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from bs4 import BeautifulSoup as bs
import time

url = "https://www.mciindia.org/CMS/information-desk/indian-medical-register"

driver = webdriver.Chrome(r"C:\chromedriver.exe")
driver.get(url)

# Wait until each control is actually interactable before driving it — this is
# what prevents the ElementNotInteractableException from the question.
wait = WebDriverWait(driver, 10)
wait.until(EC.element_to_be_clickable(
    (By.XPATH, "//a[text()='Year of Registration']"))).click()
wait.until(EC.element_to_be_clickable(
    (By.XPATH, "//input[@id='doctor_year']"))).send_keys("2015")
wait.until(EC.element_to_be_clickable(
    (By.XPATH, "//button[@id='doctor_year_details']"))).click()

# The result table is injected by JavaScript; wait until it has rendered.
WebDriverWait(driver, 20).until(EC.visibility_of_element_located(
    (By.CSS_SELECTOR, "table.table.table-bordered.dataTable.no-footer")))

next_page = True
while next_page:
    soup = bs(driver.page_source, 'html.parser')
    table1 = soup.find('table', {'id': 'doct_info2'})
    # Guard explicitly instead of the original bare `except: pass`.
    if table1 is not None:
        for row in table1.find_all('tr'):
            data = row.find_all('td')
            if len(data) != 7:
                continue
            # BUG FIX: `data[6].click()` cannot work — data[6] is a
            # BeautifulSoup Tag with no .click() method, so the original raised
            # AttributeError on every row and the bare except hid it.  Print
            # the parsed cells; opening the per-doctor popup requires clicking
            # the live element through Selenium.
            print(data)
    time.sleep(5)
    try:
        driver.find_element_by_xpath("//a[contains(text(),'Next')]").click()
    except NoSuchElementException:
        # Absence of a "Next" link marks the last result page.
        print('No more pages')
        next_page = False

driver.close()
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With