
Web Scraping - Python; Writing to a CSV

I am trying to scrape data from a website. The data is laid out as an HTML table, with a <tr> tag opening each new entry in the rankings and a <td> tag for each descriptive item about that entry. The list ranks the top 500 computers, listed 1-100 per page, with each ranked entry starting its own <tr> row and each characteristic of the computer (its storage, max power, etc.) in its own <td> cell.
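For reference, here is a minimal sketch of the structure I mean (the tag layout is as described above; the values are made up for illustration):

from bs4 import BeautifulSoup

html = """
<table>
  <tr><th>Rank</th><th>System</th><th>Cores</th></tr>
  <tr><td>1</td><td>Summit</td><td>2282544</td></tr>
</table>
"""
soup = BeautifulSoup(html, "html.parser")

# each <tr> is one row; the <th>/<td> cells hold the fields
for tr in soup.find_all("tr"):
    print([cell.get_text(strip=True) for cell in tr.find_all(["th", "td"])])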

Here is my code:

import requests
from bs4 import BeautifulSoup

# read the data from a URL
url = requests.get("https://www.top500.org/list/2018/06/")
url.status_code
url.content


# parse the response using Beautiful Soup
soup = BeautifulSoup(url.content, 'html.parser')

filename = "computerRank10.csv"
f = open(filename,"w")

headers = "Rank, Site, System, Cores, RMax, RPeak, Power\n"
f.write(headers)

for record in soup.findAll('tr'):
  # start building the record with an empty string
  tbltxt = ""
  for data in record.findAll('td'):
    tbltxt = tbltxt + data.text + ";"
  tbltxt = tbltxt.replace('\n', ' ')
  tbltxt = tbltxt.replace(',', '')
#  f.write(tbltxt[0:-1] + '\n')
  f.write(tbltxt + '\n')

f.close()

I'm getting nothing, and my CSV file is always blank.

asked Feb 03 '26 by BeginningFinanceVBAcoder

2 Answers

You should use the csv module from the Python standard library.

Here is a simpler solution:

import requests
import csv
from bs4 import BeautifulSoup as bs

url = requests.get("https://www.top500.org/list/2018/06")
soup = bs(url.content, 'html.parser')

filename = "computerRank10.csv"

# newline="" keeps the csv module from writing blank rows on Windows,
# and the with-block makes sure the file is flushed and closed
with open(filename, 'w', newline='') as f:
    csv_writer = csv.writer(f)

    for tr in soup.find_all("tr"):
        data = []
        # header cells (present only in the first row)
        for th in tr.find_all("th"):
            data.append(th.text)
        if data:
            print("Inserting headers: {}".format(','.join(data)))
            csv_writer.writerow(data)
            continue

        for td in tr.find_all("td"):
            if td.a:
                data.append(td.a.text.strip())
            else:
                data.append(td.text.strip())
        if data:
            print("Inserting data: {}".format(','.join(data)))
            csv_writer.writerow(data)
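Note that this scrapes only the single page at that URL, i.e. the first 100 entries; to collect all 500 you would need to loop over the paginated URLs, as the second answer below does.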
answered Feb 06 '26 by rockikz


Try the script below. It should fetch all the data across the paginated results and write it to a CSV file:

import csv
import requests
from bs4 import BeautifulSoup

link = "https://www.top500.org/list/2018/06/?page={}"

def get_data(link):
    # the list is spread across five pages of 100 entries each
    for url in [link.format(page) for page in range(1,6)]:
        res = requests.get(url)
        soup = BeautifulSoup(res.text,"lxml")

        for items in soup.select("table.table tr"):
            td = [item.get_text(strip=True) for item in items.select("th,td")]
            writer.writerow(td)

if __name__ == '__main__':
    # if an encoding issue comes up, open with ('tabularitem.csv', 'w', newline="", encoding="utf-8")
    with open("tabularitem.csv", "w", newline="") as outfile:
        writer = csv.writer(outfile)
        get_data(link)
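Note that "lxml" is a third-party parser (installed with pip install lxml); if it isn't available, BeautifulSoup's built-in "html.parser" should work as a drop-in replacement here.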
answered Feb 06 '26 by SIM