gpt4 book ai didi

python - 如何以最好的方式 append 从维基百科抓取的 python 代码

转载 作者:行者123 更新时间:2023-12-01 00:34:19 24 4
gpt4 key购买 nike

需要帮助将代码写入 csv 文件。

我已经写好了代码,但不知道把结果写入 csv 文件的最佳方法。我尝试过使用 data.append,但按照目前代码的写法,每次只会写入一部分数据(只有标题或只有年份),而不是完整的一行。我该怎样把它们合并到同一行?我想要的输出是这样的:

Title, Year

Bla, 2000

但这就是我尝试时的样子:

bla,

bla,

, 2000

, 2000

这是我的整个代码:

import requests
import csv
from bs4 import BeautifulSoup
import pandas as pd
import re
from datetime import timedelta
import datetime
import time


# Pages to scrape.
urls = ['https://en.wikipedia.org/wiki/Transistor_count']
# Accumulates the row dicts that are written to data.csv at the end.
data =[]

# Download each page and extract table data from it.
# NOTE(review): the indentation of this snippet has been lost, so the exact
# extent of each loop / if body below cannot be determined from the text alone.
for url in urls:
my_url = requests.get(url)
html = my_url.content
soup = BeautifulSoup(html,'html.parser')

# Current time formatted as 'YYYY-MM-DD HH:MM:SS'; `st` is not used again
# anywhere in this snippet.
ts = time.time()
st = datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S')


# Look up a table with class 'wikitable sortable', then collect all of its
# rows (<tr>) and all of its cells (<td>).
My_table = soup.find('table',{'class':'wikitable sortable'})
links = My_table.findAll('tr')
number = My_table.findAll('td')
# [0::1] is a plain copy; [0::2] keeps every second cell; [1::3] then keeps
# every third of those, starting at index 1.
numbers = number[0::1]
numberss = numbers[0::2]
numbersss = numberss[0::1]
numbering = numbersss[1::3]
# Print the text of the selected cells; the `break` fires on the cell whose
# text starts with "Apple A9X". Nothing from this loop is added to `data`.
for num in numbering:
nums = num.text
print(nums)
if nums.startswith("Apple A9X"):
break

# Same table lookup repeated; this time every third of the every-second cells
# is taken starting at index 2 (presumably the year column -- TODO confirm).
My_table2 = soup.find('table',{'class':'wikitable sortable'})
links2 = My_table2.findAll('tr')
number2 = My_table2.findAll('td')
numbers2 = number2[0::1]
numberss2 = numbers2[0::2]
numbering2 = numberss2[2::3]
for num2 in numbering2:
nums2 = num2.text
# Only values starting with 2015..2019 are printed.
if nums2.startswith("2015") or nums2.startswith("2016") or nums2.startswith("2017") or nums2.startswith("2018") or nums2.startswith("2019"):
print(nums2)

# Each dict appended here holds only a 'Year' key; the 'Title' values are
# appended further down as separate dicts, so each CSV row ends up with only
# one of the two columns filled.
# NOTE(review): whether this append sits inside the `if` above is not
# recoverable from the flattened text.
data.append({'Year':nums2})

# Walk the rows of the first table and take the first cell of each row.
for link in links:
My_row = link.find('td')
# Rows without any <td> (e.g. header rows) yield None.
if My_row == None:
print(None)
else:
My_rows = My_row.text
print(My_rows)

# Appends a dict holding only a 'Title' key (no 'Year').
data.append({
'Title':My_rows})



# Repeat the first-cell extraction for the next sibling <table> of My_table.
# `links2` is reassigned here (it was first set from My_table2 above).
My_second_table = My_table.find_next_sibling('table')
links2 = My_second_table.findAll('tr')
for linka in links2:
My_new_row = linka.find('td')
if My_new_row == None:
print(None)
else:
My_new_rows = My_new_row.text
print(My_new_rows)

# Again a dict holding only a 'Title' key.
data.append({
'Title':My_new_rows
})


# Write every collected dict as one CSV row under the header Title,Year.
with open('data.csv', 'w',encoding='UTF-8', newline='') as f:
fields = ['Title', 'Year']
writer = csv.DictWriter(f, fieldnames=fields)
writer.writeheader()
writer.writerows(data)
# Read the file back with pandas; the head() and describe() results are stored
# but not printed.
testing = pd.read_csv('data.csv')
heading = testing.head()
discription = testing.describe()
#print(heading)

我可能把这件事弄得比实际需要的更复杂了 =P,所以如果您有任何简化它的办法,我将不胜感激;不过,我主要是想找到一种把结果写入 csv 文件的方法。

谢谢!

最佳答案

按照您的方式使用BeautifulSoup,但进行了一些更改

尝试下面的代码,

import urllib.request
from bs4 import BeautifulSoup
import csv


# Scrape the first "wikitable sortable" table of each page in `urls` and write
# one (Title, Year) row per table row to data.csv.

# Pages to scrape.
urls = ['https://en.wikipedia.org/wiki/Transistor_count']

# Open the output file exactly once, with newline='' as the csv module
# requires, so the header and the data rows share one writer. The original
# reopened the file in append mode without newline='', which yields blank
# lines between rows on Windows.
with open('data.csv', 'w', encoding='UTF-8', newline='') as f:
    fields = ['Title', 'Year']
    writer = csv.writer(f, delimiter=',')
    writer.writerow(fields)

    for url in urls:
        # Close the HTTP response as soon as the body has been read.
        with urllib.request.urlopen(url) as response:
            html = response.read()
        soup = BeautifulSoup(html, 'html.parser')

        my_table = soup.find('table', {'class': 'wikitable sortable'})
        if my_table is None:
            # No matching table on this page: nothing to write for it.
            continue

        # [2:] skips the first two rows (an empty row and the header row).
        for tr in my_table.find_all('tr')[2:]:
            tds = tr.find_all('td')
            # A row may have fewer cells than expected; fall back to an empty
            # string for a missing cell instead of hiding every possible error
            # behind a bare `except:`.
            title = tds[0].text.replace('\n', '') if len(tds) > 0 else ''
            year = tds[2].text.replace('\n', '') if len(tds) > 2 else ''

            writer.writerow([title, year])

关于python - 如何以最好的方式 append 从维基百科抓取的 python 代码,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/57922689/

24 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com