gpt4 book ai didi

python - 尝试使用 Python 和 BS4 将抓取的值导入 MySQL 表时抛出警告

转载 作者:行者123 更新时间:2023-11-29 21:06:42 24 4
gpt4 key购买 nike

当我尝试将分配的值导入指定的 MySQL 表时,收到截断数据警告。

这些值都已存入 SQL 表中,但“U.S.”以及其中一个后面带有大量空白字符(" ")的日期除外。

我该如何转换这些值,才能让这些被截断的值能够被正常存入?

 : Warning: Data truncated for column 'last_count' at row 1
cur.execute('INSERT IGNORE INTO RIGCOUNT (area, last_count, count, change_from_prior_count, date_of_prior_count, change_from_last_year, date_of_last_year_count) VALUES (\"%s\", \"%s\", \"%s\", \"%s\", \"%s\", \"%s\", \"%s\")',(area, last_count, count, change_from_prior_count, date_of_prior_count, change_from_last_year, date_of_last_year_count))
{'count': u'440', 'area': u'U.S.', 'change_from_prior_count': u'-3', 'last_count': u'15 April\r\n 2016', 'change_from_last_year': u'-514', 'date_of_last_year_count': u'17 April 2015', 'date_of_prior_count': u'8 April 2016'}

import scraperwiki
import requests
from bs4 import BeautifulSoup
import csv
import MySQLdb

# MySQL setup: open a connection to the local `testdb` database and create
# the cursor that `store` uses for its INSERT statements.
mydb = MySQLdb.connect(
    host='localhost',
    user='********',
    passwd='*******',
    db='testdb',
)
cur = mydb.cursor()

def store(area, last_count, count, change_from_prior_count,
          date_of_prior_count, change_from_last_year,
          date_of_last_year_count):
    """Insert one scraped row into the RIGCOUNT table and commit it.

    Each argument is the value for the RIGCOUNT column of the same name.
    The statement is executed on the module-level cursor ``cur`` and the
    cursor's connection is committed right after the insert.
    """
    # The placeholders are deliberately NOT wrapped in quotes: MySQLdb escapes
    # and quotes every parameter itself, so writing "%s" would make the
    # driver's own quote characters part of the stored value.
    cur.execute(
        'INSERT IGNORE INTO RIGCOUNT '
        '(area, last_count, count, change_from_prior_count, '
        'date_of_prior_count, change_from_last_year, '
        'date_of_last_year_count) '
        'VALUES (%s, %s, %s, %s, %s, %s, %s)',
        (area, last_count, count, change_from_prior_count,
         date_of_prior_count, change_from_last_year,
         date_of_last_year_count),
    )
    cur.connection.commit()


# Download the rig-count overview page and parse it with the stdlib HTML parser.
base_url = 'http://phx.corporate-ir.net/phoenix.zhtml?c=79687&p=irol-rigcountsoverview'
html = requests.get(base_url)
soup = BeautifulSoup(html.content, "html.parser")

# The rows are read from the second <table> on the page.
table = soup.findAll('table')
rows = table[1].findAll("tr")
# Drop the first <tr> (NOTE(review): presumably the header row -- confirm
# against the live page).
if len(soup.findAll('tr')) > 0:
    rows = rows[1:]

# Column names, in the order their <td> cells appear within a row.
field_names = (
    'area',
    'last_count',
    'count',
    'change_from_prior_count',
    'date_of_prior_count',
    'change_from_last_year',
    'date_of_last_year_count',
)

for row in rows:
    cells = row.findAll('td')
    # Raw cell text, taken as-is (no whitespace clean-up in this version).
    values = [cells[index].get_text() for index in range(len(field_names))]
    store(*values)
    data = dict(zip(field_names, values))

    # A parenthesised single-argument print produces the same output under
    # Python 2 and Python 3.
    print(data)
    print('\n')
mydb.close()

最佳答案

... one of the dates that has a massive amount of " " after it.

您可以使用正则表达式将多个空格替换为一个空格。

import scraperwiki
import requests
from bs4 import BeautifulSoup
import csv
import MySQLdb
import re

# MySQL setup: open a connection to the local `testdb` database and create
# the cursor that `store` uses for its INSERT statements.
mydb = MySQLdb.connect(
    host='localhost',
    user='******',
    passwd='******',
    db='testdb',
)
cur = mydb.cursor()


def store(area, last_count, count, change_from_prior_count,
          date_of_prior_count, change_from_last_year,
          date_of_last_year_count):
    """Insert one scraped row into the RIGCOUNT table and commit it.

    Each argument is the value for the RIGCOUNT column of the same name.
    The statement is executed on the module-level cursor ``cur`` and the
    cursor's connection is committed right after the insert.
    """
    # The placeholders are deliberately NOT wrapped in quotes: MySQLdb escapes
    # and quotes every parameter itself, so writing "%s" would make the
    # driver's own quote characters part of the stored value.
    cur.execute(
        'INSERT IGNORE INTO RIGCOUNT '
        '(area, last_count, count, change_from_prior_count, '
        'date_of_prior_count, change_from_last_year, '
        'date_of_last_year_count) '
        'VALUES (%s, %s, %s, %s, %s, %s, %s)',
        (area, last_count, count, change_from_prior_count,
         date_of_prior_count, change_from_last_year,
         date_of_last_year_count),
    )
    cur.connection.commit()


base_url = 'http://phx.corporate-ir.net/phoenix.zhtml?c=79687&p=irol-rigcountsoverview'

# Any run of whitespace (spaces, tabs, \r, \n) is collapsed to one space so
# that values such as u'15 April\r\n          2016' become u'15 April 2016'.
white_space_pattern = re.compile(r"\s+")

# Number of <td> cells a row must provide: one per RIGCOUNT column.
expected_cells = 7

try:
    # Download the rig-count overview page and parse it.
    html = requests.get(base_url)
    soup = BeautifulSoup(html.content, "html.parser")

    # The rows are read from the second <table>; the first <tr> is dropped
    # (NOTE(review): presumably the header row -- confirm against the page).
    # Slicing an empty list is safe, so no length check is needed first.
    table = soup.findAll('table')
    rows = table[1].findAll("tr")[1:]

    for row in rows:
        # Collapse inner whitespace, then strip what is left at either end.
        cells = [white_space_pattern.sub(' ', cell.get_text()).strip()
                 for cell in row.findAll('td')]

        # A row without a full set of cells cannot be mapped onto the table
        # columns; skip it instead of failing with an IndexError.
        if len(cells) < expected_cells:
            continue

        (area, last_count, count, change_from_prior_count,
         date_of_prior_count, change_from_last_year,
         date_of_last_year_count) = cells[:expected_cells]

        store(area, last_count, count, change_from_prior_count,
              date_of_prior_count, change_from_last_year,
              date_of_last_year_count)
        data = {
            'area': area,
            'last_count': last_count,
            'count': count,
            'change_from_prior_count': change_from_prior_count,
            'date_of_prior_count': date_of_prior_count,
            'change_from_last_year': change_from_last_year,
            'date_of_last_year_count': date_of_last_year_count,
        }

        # A parenthesised single-argument print produces the same output
        # under Python 2 and Python 3.
        print(data)
        print('\n')
finally:
    # Release the connection even when the download or parsing fails.
    mydb.close()

关于python - 尝试使用 Python 和 BS4 将抓取的值导入 MySQL 表时抛出警告,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/36753497/

24 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com