gpt4 book ai didi

python - UnicodeEncodeError: 'ascii' codec can't encode character u'\xc7' in position 0: when writing to .CSV

转载 作者:太空宇宙 更新时间:2023-11-04 07:10:36 24 4
gpt4 key购买 nike

我有这个代码:

#!/usr/local/bin/python
# -*- coding: utf-8 -*-

import re
import urllib2
import BeautifulSoup
import csv

# Scrapes an association directory: for every listing row it reads the name
# and theme, follows the row's popup link, collects the e-mail address and the
# non-empty table cells from the detail page, and finally writes the collected
# values to MYFILE.csv.
#
# NOTE: the leading whitespace of this listing was lost when it was pasted;
# the nesting below is reconstructed from the control flow.

# Listing URL without its page number; the number is appended per request.
origin_site = 'http://typo3.nimes.fr/index.php?id=annuaire_assos&theme=0&rech=&num_page='

# findall() of a pattern that captures the first argument of a
# window.open('...','','toolbar=0,...) call.
get_url = re.compile(r"""window.open\('(.*)','','toolbar=0,""", re.DOTALL).findall

pages = range(1,2)

for page_no in pages:
    req = ('%s%s' % (origin_site, page_no))
    # NOTE(review): these headers are built but never passed to urlopen().
    user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
    headers = { 'User-Agent' : user_agent }
    try:
        # Fetch once and keep the response, instead of probing the URL and
        # then requesting it a second time.
        doc = urllib2.urlopen(req)
    except urllib2.URLError:
        # Best effort: a listing page that cannot be fetched is skipped.
        pass
    else:
        # do something with the page
        soup = BeautifulSoup.BeautifulSoup(doc)
        infoblock = soup.findAll('tr', { "class" : "menu2" })
        for item in infoblock:
            assoc_data = []
            # Re-parse the single row so the searches below stay inside it.
            soup = BeautifulSoup.BeautifulSoup(str(item))
            for tag in soup.recursiveChildGenerator():
                # Compare with ==: ('td') is a plain string, so `in` would be
                # a substring test rather than tuple membership.
                if isinstance(tag, BeautifulSoup.Tag) and tag.name == 'td':
                    if tag.string is not None:
                        assoc_name = (tag.string)
                if isinstance(tag, BeautifulSoup.Tag) and tag.name == 'u':
                    if tag.string is not None:
                        assoc_theme = (tag.string)

            get_onclick = str(soup('a')[0]['onclick']) # get the 'onclick' attribute
            url = get_url(get_onclick)[0]

            try:
                assoc_page = urllib2.urlopen(url)
            except urllib2.URLError:
                # Best effort: a detail page that cannot be fetched is skipped.
                pass
            else:
                soup_page = BeautifulSoup.BeautifulSoup(assoc_page)
                assoc_desc = soup_page.find('table', { "bgcolor" : "#FFFFFF" })
                get_address = str(soup_page('td', { "class" : "menu2" }))
                soup_address = BeautifulSoup.BeautifulSoup(get_address)
                for tag in soup_address.recursiveChildGenerator():
                    if isinstance(tag, BeautifulSoup.Tag) and tag.name == 'a':
                        if tag.string is not None:
                            assoc_email = (tag.string)
                assoc_data.append(assoc_theme)
                assoc_data.append(assoc_name)
                assoc_data.append(assoc_email)
                for tag in soup_address.recursiveChildGenerator():
                    if isinstance(tag, BeautifulSoup.Tag) and tag.name == 'td':
                        if tag.string is not None:
                            # Skip cells whose text is only this single
                            # whitespace character.
                            if tag.string != ' ':
                                get_string = BeautifulSoup.BeautifulSoup(tag.string)
                                assoc_data.append(get_string)

# NOTE(review): the nesting level of this final write could not be recovered
# from the pasted listing; it is placed at top level here -- confirm.
# Each collected value is handed to writerow() as if it were a whole row;
# presumably this is the statement that raises the UnicodeEncodeError
# reported for this script.
c = csv.writer(open("MYFILE.csv", "wb"))
for item in assoc_data:
    c.writerow(item)

但是得到这个错误:

UnicodeEncodeError: 'ascii' codec can't encode character u'\xc7' in position 0: ordinal not in range(128)

如何将法语字符写入 MYFILE.csv 文件?另外,这段代码还有可以改进的地方吗?

最佳答案

请滚动到 csv 模块文档页面的底部(示例部分):http://docs.python.org/library/csv.html

具体来说,使用文档中给出的这个 writer 类:

class UnicodeWriter:
    """
    A CSV writer which will write rows to CSV file "f",
    which is encoded in the given encoding.

    Rows are first written as UTF-8 into an in-memory queue by a regular
    csv.writer, then decoded and re-encoded into the target encoding before
    being written to "f".

    The enclosing module must import csv, codecs and cStringIO.
    """

    def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
        """
        Set up the intermediate queue and the encoder.

        f        -- stream that receives the encoded CSV data
        dialect  -- csv dialect passed to the underlying csv.writer
        encoding -- target encoding of the data written to "f"
        kwds     -- extra keyword arguments forwarded to csv.writer
        """
        # Redirect output to a queue
        self.queue = cStringIO.StringIO()
        self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
        self.stream = f
        self.encoder = codecs.getincrementalencoder(encoding)()

    def writerow(self, row):
        """Write one row; every item of "row" must provide .encode()."""
        self.writer.writerow([s.encode("utf-8") for s in row])
        # Fetch UTF-8 output from the queue ...
        data = self.queue.getvalue()
        data = data.decode("utf-8")
        # ... and reencode it into the target encoding
        data = self.encoder.encode(data)
        # write to the target stream
        self.stream.write(data)
        # empty queue
        self.queue.truncate(0)

    def writerows(self, rows):
        """Write every row of "rows" in order, using writerow()."""
        for row in rows:
            self.writerow(row)

然后,不要再使用

# The plain csv.writer construction used in the question's script.
c = csv.writer(open("MYFILE.csv", "wb"))

而是使用

# Same output file, wrapped in the UnicodeWriter class defined above.
c = UnicodeWriter(open("MYFILE.csv", "wb"))

关于 python - UnicodeEncodeError: 'ascii' codec can't encode character u'\xc7' in position 0: when writing to .CSV,我们在 Stack Overflow 上找到一个类似的问题: https://stackoverflow.com/questions/12886918/

24 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com