gpt4 book ai didi

python - Unicode解码错误: 'utf-8' codec can't decode byte 0xd5 in position 3362: invalid continuation byte

转载 作者:行者123 更新时间:2023-12-01 09:13:45 24 4
gpt4 key购买 nike

我正在尝试运行这个以前有效的脚本:

import csv
from selenium import webdriver
from time import sleep
from parsel import Selector
from selenium.webdriver.common.keys import Keys
from collections import defaultdict
from selenium.webdriver.support.select import Select

####### reading from the input file ##########

columns = defaultdict(list) # each value in each column is appended to a list

# get the list of keywords from the csv file
with open('query.csv', 'r') as csvfile:
reader = csv.DictReader(csvfile) # read rows into a dictionary format
for row in reader: # read a row as {column1: value1, column2: value2,...}
for (k, v) in row.items(): # go over each column name and value
columns[k].append(v) # append the value into the appropriate list

# the list containing all of the keywords
search_query_list = columns['Keyword']

########## start scraping ###############

rb_results = []

# create a driver and let it open google chrome
driver = webdriver.Chrome("chromedriver")

# get linkedin website
driver.get('https://www.redbubble.com/')

sleep(0.5)


for i in range(len(search_query_list)):

next_query = search_query_list[i]

# get RB website
driver.get('https://www.redbubble.com/')

# get the search by its id
search_bar = driver.find_element_by_name("query")

sleep(0.5)

# enter the query to the search bar
search_bar.send_keys(next_query)

# press enter
search_bar.send_keys(Keys.RETURN)
sleep(1)


# from parsel's selector get the page source
sel1 = Selector(text=driver.page_source)
sleep(0.5)

# prima maglietta //
continue_link = driver.find_element_by_class_name('shared-components-ShopSearchSkeleton-ShopSearchSkeleton__composedComponentWrapper--1s_CI').click()
sleep(1)

sel2 = Selector(text=driver.page_source)
sleep(0.5)

################## get TAGS ###############

# Check tags for all products
try:
# get the tags for the search query
tags_rb = driver.find_element_by_class_name("shared-components-Tags-Tags__listContent--oLdDf").text
tags_rb = str(tags_rb)
# if number of products is found print it and search for the prime
# print the number of products found
if tags_rb == None:
rb_results.append("0")

else:

#rb_results = str(tags_rb)
rb_results.append(tags_rb)

except ValueError:
continue

#except:
#rb_results.append("errore")

###### writing part ########
with open ("rb_results.csv","w", newline='') as resultFile:
writer = csv.DictWriter(resultFile, fieldnames=["Rb Results"],delimiter='\t')
writer.writeheader()

writer.writerows({'Rb Results': item} for item in rb_results)

resultFile.close()

当我运行此脚本时,我遇到此错误:

Traceback (most recent call last): File "rb-spider.py", line 18, in for row in reader: # read a row as {column1: value1, column2: value2,...} File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/csv.py", line 111, in next self.fieldnames File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/csv.py", line 98, in fieldnames self._fieldnames = next(self.reader) File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/codecs.py", line 321, in decode (result, consumed) = self._buffer_decode(data, self.errors, final) UnicodeDecodeError: 'utf-8' codec can't decode byte 0xd5 in position 3362: invalid continuation byte

我不明白为什么会出现这个错误。有什么想法吗?

最佳答案

尝试使用encoding属性。

# get the list of keywords from the csv file
with open('query.csv', 'r', encoding='utf-8') as csvfile:
...

关于python - Unicode解码错误: 'utf-8' codec can't decode byte 0xd5 in position 3362: invalid continuation byte,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/51421807/

24 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com