gpt4 book ai didi

python - 试图理解一段非常简单的 python 代码 : return [x[0] for x in y] 的行为

转载 作者:太空宇宙 更新时间:2023-11-04 08:17:21 25 4
gpt4 key购买 nike

我原以为(而且至今仍不明白为什么会有所不同)这段代码:

def categories(self):
    """Return d[0] for the first row the query yields.

    Returns None when the query yields no rows, because the loop body
    never runs and the function falls off the end.
    """
    cur = self.con.execute('select category from cc')
    for d in cur:
        # The return sits inside the loop body.
        return d[0]

等同于这个:

def categories(self):
    """Return a list holding d[0] for every row the query yields."""
    cur = self.con.execute('select category from cc')
    return [d[0] for d in cur]

但是当我在代码中用其中一个替换另一个时,代码的其他地方却出现了错误:

  File "C:\Users\CG\Desktop\Google Drive\Sci&Tech\projects\naivebayes\main.py", line 226, in post
spam_result = nb.classify(given_sentence)
File "C:\Users\CG\Desktop\Google Drive\Sci&Tech\projects\naivebayes\main.py", line 204, in classify
if cat==best: continue
UnboundLocalError: local variable 'best' referenced before assignment

为什么会这样?为什么这两段代码不等价?

完整代码:

# -*- coding: utf-8 -*-
# -*- coding: utf-8 -*-

import sqlite3

import USSSALoader

import random

from pysqlite2 import dbapi2 as sqlite

import re

import math

def getfeatures(doc):
    """Return the distinct lower-cased words of `doc` as a {word: 1} dict.

    Only words longer than 2 and shorter than 20 characters are kept.
    """
    # Split on runs of non-word characters.  The pattern is \W+ rather
    # than \W*: a pattern that can match the empty string makes
    # re.split() on Python 3.7+ cut between every single character.
    words = [s.lower() for s in re.split(r'\W+', doc) if 2 < len(s) < 20]
    # Dict keys are unique, so repeated words collapse to one entry.
    return {w: 1 for w in words}


class classifier:
    """Feature and category counter backed by an SQLite database.

    Training state lives in two tables: ``fc(feature, category, count)``
    and ``cc(category, count)``.
    """

    def __init__(self, getfeatures, filename=None):
        """Open the database and create the count tables if missing.

        getfeatures -- callable turning a document into an iterable of
                       features.
        filename    -- optional database path; 'db_file' when omitted.
        """
        # Counts of feature/category combinations
        self.fc = {}
        # Counts of documents in each category
        self.cc = {}
        self.getfeatures = getfeatures

        self.con = sqlite.connect(filename or 'db_file')
        self.con.execute('create table if not exists fc(feature,category,count)')
        self.con.execute('create table if not exists cc(category,count)')

    def incf(self, f, cat):
        """Add one to the stored count of feature `f` in category `cat`."""
        count = self.fcount(f, cat)
        # Values are bound as parameters so that quotes inside a feature
        # cannot break (or alter) the statement.
        if count == 0:
            self.con.execute('insert into fc values (?,?,1)', (f, cat))
        else:
            self.con.execute(
                'update fc set count=? where feature=? and category=?',
                (int(count + 1), f, cat))

    def fcount(self, f, cat):
        """Return how often `f` was seen in `cat` (0 if never)."""
        row = self.con.execute(
            'select count from fc where feature=? and category=?',
            (f, cat)).fetchone()
        if row is None:
            return 0
        return float(row[0])

    def incc(self, cat):
        """Add one to the stored document count of category `cat`."""
        count = self.catcount(cat)
        if count == 0:
            self.con.execute('insert into cc values (?,1)', (cat,))
        else:
            self.con.execute('update cc set count=? where category=?',
                             (int(count + 1), cat))

    def catcount(self, cat):
        """Return the number of documents stored for `cat` (0 if none)."""
        row = self.con.execute('select count from cc where category=?',
                               (cat,)).fetchone()
        if row is None:
            return 0
        return float(row[0])

    def categories(self):
        """Return a list with the category of every row in cc."""
        cur = self.con.execute('select category from cc')
        # Build the whole list.  A `return` placed inside a `for` loop
        # over the cursor would exit on the first row and hand back a
        # single string instead.
        return [row[0] for row in cur]

    def totalcount(self):
        """Return the sum of all category counts (0 for an empty table)."""
        row = self.con.execute('select sum(count) from cc').fetchone()
        # sum() over an empty table still yields one row, holding NULL.
        if row is None or row[0] is None:
            return 0
        return row[0]

    def train(self, item, cat):
        """Record `item` as an example of category `cat` and commit."""
        features = self.getfeatures(item)
        # Increment the count for every feature with this category
        for f in features:
            self.incf(f, cat)
        # Increment the count for this category
        self.incc(cat)
        self.con.commit()

    def fprob(self, f, cat):
        """Return fcount(f, cat) / catcount(cat), or 0 for an empty category."""
        in_category = self.catcount(cat)
        if in_category == 0:
            return 0
        # The total number of times this feature appeared in this
        # category divided by the total number of items in this category
        return self.fcount(f, cat) / in_category

    def weightedprob(self, f, cat, prf, weight=1.0, ap=0.5):
        """Blend `prf(f, cat)` with the assumed probability `ap`.

        `weight` is how many observations `ap` counts for; the more often
        `f` has been seen overall, the closer the result is to prf(f, cat).
        """
        # Calculate current probability
        basicprob = prf(f, cat)
        # Count the number of times this feature has appeared in
        # all categories
        totals = sum(self.fcount(f, c) for c in self.categories())
        # Calculate the weighted average
        return ((weight * ap) + (totals * basicprob)) / (weight + totals)

class naivebayes(classifier):
    """Naive Bayes document classifier built on the stored counts."""

    def __init__(self, getfeatures):
        """Set up the underlying classifier and an empty threshold table."""
        classifier.__init__(self, getfeatures)
        # Per-category factor by which the winner must beat every rival
        self.thresholds = {}

    def docprob(self, item, cat):
        """Return the product of weightedprob over all features of `item`."""
        features = self.getfeatures(item)
        # Multiply the probabilities of all the features together
        p = 1
        for f in features:
            p *= self.weightedprob(f, cat, self.fprob)
        return p

    def prob(self, item, cat):
        """Return docprob(item, cat) scaled by the share of `cat` documents."""
        total = self.totalcount()
        # Nothing trained yet: avoid dividing by zero (or by None).
        if not total:
            return 0.0
        catprob = self.catcount(cat) / total
        return self.docprob(item, cat) * catprob

    def setthreshold(self, cat, t):
        """Store threshold `t` for category `cat`."""
        self.thresholds[cat] = t

    def getthreshold(self, cat):
        """Return the threshold stored for `cat`, 1.0 when none was set."""
        if cat not in self.thresholds:
            return 1.0
        return self.thresholds[cat]

    def classify(self, item, default=None):
        """Return the most probable category for `item`, else `default`.

        `default` is returned when no category scores above zero, or when
        a rival's probability times the winner's threshold exceeds the
        winner's probability.
        """
        probs = {}
        # Find the category with the highest probability.  `best` starts
        # as None so it is always bound, even if no score exceeds 0.
        best = None
        best_prob = 0.0
        for cat in self.categories():
            probs[cat] = self.prob(item, cat)
            if probs[cat] > best_prob:
                best_prob = probs[cat]
                best = cat

        if best is None:
            return default

        # Make sure the probability exceeds threshold*next best
        for cat in probs:
            if cat == best:
                continue
            if probs[cat] * self.getthreshold(best) > probs[best]:
                return default
        return best

def sampletrain(cl):
    """Train `cl` on five fixed sample documents via `cl.train(text, label)`."""
    samples = (
        ('Nobody owns the water.', 'good'),
        ('the quick rabbit jumps fences', 'good'),
        ('buy pharmaceuticals now', 'bad'),
        ('make quick money at the online casino', 'bad'),
        ('the quick brown fox jumps', 'good'),
    )
    for text, label in samples:
        cl.train(text, label)


# Build the classifier; its constructor opens the SQLite database 'db_file'
# and creates the count tables if they do not exist yet.
nb = naivebayes(getfeatures)

# train() commits to that database file, so the sample counts persist and
# are added again on every run of this script.
sampletrain(nb)


doc_test = "buy pharmaceuticals now or earn money at the online casino"

# Classify the test sentence and print the result.
print ('\ndoc_test is classified as %s'%nb.classify(doc_test))

最佳答案

一个函数只返回一次。

当你看到

for d in cur:
return d[0]

循环在第一次迭代时就会返回,因此函数只返回第一行的 d[0] 这一个值,其余各行都不会被处理。

但是这个列表推导式(list comprehension)

return [d[0] for d in cur]

遍历 cur 中的每个项目以创建一个列表,然后返回结果。

关于python - 试图理解一段非常简单的 python 代码 : return [x[0] for x in y] 的行为,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/11977301/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com