python - 属性错误: 'LdaModel' object has no attribute 'minimum_phi

python - 属性错误: 'LdaModel' object has no attribute 'minimum_phi_value'

转载作者：行者123 更新时间：2023-12-01 09:26:45

由于我刚刚尝试 NLP，然后我正在研究讽刺检测，但与此同时我已经放置了这段代码。

sarcasmextractor.py

# coding: utf-8

# Importing the library

# In[2]:

import io
import sys
import os
import numpy as np
import pandas as pd
import nltk
import gensim
import csv, collections
from textblob import TextBlob
from sklearn.utils import shuffle
from sklearn.svm import LinearSVC
from sklearn.metrics import classification_report
from sklearn.feature_extraction import DictVectorizer
import pickle
import replace_emoji


# Define a class to load the SentimentWordnet and write methods to calculate the scores

# In[4]:

class load_senti_word_net(object):
    """
    constructor to load the file and read the file as CSV
    6 columns - pos, ID, PosScore, NegScore, synsetTerms, gloss
    synsetTerms can have multiple similar words like abducting#1 abducent#1 and will read each one and calculaye the scores
    """

    def __init__(self):
        sent_scores = collections.defaultdict(list)
        with io.open("SentiWordNet_3.0.0_20130122.txt") as fname:
            file_content = csv.reader(fname, delimiter='\t',quotechar='"')

            for line in file_content:                
                if line[0].startswith('#') :
                    continue                    
                pos, ID, PosScore, NegScore, synsetTerms, gloss = line
                for terms in synsetTerms.split(" "):
                    term = terms.split("#")[0]
                    term = term.replace("-","").replace("_","")
                    key = "%s/%s"%(pos,term.split("#")[0])
                    try:
                        sent_scores[key].append((float(PosScore),float(NegScore)))
                    except:
                        sent_scores[key].append((0,0))

        for key, value in sent_scores.items():
            sent_scores[key] = np.mean(value,axis=0)

        self.sent_scores = sent_scores    

    """
    For a word,
    nltk.pos_tag(["Suraj"])
    [('Suraj', 'NN')]
    """

    def score_word(self, word):
        pos = nltk.pos_tag([word])[0][1]
        return self.score(word, pos)

    def score(self,word, pos):
        """
        Identify the type of POS, get the score from the senti_scores and return the score
        """

        if pos[0:2] == 'NN':
            pos_type = 'n'
        elif pos[0:2] == 'JJ':
            pos_type = 'a'
        elif pos[0:2] =='VB':
            pos_type='v'
        elif pos[0:2] =='RB':
            pos_type = 'r'
        else:
            pos_type =  0

        if pos_type != 0 :    
            loc = pos_type+'/'+word
            score = self.sent_scores[loc]
            if len(score)>1:
                return score
            else:
                return np.array([0.0,0.0])
        else:
            return np.array([0.0,0.0])

    """
    Repeat the same for a sentence
    nltk.pos_tag(word_tokenize("My name is Suraj"))
    [('My', 'PRP$'), ('name', 'NN'), ('is', 'VBZ'), ('Suraj', 'NNP')]    
    """    

    def score_sentencce(self, sentence):
        pos = nltk.pos_tag(sentence)
        print (pos)
        mean_score = np.array([0.0, 0.0])
        for i in range(len(pos)):
            mean_score += self.score(pos[i][0], pos[i][1])

        return mean_score

    def pos_vector(self, sentence):
        pos_tag = nltk.pos_tag(sentence)
        vector = np.zeros(4)

        for i in range(0, len(pos_tag)):
            pos = pos_tag[i][1]
            if pos[0:2]=='NN':
                vector[0] += 1
            elif pos[0:2] =='JJ':
                vector[1] += 1
            elif pos[0:2] =='VB':
                vector[2] += 1
            elif pos[0:2] == 'RB':
                vector[3] += 1

        return vector



# Now let's extract the features
# 
# ###Stemming and Lemmatization

# In[5]:

porter = nltk.PorterStemmer()
sentiments = load_senti_word_net()


# In[7]:

def gram_features(features,sentence):
    sentence_rep = replace_emoji.replace_reg(str(sentence))
    token = nltk.word_tokenize(sentence_rep)
    token = [porter.stem(i.lower()) for i in token]        

    bigrams = nltk.bigrams(token)
    bigrams = [tup[0] + ' ' + tup[1] for tup in bigrams]
    grams = token + bigrams
    #print (grams)
    for t in grams:
        features['contains(%s)'%t]=1.0



# In[8]:

import string
def sentiment_extract(features, sentence):
    sentence_rep = replace_emoji.replace_reg(sentence)
    token = nltk.word_tokenize(sentence_rep)    
    token = [porter.stem(i.lower()) for i in token]   
    mean_sentiment = sentiments.score_sentencce(token)
    features["Positive Sentiment"] = mean_sentiment[0]
    features["Negative Sentiment"] = mean_sentiment[1]
    features["sentiment"] = mean_sentiment[0] - mean_sentiment[1]
    #print(mean_sentiment[0], mean_sentiment[1])

    try:
        text = TextBlob(" ".join([""+i if i not in string.punctuation and not i.startswith("'") else i for i in token]).strip())
        features["Blob Polarity"] = text.sentiment.polarity
        features["Blob Subjectivity"] = text.sentiment.subjectivity
        #print (text.sentiment.polarity,text.sentiment.subjectivity )
    except:
        features["Blob Polarity"] = 0
        features["Blob Subjectivity"] = 0
        print("do nothing")


    first_half = token[0:int(len(token)/2)]    
    mean_sentiment_half = sentiments.score_sentencce(first_half)
    features["positive Sentiment first half"] = mean_sentiment_half[0]
    features["negative Sentiment first half"] = mean_sentiment_half[1]
    features["first half sentiment"] = mean_sentiment_half[0]-mean_sentiment_half[1]
    try:
        text = TextBlob(" ".join([""+i if i not in string.punctuation and not i.startswith("'") else i for i in first_half]).strip())
        features["first half Blob Polarity"] = text.sentiment.polarity
        features["first half Blob Subjectivity"] = text.sentiment.subjectivity
        #print (text.sentiment.polarity,text.sentiment.subjectivity )
    except:
        features["first Blob Polarity"] = 0
        features["first Blob Subjectivity"] = 0
        print("do nothing")

    second_half = token[int(len(token)/2):]
    mean_sentiment_sechalf = sentiments.score_sentencce(second_half)
    features["positive Sentiment second half"] = mean_sentiment_sechalf[0]
    features["negative Sentiment second half"] = mean_sentiment_sechalf[1]
    features["second half sentiment"] = mean_sentiment_sechalf[0]-mean_sentiment_sechalf[1]
    try:
        text = TextBlob(" ".join([""+i if i not in string.punctuation and not i.startswith("'") else i for i in second_half]).strip())
        features["second half Blob Polarity"] = text.sentiment.polarity
        features["second half Blob Subjectivity"] = text.sentiment.subjectivity
        #print (text.sentiment.polarity,text.sentiment.subjectivity )
    except:
        features["second Blob Polarity"] = 0
        features["second Blob Subjectivity"] = 0
        print("do nothing")  





# In[9]:

features = {}
sentiment_extract(features,"a long narrow opening")


# In[11]:

def pos_features(features,sentence):
    sentence_rep = replace_emoji.replace_reg(sentence)
    token = nltk.word_tokenize(sentence_rep)
    token = [ porter.stem(each.lower()) for each in token]
    pos_vector = sentiments.pos_vector(token)
    for j in range(len(pos_vector)):
        features['POS_'+str(j+1)] = pos_vector[j]
    print ("done")



# In[12]:

features = {}
pos_features(features,"a long narrow opening")


# In[13]:

def capitalization(features,sentence):
    count = 0
    for i in range(len(sentence)):
        count += int(sentence[i].isupper())
    features['Capitalization'] = int(count > 3)
    print (count)


# In[14]:

features = {}
capitalization(features,"A LoNg NArrow opening")


# In[15]:

import topic
topic_mod = topic.topic(nbtopic=200,alpha='symmetric')


# In[16]:

topic_mod = topic.topic(model=os.path.join('topics.tp'),dicttp=os.path.join('topics_dict.tp'))


# In[17]:

def topic_feature(features,sentence,topic_modeler):    
    topics = topic_modeler.transform(sentence)    
    for j in range(len(topics)):
        features['Topic :'] = topics[j][1]



# In[18]:

topic_feature(features,"A LoNg NArrow opening",topic_mod)


# In[19]:

def get_features(sentence, topic_modeler):
    features = {}
    gram_features(features,sentence)
    pos_features(features,sentence)
    sentiment_extract(features, sentence)
    capitalization(features,sentence)
    topic_feature(features, sentence,topic_modeler)
    return features


# In[20]:

df = pd.DataFrame()
df = pd.read_csv("dataset_csv.csv", header=0, sep='\t')
df.head()


# In[17]:

import re

for i in range(0,df.size):
    temp = str(df["tweets"][i])
    temp = re.sub(r'[^\x00-\x7F]+','',temp)
    featureset.append((get_features(temp,topic_mod), df["label"][i]))


# In[20]:

c = []
for i in range(0,len(featureset)):
    c.append(pd.DataFrame(featureset[i][0],index=[i]))

result = pd.concat(c)


# In[22]:

result.insert(loc=0,column="label",value='0')


# In[23]:

for i in range(0, len(featureset)):
    result["label"].loc[i] = featureset[i][1]   



# In[25]:

result.to_csv('feature_dataset.csv')


# In[3]:

df = pd.DataFrame()
df = pd.read_csv("feature_dataset.csv", header=0)
df.head()


# In[4]:

get_ipython().magic('matplotlib inline')

import matplotlib as matplot 
import seaborn

result = df


# In[5]:

X = result.drop(['label','Unnamed: 0','Topic :'],axis=1).values


# In[6]:

Y = result['label']


# In[7]:

import pickle
import pefile
import sklearn.ensemble as ek
from sklearn import cross_validation, tree, linear_model
from sklearn.feature_selection import SelectFromModel
from sklearn.externals import joblib
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import confusion_matrix
from sklearn.pipeline import make_pipeline
from sklearn import preprocessing
from sklearn import svm
from sklearn.linear_model import LinearRegression
import sklearn.linear_model as lm


# In[29]:

model = { "DecisionTree":tree.DecisionTreeClassifier(max_depth=10),
         "RandomForest":ek.RandomForestClassifier(n_estimators=50),
         "Adaboost":ek.AdaBoostClassifier(n_estimators=50),
         "GradientBoosting":ek.GradientBoostingClassifier(n_estimators=50),
         "GNB":GaussianNB(),
         "Logistic Regression":LinearRegression()   
}


# In[8]:

X_train, X_test, y_train, y_test = cross_validation.train_test_split(X, Y ,test_size=0.2)


# In[9]:

X_train = pd.DataFrame(X_train)
X_train = X_train.fillna(X_train.mean())

X_test = pd.DataFrame(X_test)
X_test = X_test.fillna(X_test.mean())


# In[38]:

results_algo = {}
for algo in model:
    clf = model[algo]
    clf.fit(X_train,y_train.astype(int))
    score = clf.score(X_test,y_test.astype(int))
    print ("%s : %s " %(algo, score))
    results_algo[algo] = score



# In[39]:

winner = max(results_algo, key=results_algo.get)


# In[40]:

clf = model[winner]
res = clf.predict(X_test)
mt = confusion_matrix(y_test, res)
print("False positive rate : %f %%" % ((mt[0][1] / float(sum(mt[0])))*100))
print('False negative rate : %f %%' % ( (mt[1][0] / float(sum(mt[1]))*100)))


# In[41]:

from sklearn import metrics
print (metrics.classification_report(y_test, res))


# In[34]:

test_data = "public meetings are awkard for me as I can insult people but I choose not to and that is something that I find difficult to live with"


# In[101]:

test_data="I purchased this product 4.47 billion years ago and when I opened it today, it was half empty."


# In[82]:

test_data="when people see me eating and ask me are you eating? No no I'm trying to choke myself to death #sarcastic"


# In[102]:

test_feature = []
test_feature.append((get_features(test_data,topic_mod)))


# In[104]:

test_feature


# In[105]:

c = []

c.append(pd.DataFrame(test_feature[0],index=[i]))

test_result = pd.concat(c)
test_result = test_result.drop(['Topic :'],axis=1).values


# In[106]:

res= clf.predict(test_result)

但它给了我以下错误:

C:\ProgramData\Anaconda3\lib\site-packages\gensim\utils.py:1197: UserWarning: detected Windows; aliasing chunkize to chunkize_serial
  warnings.warn("detected Windows; aliasing chunkize to chunkize_serial")
[('a', 'DT'), ('long', 'JJ'), ('narrow', 'JJ'), ('open', 'JJ')]
[('a', 'DT'), ('long', 'JJ')]
[('narrow', 'JJ'), ('open', 'JJ')]
done
5
Traceback (most recent call last):
  File "C:\shubhamprojectwork\sarcasm detection\SarcasmDetection-master\SarcasmDetection-master\Code\sarcasm-extraction.py", line 276, in <module>
    topic_feature(features,"A LoNg NArrow opening",topic_mod)
  File "C:\shubhamprojectwork\sarcasm detection\SarcasmDetection-master\SarcasmDetection-master\Code\sarcasm-extraction.py", line 268, in topic_feature
    topics = topic_modeler.transform(sentence)    
  File "C:\shubhamprojectwork\sarcasm detection\SarcasmDetection-master\SarcasmDetection-master\Code\topic.py", line 42, in transform
    return self.lda[corpus_sentence]     
  File "C:\ProgramData\Anaconda3\lib\site-packages\gensim\models\ldamodel.py", line 1160, in __getitem__
    return self.get_document_topics(bow, eps, self.minimum_phi_value, self.per_word_topics)
AttributeError: 'LdaModel' object has no attribute 'minimum_phi_value'

topic.py的代码:

from gensim import corpora, models, similarities
import nltk
from nltk.corpus import stopwords
import numpy as np
import pandas as pd
import replace_emoji

class topic(object):
    def __init__(self, nbtopic = 100, alpha=1,model=None,dicttp=None):
        self.nbtopic = nbtopic
        self.alpha = alpha
        self.porter = nltk.PorterStemmer()
        self.stop = stopwords.words('english')+['.','!','?','"','...','\\',"''",'[',']','~',"'m","'s",';',':','..','$']
        if model!=None and dicttp!=None:
            self.lda = models.ldamodel.LdaModel.load(model)
            self.dictionary =  corpora.Dictionary.load(dicttp)

    def fit(self,documents):

        documents_mod = documents
        tokens = [nltk.word_tokenize(sentence) for sentence in documents_mod]
        tokens = [[self.porter.stem(t.lower()) for t in sentence if t.lower() not in self.stop] for sentence in tokens]        

        self.dictionary = corpora.Dictionary(tokens)
        corpus = [self.dictionary.doc2bow(text) for text in tokens]
        self.lda = models.ldamodel.LdaModel(corpus,id2word=self.dictionary, num_topics=self.nbtopic,alpha=self.alpha)

        self.lda.save('topics.tp')
        self.dictionary.save('topics_dict.tp')

    def get_topic(self,topic_number):

        return self.lda.print_topic(topic_number)

    def transform(self,sentence):

        sentence_mod = sentence
        tokens = nltk.word_tokenize(sentence_mod)
        tokens = [self.porter.stem(t.lower()) for t in tokens if t.lower() not in self.stop] 
        corpus_sentence = self.dictionary.doc2bow(tokens)

        return self.lda[corpus_sentence]

整体代码可以在这里找到overall code .

最佳答案

minimum_phi_value 是 LdaModel 的一个属性，它是在创建实例时设置的，并且由于某种原因它没有被序列化(这很奇怪，可能是错误)。

要解决此特定问题，您可以添加

self.lda.minimum_phi_value = 0.01

...在 self.lda 加载后或尽可能避免保存/恢复模型(即始终训练它)。

但我鼓励您在序列化之前和之后检查 self.lda 的字段，以检查它们是否相同。

关于python - 属性错误: 'LdaModel' object has no attribute 'minimum_phi_value' ，我们在Stack Overflow上找到一个类似的问题： https://stackoverflow.com/questions/50326147/

文章推荐： java - 将数组中的整个数组内容转入

文章推荐： regex - 允许尾随和前导空格的正则表达式

文章推荐： java - Item 类中的构造函数 Item 不能应用于给定类型

文章推荐： python - pandas read_csv 每隔一列都有索引

javascript - TypeError : Object [object Object], [object Object] 没有找到方法
我遵循了一本名为“Sitepoint Full Stack Javascript with MEAN”的书中的教程，我刚刚完成了第 6 章，应该已经创建了一个带有“数据库”的“服务器”。数据库只不过是
javascript - Ajax返回数组在PHP中显示[object Object],[object Object]
在 Jquery 中，我创建两个数组，一个嵌入另一个数组，就像这样...... arrayOne = [{name:'a',value:1}, {name:'b',value:2}] var arra
javascript - 为什么 ({}+{}) ="[object Object][object Object]"？
这个问题在这里已经有了答案: What is the explanation for these bizarre JavaScript behaviours mentioned in the 'Wa
angular - 无法解析...的所有参数([object Object]，[object Object]，？，？)
我被放在别人的代码上，有一个类用作其他组件的基础。当我尝试 ng serve --aot(或 build --prod)时，我得到以下信息。 @Component({ ...,
javascript - getJSON 只返回 [object Object],[object Object]
我正在测试一些代码，并使用数据创建了一个 json 文件。问题是我在警报中收到“[object Object],[object Object]”。没有数据。我做错了什么？这是代码:
javascript - print[ [object Object],[object Object]] 到json数组
我想打印 [object Object],[object Object] 以明智地 "[[{ 'x': '1', 'y': '0' }, { 'x': '2', 'y': '1' }]]"; 在 ja
javascript - Couchdb 列表仅返回 [object Object][object Object]
我有一个功能 View ，我正在尝试以特殊格式的方式输出。但我无法让列表功能正常工作。我得到的唯一返回是[object Object][object Object] [object Object]
javascript - TypeError Object[object object] 没有方法 SubSelf，TypeError Object[object object] 没有方法 intersectsPlane
在使用优秀的 Sim.js 和 Three.js 库处理 WebGL 项目时，我偶然发现了下一个问题: 一路走来，它使用了 THREE.Ray 的下一个构造函数: var ray = new THRE
javascript - React js 多选 [object Object], [object Object]
我正在使用 Material UI 进行多重选择。这是我的代码。 {listStates.map(col => (
javascript - jquery ajax [object Object] [object Object] 在列表中输出
我的代码使用ajax: $("#keyword").keyup(function() { var keyword = $("#keyword").val(); if (keyword.
angular - 无法解析 AuthenticationService : ([object Object], 的所有参数？，[object Object])
我遇到了下一个错误，无法理解如何解决它。 Can't resolve all parameters for AuthenticationService: ([object Object], ?, [o
Angular 10 FormArray ERROR 错误 : Cannot find control with name: '[object Object], [object Object],[object Object]
我正在尝试创建一个显示动态复选框的表单，至少应选中其中一个才能继续。我还需要获取一组选中的复选框。这是组件的代码: import { Component, OnInit } from '@angul
javascript - 为什么我在 UI 中没有收到验证 Flash 消息，我收到这样的 Flash 错误 [object Object],[object Object],[object Object]
我正在开发 NodeJs 应用程序，它是博客应用程序。我使用了快速验证器，我尝试在 UI 端使用快速闪存消息将帖子保存在数据库中之前使用闪存消息验证数据，我成功地将数据保存在数据库中，但在提交表单后消
jquery - $.getJSON 返回 “undefined” 或 [object Object] [object Object]
我知道有些人问了同样的问题并得到了解答。我已经查看了所有这些，但仍然无法解决我的问题。我有一个 jquery snipet，它将值发送到处理程序，处理程序处理来自 JS 的值并将数据作为 JSON 数
c# - object == object 而不是 object.id == object.id 潜在问题
我继承了一个非常草率的项目，我的任务是解释为什么它不好。我注意到他们在整个代码中都进行了这样的比较 (IQueryable).FirstOrDefault(x => x.Facility == fac
javascript - Object, Object 和 [1 : Object, 2 : Object]? 有什么区别
我只是在删除数组中的对象时偶然发现了这一点。代码如下: friends = []; friends.push( { a: 'Nexus', b: 'Muffi
objective-c - setting object = nil and [object release] VS [object release] and object = nil 有什么区别？
这两个代码片段有什么区别: object = nil; [object release] 对比 [object release]; object = nil; 哪个是最佳实践？最佳答案 object
javascript - Object.create(Object.prototype) , Object.create(Object) 和 Object.create(null) 之间的区别
我应该为其他人将从中继承的第一个父对象传递哪个参数，哪个参数更有效 Object.create(Object.prototype) Object.create(Object) Object.creat
objective-c - 执行cancelPreviousPerformRequestsWithTarget :selector:object: for all objects
我在不同的对象上安排不同的选择器 [self performSelector:@selector(doSmth) withObject:objectA afterDelay:1]; [self per
objective-c - 在 Objective-C 中打印 &object 和 object 的区别
NSLog(@"%p", &object); 和 NSLog(@"%p", object); 有什么区别？两者似乎都打印出一个内存地址，但我不确定哪个是对象的实际内存地址。最佳答案这就是我喜欢的

行者123

个人简介

我是一名优秀的程序员,十分优秀！

作者热门文章

滴滴打车优惠券免费领取

全站热门文章

首页

博学

6Ren·AI

商城

python - 属性错误: 'LdaModel' object has no attribute 'minimum_phi_value'