
python - Unpickling an instance from a Jupyter Notebook in a Flask application


I created a class for word2vec vectorization and it works fine. However, when I pickle the trained model and then load that pickle file in a Flask application, I get an error like this:

AttributeError: module '__main__' has no attribute 'GensimWord2VecVectorizer'

I am creating the model on Google Colab.

Code in the Jupyter Notebook:

# Word2Vec Model
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
from gensim.models import Word2Vec

class GensimWord2VecVectorizer(BaseEstimator, TransformerMixin):

    def __init__(self, size=100, alpha=0.025, window=5, min_count=5, max_vocab_size=None,
                 sample=0.001, seed=1, workers=3, min_alpha=0.0001, sg=0, hs=0, negative=5,
                 ns_exponent=0.75, cbow_mean=1, hashfxn=hash, iter=5, null_word=0,
                 trim_rule=None, sorted_vocab=1, batch_words=10000, compute_loss=False,
                 callbacks=(), max_final_vocab=None):
        self.size = size
        self.alpha = alpha
        self.window = window
        self.min_count = min_count
        self.max_vocab_size = max_vocab_size
        self.sample = sample
        self.seed = seed
        self.workers = workers
        self.min_alpha = min_alpha
        self.sg = sg
        self.hs = hs
        self.negative = negative
        self.ns_exponent = ns_exponent
        self.cbow_mean = cbow_mean
        self.hashfxn = hashfxn
        self.iter = iter
        self.null_word = null_word
        self.trim_rule = trim_rule
        self.sorted_vocab = sorted_vocab
        self.batch_words = batch_words
        self.compute_loss = compute_loss
        self.callbacks = callbacks
        self.max_final_vocab = max_final_vocab

    def fit(self, X, y=None):
        self.model_ = Word2Vec(
            sentences=X, corpus_file=None,
            size=self.size, alpha=self.alpha, window=self.window, min_count=self.min_count,
            max_vocab_size=self.max_vocab_size, sample=self.sample, seed=self.seed,
            workers=self.workers, min_alpha=self.min_alpha, sg=self.sg, hs=self.hs,
            negative=self.negative, ns_exponent=self.ns_exponent, cbow_mean=self.cbow_mean,
            hashfxn=self.hashfxn, iter=self.iter, null_word=self.null_word,
            trim_rule=self.trim_rule, sorted_vocab=self.sorted_vocab, batch_words=self.batch_words,
            compute_loss=self.compute_loss, callbacks=self.callbacks,
            max_final_vocab=self.max_final_vocab)
        return self

    def transform(self, X):
        X_embeddings = np.array([self._get_embedding(words) for words in X])
        return X_embeddings

    def _get_embedding(self, words):
        valid_words = [word for word in words if word in self.model_.wv.vocab]
        if valid_words:
            embedding = np.zeros((len(valid_words), self.size), dtype=np.float32)
            for idx, word in enumerate(valid_words):
                embedding[idx] = self.model_.wv[word]
            return np.mean(embedding, axis=0)
        else:
            return np.zeros(self.size)

# column transformer
from sklearn.compose import ColumnTransformer

ct = ColumnTransformer([
    ('step1', GensimWord2VecVectorizer(), 'STATUS')
], remainder='drop')

# Create Model
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
import pickle
import numpy as np
import dill
import torch
# ##########
# SVC - support vector classifier
# ##########
# defining parameter range
hyperparameters = {'C': [0.1, 1],
                   'gamma': [1, 0.1],
                   'kernel': ['rbf'],
                   'probability': [True]}
model_sv = Pipeline([
    ('column_transformers', ct),
    ('model', GridSearchCV(SVC(), hyperparameters,
                           refit=True, verbose=3)),
])
model_sv_cEXT = model_sv.fit(X_train, y_train['cEXT'])
# Save the trained cEXT - SVM Model.
import joblib
joblib.dump(model_sv_cEXT, 'model_Word2Vec_sv_cEXT.pkl')

Code in the Flask application:

# Word2Vec
model_EXT_WV_SV = joblib.load('utility/model/MachineLearning/SVM/model_Word2Vec_sv_cEXT.pkl')

I tried copying the same class into my Flask file, but that did not work either.

import joblib
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
from gensim.models import Word2Vec

class GensimWord2VecVectorizer(BaseEstimator, TransformerMixin):

    def __init__(self, size=100, alpha=0.025, window=5, min_count=5, max_vocab_size=None,
                 sample=0.001, seed=1, workers=3, min_alpha=0.0001, sg=0, hs=0, negative=5,
                 ns_exponent=0.75, cbow_mean=1, hashfxn=hash, iter=5, null_word=0,
                 trim_rule=None, sorted_vocab=1, batch_words=10000, compute_loss=False,
                 callbacks=(), max_final_vocab=None):
        self.size = size
        self.alpha = alpha
        self.window = window
        self.min_count = min_count
        self.max_vocab_size = max_vocab_size
        self.sample = sample
        self.seed = seed
        self.workers = workers
        self.min_alpha = min_alpha
        self.sg = sg
        self.hs = hs
        self.negative = negative
        self.ns_exponent = ns_exponent
        self.cbow_mean = cbow_mean
        self.hashfxn = hashfxn
        self.iter = iter
        self.null_word = null_word
        self.trim_rule = trim_rule
        self.sorted_vocab = sorted_vocab
        self.batch_words = batch_words
        self.compute_loss = compute_loss
        self.callbacks = callbacks
        self.max_final_vocab = max_final_vocab

    def fit(self, X, y=None):
        self.model_ = Word2Vec(
            sentences=X, corpus_file=None,
            size=self.size, alpha=self.alpha, window=self.window, min_count=self.min_count,
            max_vocab_size=self.max_vocab_size, sample=self.sample, seed=self.seed,
            workers=self.workers, min_alpha=self.min_alpha, sg=self.sg, hs=self.hs,
            negative=self.negative, ns_exponent=self.ns_exponent, cbow_mean=self.cbow_mean,
            hashfxn=self.hashfxn, iter=self.iter, null_word=self.null_word,
            trim_rule=self.trim_rule, sorted_vocab=self.sorted_vocab, batch_words=self.batch_words,
            compute_loss=self.compute_loss, callbacks=self.callbacks,
            max_final_vocab=self.max_final_vocab)
        return self

    def transform(self, X):
        X_embeddings = np.array([self._get_embedding(words) for words in X])
        return X_embeddings

    def _get_embedding(self, words):
        valid_words = [word for word in words if word in self.model_.wv.vocab]
        if valid_words:
            embedding = np.zeros((len(valid_words), self.size), dtype=np.float32)
            for idx, word in enumerate(valid_words):
                embedding[idx] = self.model_.wv[word]
            return np.mean(embedding, axis=0)
        else:
            return np.zeros(self.size)

# Word2Vec
model_EXT_WV_SV = joblib.load('utility/model/MachineLearning/SVM/model_Word2Vec_sv_cEXT.pkl')

GitHub code: https://github.com/Juned-Ansari/test

Pickle file: https://github.com/Juned-Ansari/test/blob/main/model_Word2Vec_sv_cEXT.pkl

Flask web application: https://github.com/Juned-Ansari/test/tree/main/WebApp

Best Answer

From https://docs.python.org/3/library/pickle.html:

pickle can save and restore class instances transparently, however the class definition must be importable and live in the same module as when the object was stored.

The following types can be pickled:

  • ...
  • classes that are defined at the top level of a module
  • instances of such classes ...
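
In other words, pickle records only a reference such as __main__.GensimWord2VecVectorizer; it does not store the class definition itself. A minimal sketch of the failure mode (the file names train.py and load.py are hypothetical):

# train.py -- run as a script, so the class lives in the __main__ module
import pickle

class GensimWord2VecVectorizer:
    pass

with open('vec.pkl', 'wb') as f:
    # pickle stores only the reference '__main__.GensimWord2VecVectorizer'
    pickle.dump(GensimWord2VecVectorizer(), f)

# load.py -- a different process (e.g. the Flask app), whose __main__ has no such attribute
import pickle

with open('vec.pkl', 'rb') as f:
    obj = pickle.load(f)  # AttributeError: module '__main__' has no attribute 'GensimWord2VecVectorizer'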

Consider your directory structure:

├── WebApp/
│   └── app.py
└── Untitled.ipynb

Assume you start the server with flask run from within WebApp/, so that app is a top-level module.

First, move class GensimWord2VecVectorizer to the top level of WebApp/app.py.
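
A sketch of what WebApp/app.py could look like after the move (the Flask app object shown here is illustrative; the class body is the same one from the notebook):

# WebApp/app.py
import joblib
from flask import Flask
from sklearn.base import BaseEstimator, TransformerMixin
from gensim.models import Word2Vec

class GensimWord2VecVectorizer(BaseEstimator, TransformerMixin):
    # ... same __init__ / fit / transform / _get_embedding as in the notebook ...
    pass

app = Flask(__name__)

# This load succeeds once the pickle has been re-created so that it refers to
# app.GensimWord2VecVectorizer (see the next step), because pickle can then
# resolve that name to the class defined above.
model_EXT_WV_SV = joblib.load(
    'utility/model/MachineLearning/SVM/model_Word2Vec_sv_cEXT.pkl')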

Next, in your Jupyter Notebook, import GensimWord2VecVectorizer and trick pickle into thinking it comes from the top-level app module:

from WebApp.app import GensimWord2VecVectorizer
GensimWord2VecVectorizer.__module__ = 'app'

import sys
sys.modules['app'] = sys.modules['WebApp.app']

Then you should be able to dump and load the pickle file.
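
Concretely, a sketch of the dump/load round trip under this setup (using the paths from the question):

# In the notebook, after the sys.modules trick above: the class is now pickled
# under the name 'app.GensimWord2VecVectorizer' instead of '__main__....'
import joblib
joblib.dump(model_sv_cEXT, 'model_Word2Vec_sv_cEXT.pkl')

# In WebApp/app.py (started with flask run, so this module is importable as 'app'):
model_EXT_WV_SV = joblib.load('utility/model/MachineLearning/SVM/model_Word2Vec_sv_cEXT.pkl')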

Google Colab

If importing the local module is inconvenient there, you can do this instead:

GensimWord2VecVectorizer.__module__ = 'app'

import sys
app = sys.modules['app'] = type(sys)('app')
app.GensimWord2VecVectorizer = GensimWord2VecVectorizer

Then you should be able to dump and load the pickle file.
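
This works because pickle resolves the class through sys.modules['app'] at dump time, and through the real app module (WebApp/app.py) at load time. A small sanity check you could run in the same Colab session (variable names are illustrative):

import joblib

# Dump: recorded as app.GensimWord2VecVectorizer thanks to the fake 'app' module above.
joblib.dump(model_sv_cEXT, 'model_Word2Vec_sv_cEXT.pkl')

# Reload in the same session: pickle finds the class via sys.modules['app'].
reloaded = joblib.load('model_Word2Vec_sv_cEXT.pkl')
print(type(reloaded))  # <class 'sklearn.pipeline.Pipeline'>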

Regarding python - Unpickling an instance from a Jupyter Notebook in a Flask application, we found a similar question on Stack Overflow: https://stackoverflow.com/questions/71231611/
