gpt4 book ai didi

python - GridSearchCV 可以与自定义分类器一起使用吗?

转载 作者:太空狗 更新时间:2023-10-29 20:56:05 26 4
gpt4 key购买 nike

我创建了一个自定义的手工编码分类器,它实现了标准的 sklearn 分类器方法(fit()、predict()、predict_proba())。它可以直接与 sklearn 的工具 GridSearchCV() 一起使用,还是需要额外添加什么内容?

编辑 1:根据 cel 的建议,我尝试直接应用它

第一步是按照文档(原文此处为链接)中的说明添加 get_params 和 set_params。这样一来,完整的交叉验证流程确实可以运行,但最后以如下错误结束:

return self._fit(X, y, ParameterGrid(self.param_grid))
best_estimator.fit(X, y, **self.fit_params)
AttributeError: 'NoneType' object has no attribute 'fit'

编辑 2:添加分类器代码(它是一个基于 theano 的逻辑回归分类器)

class LogisticRegression:
    """Apply minibatch logistic regression.

    A single softmax layer trained with minibatch gradient descent,
    exposing scikit-learn style ``fit`` / ``predict`` / ``predict_proba``
    plus ``get_params`` / ``set_params``.

    :type n_in: int
    :param n_in: number of input units, the dimension of the space in
                 which the datapoints lie

    :type n_out: int
    :param n_out: number of output units, the dimension of the space in
                  which the labels lie

    :param batch_size: number of rows per minibatch
    :param learning_rate: step size applied to the gradients
    :param iters: number of full passes made by ``fit``
    :param verbose: when equal to 1, ``fit`` prints the error of each pass
    """

    def __init__(self, n_in, n_out, batch_size=600, learning_rate=0.13,
                 iters=500, verbose=0):
        self.n_in = n_in
        self.n_out = n_out
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.iters = iters
        self.verbose = verbose
        # The layer is built once here from n_in / n_out; changing those
        # attributes later through set_params does not rebuild it.
        self.single_layer = Layer(self.n_in, self.n_out, T.nnet.softmax)
        self.minibatch_count = 0

    def get_params(self, deep=True):
        """Return the constructor parameters as a dict.

        :param deep: accepted for scikit-learn compatibility; unused here
        :return: mapping of parameter name to current value
        """
        return {"n_in": self.n_in, "n_out": self.n_out,
                "batch_size": self.batch_size,
                "learning_rate": self.learning_rate, "iters": self.iters,
                "verbose": self.verbose}

    def set_params(self, **parameters):
        """Assign each keyword argument as an attribute of the same name.

        NOTE(review): this method returns ``None``. Code that uses the
        return value of ``set_params`` as the estimator then fails with
        "'NoneType' object has no attribute 'fit'"; the method should end
        with ``return self``.
        """
        for parameter, value in parameters.items():
            setattr(self, parameter, value)

    def minibatch_trainer(self, data_x, data_y):
        """Run one pass of minibatch updates and return the mean cost.

        :param data_x: shared variable holding the inputs
        :param data_y: labels, sliceable the same way as ``data_x``
        :return: mean of the per-batch costs
        """
        # With Python 2 integer division, a trailing partial batch is not
        # visited.
        n_batches = data_x.get_value(borrow=True).shape[0] / self.batch_size
        tensor_x = T.matrix('x')
        tensor_y = T.ivector('y')
        index = T.lscalar('index')
        cost = self.single_layer.negative_log_likelihood(tensor_x, tensor_y)
        g_W = T.grad(cost, self.single_layer.W)
        g_b = T.grad(cost, self.single_layer.b)
        updates = [
            (self.single_layer.W,
             self.single_layer.W - g_W * self.learning_rate),
            (self.single_layer.b,
             self.single_layer.b - g_b * self.learning_rate),
        ]
        # The training function is rebuilt on every call to this method.
        train_batch = theano.function(
            [index], [cost],
            updates=updates,
            givens={
                tensor_x: data_x[index * self.batch_size:
                                 (index + 1) * self.batch_size],
                tensor_y: data_y[index * self.batch_size:
                                 (index + 1) * self.batch_size],
            })
        return np.mean([train_batch(i) for i in xrange(n_batches)])

    def fit(self, data_x, data_y):
        """Train for ``self.iters`` passes over the data.

        :return: ``self``
        """
        data_x, data_y = shared_dataset(data_x, data_y)
        start = time.clock()
        for epoch in xrange(self.iters):
            train_err = self.minibatch_trainer(data_x, data_y)
            if self.verbose == 1:
                print("Iter %d --> %f" % (epoch, train_err))
        end = time.clock()
        print("Finished Training Logistic Regression Model\n"
              "Iterations %d\n"
              "Time Taken : %d secs" % (self.iters, end - start))
        return self

    def partial_fit(self, data_x, data_y):
        """Run a single training pass over the given data and print its error."""
        data_x, data_y = shared_dataset(data_x, data_y)
        self.minibatch_count += 1
        err = self.minibatch_trainer(data_x, data_y)
        print("MiniBatch %d --> %f" % (self.minibatch_count, err))

    def predict(self, data_x):
        """Predict labels batch by batch and return them concatenated.

        NOTE(review): ``n_batches`` is floored, so rows beyond the last
        full batch get no prediction; confirm callers pass a multiple of
        ``batch_size``.
        """
        data_x = shared_dataset(data_x)
        n_batches = data_x.get_value(borrow=True).shape[0] / self.batch_size
        tensor_x = T.matrix('x')
        index = T.lscalar('index')
        tensor_ypred = self.prediction_tensor(tensor_x)
        predictor = theano.function(
            [index], tensor_ypred,
            givens={
                tensor_x: data_x[index * self.batch_size:
                                 (index + 1) * self.batch_size],
            })
        ypred = [predictor(i) for i in xrange(n_batches)]
        return np.hstack(ypred)

    def predict_proba(self, data_x):
        """Evaluate the layer's decision function on all of ``data_x`` at once."""
        data_x = shared_dataset(data_x)
        tensor_x = T.matrix('x')
        tensor_ypredproba = self.single_layer.decision_function_tensor(tensor_x)
        predproba_func = theano.function([], tensor_ypredproba,
                                         givens={tensor_x: data_x})
        return predproba_func()

    def prediction_tensor(self, tensor_x):
        """
        Returns the predicted y value as a tensor variable
        :param tensor_x: TensorType matrix on input data
        :return: TensorType tensor_ypred output
        """
        return T.argmax(
            self.single_layer.decision_function_tensor(tensor_x), axis=1)

编辑 3:添加 GridSearchCV 的确切用法

# Grid-search a single candidate (iters=3) with 4-fold CV, scored by ROC AUC.
clf_cv = GridSearchCV(
    LogisticRegression(n_in=200, n_out=2),
    {"iters": [3]},
    cv=4,
    scoring="roc_auc",
    n_jobs=-1,
    verbose=1,
)

我还尝试过让该类继承 BaseEstimator 和 ClassifierMixin;sklearn.base.clone 也没有报任何错误。

最佳答案

我几分钟前也遇到了同样的问题。文档中的说法并不正确:必须修改 set_params,让它返回 self:

def set_params(self, **parameters):
    """Assign each keyword argument as an attribute of the same name.

    :param parameters: parameter names mapped to their new values
    :return: ``self``, so the result of the call can be used as the
             estimator
    """
    for parameter, value in parameters.items():
        setattr(self, parameter, value)
    return self

关于python - GridSearchCV 可以与自定义分类器一起使用吗?,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/28124366/

26 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com