gpt4 book ai didi

python - 在 sklearn 中绘制类权重的验证曲线

转载 作者:行者123 更新时间:2023-11-28 22:18:40 26 4
gpt4 key购买 nike

如果您能告诉我如何绘制类权重(class weight)的验证曲线,我将不胜感激。事实上,我尝试了以下代码来完成这项任务:

from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split, StratifiedKFold, validation_curve, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
import numpy as np
import matplotlib.pyplot as plt

def plot_validation_curve(param_range, train_scores, test_scores, title, alpha=0.1):
    """Plot mean train/test scores with a +/- one standard deviation band.

    Args:
        param_range: x-axis values, one per row of the score arrays. The
            values are handed to matplotlib unchanged, so entries that it
            cannot convert to numbers (e.g. dicts) raise ``TypeError``.
        train_scores: 2-D array of training scores; statistics are taken
            along ``axis=1``.
        test_scores: 2-D array of validation scores, same layout.
        title: Title of the figure.
        alpha: Opacity of the shaded standard-deviation bands.
    """
    # Compute all statistics up front (train first, then test) so that any
    # problem with the score arrays surfaces before anything is drawn.
    summaries = [(np.mean(scores, axis=1), np.std(scores, axis=1))
                 for scores in (train_scores, test_scores)]
    series_styles = (('train score', 'blue'), ('test score', 'red'))

    for (mean, std), (label, color) in zip(summaries, series_styles):
        plt.plot(param_range, mean, label=label, color=color, marker='o')
        plt.fill_between(param_range, mean + std, mean - std, color=color, alpha=alpha)

    plt.title(title)
    plt.grid(ls='--')
    plt.xlabel('Parameter value')
    plt.ylabel('F-measure')
    plt.legend(loc='best')
    plt.show()

if __name__ == '__main__':
    # Synthetic binary problem with a 90% / 10% class imbalance.
    X, y = make_classification(n_classes=2, class_sep=2, weights=[0.9, 0.1], n_informative=3, n_redundant=1, flip_y=0,
                               n_features=20, n_clusters_per_class=1, n_samples=1000, random_state=10)

    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    st = StandardScaler()

    rg = LogisticRegression(class_weight={0: 1, 1: 6.5}, random_state=42, solver='saga', max_iter=100, n_jobs=-1)

    # The 'clf__' prefix addresses parameters of the pipeline step named 'clf'.
    param_grid = {'clf__C': [0.001, 0.01, 0.1],
                  'clf__class_weight': [{0: 1, 1: 6}, {0: 1, 1: 5.5}]}

    pipeline = Pipeline(steps=[('scaler', st),
                               ('clf', rg)])

    # random_state only has an effect together with shuffle=True; recent
    # scikit-learn releases raise ValueError when it is set while shuffle is
    # left at its default (False). Omitting it keeps the same unshuffled,
    # deterministic folds.
    cv = StratifiedKFold(n_splits=5)
    rg_cv = GridSearchCV(pipeline, param_grid, cv=cv, scoring='f1')
    rg_cv.fit(X_train, y_train)

    plt.figure(figsize=(9, 6))

    # Class-weight settings to sweep; only the weight of class 1 varies.
    param_range2 = [{0: 1, 1: 6}, {0: 1, 1: 4}, {0: 1, 1: 5.5}]

    train_scores, test_scores = validation_curve(
        estimator=rg_cv.best_estimator_, X=X_train, y=y_train, param_name="clf__class_weight",
        param_range=param_range2, cv=cv, scoring="f1", n_jobs=-1)

    # NOTE: param_range2 is a list of dicts, so the plotting helper must turn
    # each entry into a number before using it as an x-coordinate.
    plot_validation_curve(param_range2, train_scores, test_scores, title="Validation Curve for class_weight", alpha=0.1)

但是运行时报了下面这个错误,它与最后一行传入的 param_range2 有关:

TypeError: float() argument must be a string or a number, not 'dict'

提前致谢。

最好的问候,

最佳答案

您必须决定要在 x 轴上绘制什么。您对不同的类别权重进行参数扫描。由于您将第一类的权重固定为 1,而只修改了第二类的权重,因此我决定根据第二类的权重绘制分数。

然后,我在绘图函数中将权重按升序排序,这样折线才能沿 x 轴按顺序正确地连接各个点。

def plot_validation_curve(param_range, train_scores, test_scores, title, alpha=0.1):
    """Plot train/test scores against the weight assigned to class label 1.

    Args:
        param_range: Sequence of ``class_weight`` mappings; the value stored
            under key ``1`` of each mapping is used as the x-coordinate.
        train_scores: 2-D array of training scores; statistics are taken
            along ``axis=1``, giving one value per entry of ``param_range``.
        test_scores: 2-D array of validation scores, same layout.
        title: Title of the figure.
        alpha: Opacity of the shaded standard-deviation bands.
    """
    # Reduce each class_weight mapping to a single plottable number.
    weights = [class_weight[1] for class_weight in param_range]

    # Sort by weight so the line is drawn left to right along the x-axis;
    # the same permutation is applied to every statistic below.
    order = np.argsort(weights)
    x_values = np.array(weights)[order]

    summaries = [(np.mean(scores, axis=1)[order], np.std(scores, axis=1)[order])
                 for scores in (train_scores, test_scores)]
    series_styles = (('train score', 'blue'), ('test score', 'red'))

    for (mean, std), (label, color) in zip(summaries, series_styles):
        plt.plot(x_values, mean, label=label, color=color, marker='o')
        plt.fill_between(x_values, mean + std, mean - std, color=color, alpha=alpha)

    plt.title(title)
    plt.grid(ls='--')
    plt.xlabel('Weight of class 2')
    plt.ylabel('Average values and standard deviation for F1-Score')
    plt.legend(loc='best')
    plt.show()

这样会得到如下图形(原文此处为一张图片):

关于python - 在 sklearn 中绘制类权重的验证曲线,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/50296498/

26 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com