# Evaluate gradientBoost with 3-fold stratified cross-validation and report
# per-fold metrics plus the mean accuracy across folds.
# NOTE(review): X and Y are indexed positionally with the fold index arrays,
# which presumably means they are NumPy arrays — confirm (a pandas object
# would need .iloc here).
skf = StratifiedKFold(n_splits=3, random_state=42, shuffle=True)
accuracy = []
for train_index, test_index in skf.split(X, Y):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = Y[train_index], Y[test_index]

    # Refit the model on this fold's training split, then predict the
    # held-out split.
    gradientBoost.fit(X_train, y_train)
    y_pred = gradientBoost.predict(X_test)

    # Accuracy is rounded per fold, so meanAcc below is the mean of the
    # already-rounded values.
    accuracy.append(round(accuracy_score(y_test, y_pred), 2))

    # NOTE(review): `score` is presumably sklearn's
    # precision_recall_fscore_support imported under an alias — confirm.
    precision, recall, fscore, support = np.round(score(y_test, y_pred), 2)
    print('precision: ' + str(precision))
    print('recall: ' + str(recall))
    print('fscore: ' + str(fscore))
    print('support: ' + str(support))
    print(classification_report(y_test, y_pred))

# Aggregate once every fold has been scored.
meanAcc = np.mean(np.asarray(accuracy))
print('meanAcc: ', meanAcc)
# Decision tree using the Gini criterion: fixed seed, depth capped at 10,
# and at least 8 samples required in every leaf.
clf_gini = DecisionTreeClassifier(
    criterion="gini",
    random_state=42,
    max_depth=10,
    min_samples_leaf=8,
)
# Multi-metric grid search over the tree's max_depth (2, 4, ..., 28) with
# 3-fold cross-validation. Both metrics are recorded for every candidate;
# the final estimator is refit using the 'Accuracy' metric.
scoring = {'Accuracy': make_scorer(accuracy_score), 'Recall': 'recall_weighted'}
gs = GridSearchCV(
    DecisionTreeClassifier(criterion='entropy', random_state=42, min_samples_leaf=10),
    param_grid={'max_depth': range(2, 30, 2)},
    scoring=scoring,
    cv=3,
    refit='Accuracy',
    # Train-score columns (mean_train_* / std_train_*) are omitted from
    # cv_results_ unless requested explicitly; code that reads them would
    # otherwise fail with a KeyError.
    return_train_score=True,
)
gs.fit(X_Distances, Y)
results = gs.cv_results_
# Create the figure and the axes on which the grid-search curves are drawn.
plt.figure(figsize=(13, 13))
plt.title("GridSearchCV evaluating using multiple scorers simultaneously")
# Use the axes that plt.title() has just drawn on. plt.axes() would add a
# second, overlapping Axes to the figure on current matplotlib versions.
ax = plt.gca()
ax.set_xlim(0, 32)
ax.set_ylim(0, 1)
# Get the regular numpy array from the MaskedArray
X_axis = np.array(results['param_max_depth'].data, dtype=float)
# Draw one colour per scorer: mean +/- std curves for the train and test
# splits, then mark and annotate the best mean test score.
# The mean_train_* / std_train_* columns are only present in cv_results_
# when the search was run with return_train_score=True.
for scorer, color in zip(sorted(scoring), ['g', 'k']):
    for sample, style in (('train', '--'), ('test', '-')):
        sample_score_mean = results['mean_%s_%s' % (sample, scorer)]
        sample_score_std = results['std_%s_%s' % (sample, scorer)]
        # Shade one standard deviation around the mean; the band is fully
        # transparent (alpha=0) for the train split.
        ax.fill_between(X_axis, sample_score_mean - sample_score_std,
                        sample_score_mean + sample_score_std,
                        alpha=0.1 if sample == 'test' else 0, color=color)
        ax.plot(X_axis, sample_score_mean, style, color=color,
                alpha=1 if sample == 'test' else 0.7,
                label="%s (%s)" % (scorer, sample))

    # First candidate whose test rank for this scorer is 1.
    best_index = np.nonzero(results['rank_test_%s' % scorer] == 1)[0][0]
    best_score = results['mean_test_%s' % scorer][best_index]

    # Plot a dotted vertical line at the best score for that scorer marked by x
    ax.plot([X_axis[best_index], ] * 2, [0, best_score],
            linestyle='-.', color=color, marker='x', markeredgewidth=3, ms=8)

    # Annotate the best score for that scorer
    ax.annotate("%0.2f" % best_score,
                (X_axis[best_index], best_score + 0.005))
# We can use GridSearchCV for multi-metric evaluation:
