gpt4 book ai didi

随机森林n_estimators学习曲线

转载 作者:我是一只小鸟 更新时间:2023-04-05 06:31:06 29 4
gpt4 key购买 nike

随机森林

单棵树与随机森林的得分对比

                        
                          # 导入包
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

                        
                      
                        
                          # Load the wine dataset
wine = load_wine()

                        
                      
                        
                          # Split the data into training and test sets (test_size=0.3); note that no random_state is passed to the split
x_train, x_test, y_train, y_test = train_test_split(wine.data, wine.target, test_size=0.3)

                        
                      
                        
                          # Instantiate a decision tree and a random forest, both with random_state=0
clf = DecisionTreeClassifier(random_state=0)
rfc = RandomForestClassifier(random_state=0)

                        
                      
                        
                          # Train both models on the training set
clf.fit(x_train, y_train)
rfc.fit(x_train, y_train)

                        
                      
                            RandomForestClassifier(random_state=0)
                          
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
                        
                          # Score both fitted models on the test set and print the two scores
                          # NOTE(review): "sinle" in the printed label looks like a typo for "single"; left unchanged so it matches the recorded output
clf_score = clf.score(x_test, y_test)
rfc_score = rfc.score(x_test, y_test)
print("sinle tree: {0}\nrandom tree: {1}".format(clf_score, rfc_score))

                        
                      
                        
                          sinle tree: 0.9074074074074074
random tree: 0.9629629629629629

                        
                      

单棵树与随机森林在交叉验证下的对比图

                        
                          # 导入交叉验证和画图工具
%matplotlib inline
from sklearn.model_selection import cross_val_score
import matplotlib.pyplot as plt

                        
                      
                        
                          # Instantiate a decision tree and a random forest
clf = DecisionTreeClassifier()
rfc = RandomForestClassifier(n_estimators=25) # build a random forest made up of 25 trees

                        
                      
                        
                          # Run cross-validation with cv=10 on the full dataset for each model; each call returns an array of scores
clf_corss = cross_val_score(clf, wine.data, wine.target, cv=10)
rfc_corss = cross_val_score(rfc, wine.data, wine.target, cv=10)

                        
                      
                        
                          # Print the mean cross-validation score of the decision tree and of the random forest (the mean, not the best fold)
                          # NOTE(review): "socre" in the printed labels looks like a typo for "score"; left unchanged so it matches the recorded output
print("single tree mean socre: {}\nrandom tree mean socre {}".format(clf_corss.mean(), rfc_corss.mean()))

                        
                      
                        
                          single tree mean socre: 0.8705882352941178
random tree mean socre 0.9722222222222221

                        
                      
                        
                          # Plot the cross-validation scores of the decision tree and the random forest against positions 1..10
plt.plot(range(1, 11), clf_corss, label="single tree")
plt.plot(range(1, 11), rfc_corss, label="random tree")
plt.xticks(range(1, 11))
plt.legend()

                        
                      
                        
                          <matplotlib.legend.Legend at 0x7ff6f4815d50>

                        
                      

​ 。

​ 。

                        
                          # Recompute the decision tree's cross-validation scores; the bare name on the last line echoes the array when run as a notebook cell
                          clf_corss = cross_val_score(clf, wine.data, wine.target, cv=10)
clf_corss

                        
                      
                        
                          array([0.88888889, 0.88888889, 0.72222222, 0.88888889, 0.83333333,
       0.83333333, 1.        , 0.94444444, 0.94117647, 0.76470588])

                        
                      
                        
                          # Recompute the random forest's cross-validation scores; the bare name on the last line echoes the array when run as a notebook cell
                          rfc_corss = cross_val_score(rfc, wine.data, wine.target, cv=10)
rfc_corss

                        
                      
                        
                          array([1.        , 1.        , 0.94444444, 0.94444444, 0.88888889,
       1.        , 1.        , 1.        , 1.        , 1.        ])

                        
                      

十次交叉验证下决策树和随机森林的对比

                        
                          # Collect the mean cross-validation score of each model over ten repeated runs
clf_list, rfc_list = [], []

# Each round builds fresh estimators and evaluates the tree first, then the forest
for _ in range(10):
    clf = DecisionTreeClassifier()
    rfc = RandomForestClassifier(n_estimators=25)
    clf_list.append(cross_val_score(clf, wine.data, wine.target, cv=10).mean())
    rfc_list.append(cross_val_score(rfc, wine.data, wine.target, cv=10).mean())

                        
                      
                        
                          # Plot the ten mean cross-validation scores of the decision tree and the random forest, one point per repeated run
plt.plot(range(1, 11), clf_list, label="single tree")
plt.plot(range(1, 11), rfc_list, label="random tree")
plt.xticks(range(1, 11))
plt.legend()

                        
                      
                        
                          <matplotlib.legend.Legend at 0x7ff6f490f670>

                        
                      

n_estimators 学习曲线

                        
                          # Learning curve: mean cross-validation score (cv=10) for forests of 1 to 200 trees
superpa = []
for n_trees in range(1, 201):
    rfc = RandomForestClassifier(n_estimators=n_trees, n_jobs=-1)
    rfc_cross = cross_val_score(rfc, wine.data, wine.target, cv=10).mean()
    superpa.append(rfc_cross)

# superpa[k] holds the score for k + 1 trees, so shift the 0-based list
# position by one to report the actual n_estimators of the best forest.
best_score = max(superpa)
print(best_score, superpa.index(best_score) + 1)

# The x axis is the number of trees (1..200), matching the order of superpa
plt.figure(figsize=(20,8))
plt.plot(range(1,201), superpa, label="rfc_cross_mean")
plt.legend()

                        
                      
                        
                          0.9888888888888889 20





<matplotlib.legend.Legend at 0x7ff6f540f100>

                        
                      

​ 。

​ 。

最后此篇关于随机森林n_estimators学习曲线的文章就讲到这里了,如果你想了解更多关于随机森林n_estimators学习曲线的内容请搜索CFSDN的文章或继续浏览相关文章,希望大家以后支持我的博客! 。

29 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com