gpt4 book ai didi

python - sklearn 分类器管道所需的 'valid specification of the columns' 是什么?

转载 作者:行者123 更新时间:2023-12-05 01:12:44 26 4
gpt4 key购买 nike

目标:使用 sklearn 根据 int 和基于对象的特征预测结果。

我正在使用来自 Kaggle 的以下数据集:Soccer Dataset

这是我的笔记本:Kaggle Notebook

使用的库

  • scikit-learn == 0.22.1

我创建了一个几乎可以工作的管道:

import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier

# Work on a copy so the original frame stays untouched
df = total_df.copy()

# Rows without a target value cannot be used for training
df = df.dropna(axis=0, subset=['result'])

# Split the frame into the target column and the predictor columns
y = df['result']
X = df.drop(columns=['result'])

# Hold out 20% of the rows for evaluation (fixed seed for reproducibility)
X_train_full, X_test_full, y_train, y_test = train_test_split(
    X, y, train_size=0.8, test_size=0.2, random_state=0)

# Group the predictor column labels by dtype
integer_features = list(X.columns[X.dtypes == 'int64'])
#continuous_features = list(X.columns[X.dtypes == 'float64'])
categorical_features = list(X.columns[X.dtypes == 'object'])

# Restrict both splits to the columns the preprocessor knows about
my_cols = categorical_features + integer_features
X_train = X_train_full[my_cols].copy()
X_test = X_test_full[my_cols].copy()

# Integer columns: fill gaps with the most frequent value, then standardise
integer_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('scaler', StandardScaler()),
])

# Object columns: fill gaps with the most frequent value, then one-hot
# encode; categories unseen during fit are ignored at transform time
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore')),
])

# Route each column group to its own transformer
preprocessor = ColumnTransformer(transformers=[
    ('ints', integer_transformer, integer_features),
    ('cat', categorical_transformer, categorical_features),
])

# Full model: preprocessing followed by a random forest with default settings
base = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', RandomForestClassifier()),
])

# Fit preprocessing and classifier on the training split.
# NOTE(review): this is the call that raises the ValueError shown in the
# traceback; the KeyError there names a column label of type
# sqlalchemy.sql.elements.quoted_name rather than plain str, so the labels
# in the two feature lists are presumably not plain strings — verify.
base.fit(X_train, y_train)

我收到一个错误:

ValueError:没有有效的列规范。只允许使用全部为整数或全部为字符串的标量、列表或切片,或者布尔掩码

这是完整的回溯:

---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
/opt/conda/lib/python3.7/site-packages/sklearn/utils/__init__.py in _determine_key_type(key, accept_slice)
255 try:
--> 256 return dtype_to_str[type(key)]
257 except KeyError:

KeyError: <class 'sqlalchemy.sql.elements.quoted_name'>

During handling of the above exception, another exception occurred:

ValueError Traceback (most recent call last)
<ipython-input-13-702987dff390> in <module>
47
48 # Preprocessing of training data, fit model
---> 49 base.fit(X_train, y_train)
50
51 base.predict(X_test)

/opt/conda/lib/python3.7/site-packages/sklearn/pipeline.py in fit(self, X, y, **fit_params)
348 This estimator
349 """
--> 350 Xt, fit_params = self._fit(X, y, **fit_params)
351 with _print_elapsed_time('Pipeline',
352 self._log_message(len(self.steps) - 1)):

/opt/conda/lib/python3.7/site-packages/sklearn/pipeline.py in _fit(self, X, y, **fit_params)
313 message_clsname='Pipeline',
314 message=self._log_message(step_idx),
--> 315 **fit_params_steps[name])
316 # Replace the transformer of the step with the fitted
317 # transformer. This is necessary when loading the transformer

/opt/conda/lib/python3.7/site-packages/joblib/memory.py in __call__(self, *args, **kwargs)
353
354 def __call__(self, *args, **kwargs):
--> 355 return self.func(*args, **kwargs)
356
357 def call_and_shelve(self, *args, **kwargs):

/opt/conda/lib/python3.7/site-packages/sklearn/pipeline.py in _fit_transform_one(transformer, X, y, weight, message_clsname, message, **fit_params)
726 with _print_elapsed_time(message_clsname, message):
727 if hasattr(transformer, 'fit_transform'):
--> 728 res = transformer.fit_transform(X, y, **fit_params)
729 else:
730 res = transformer.fit(X, y, **fit_params).transform(X)

/opt/conda/lib/python3.7/site-packages/sklearn/compose/_column_transformer.py in fit_transform(self, X, y)
514 self._validate_transformers()
515 self._validate_column_callables(X)
--> 516 self._validate_remainder(X)
517
518 result = self._fit_transform(X, y, _fit_transform_one)

/opt/conda/lib/python3.7/site-packages/sklearn/compose/_column_transformer.py in _validate_remainder(self, X)
316 if (hasattr(X, 'columns') and
317 any(_determine_key_type(cols) == 'str'
--> 318 for cols in self._columns)):
319 self._df_columns = X.columns
320

/opt/conda/lib/python3.7/site-packages/sklearn/compose/_column_transformer.py in <genexpr>(.0)
316 if (hasattr(X, 'columns') and
317 any(_determine_key_type(cols) == 'str'
--> 318 for cols in self._columns)):
319 self._df_columns = X.columns
320

/opt/conda/lib/python3.7/site-packages/sklearn/utils/__init__.py in _determine_key_type(key, accept_slice)
275 if isinstance(key, (list, tuple)):
276 unique_key = set(key)
--> 277 key_type = {_determine_key_type(elt) for elt in unique_key}
278 if not key_type:
279 return None

/opt/conda/lib/python3.7/site-packages/sklearn/utils/__init__.py in <setcomp>(.0)
275 if isinstance(key, (list, tuple)):
276 unique_key = set(key)
--> 277 key_type = {_determine_key_type(elt) for elt in unique_key}
278 if not key_type:
279 return None

/opt/conda/lib/python3.7/site-packages/sklearn/utils/__init__.py in _determine_key_type(key, accept_slice)
256 return dtype_to_str[type(key)]
257 except KeyError:
--> 258 raise ValueError(err_msg)
259 if isinstance(key, slice):
260 if not accept_slice:

ValueError: No valid specification of the columns. Only a scalar, list or slice of all integers or all strings, or boolean mask is allowed

任何帮助将不胜感激!

编辑:错误信息指出“只允许使用全部为整数或全部为字符串的标量、列表或切片,或者布尔掩码”。integer_features 和 categorical_features 都是仅包含列名称的列表。

最佳答案

您已将列表用于整数特征和分类特征,而 Transformer 需要索引类型。

# Take the column labels straight from select_dtypes(...).columns (a pandas
# Index) instead of building Python lists.
# NOTE(review): exclude="object" keeps every non-object column (floats,
# bools, ...), whereas the question's list kept only int64 columns — confirm
# that the wider selection is intended.
# NOTE(review): the question later computes
# `categorical_features + integer_features`; with two Index objects `+` will
# presumably not concatenate the way it does for lists — verify.
categorical_features = X.select_dtypes(include="object").columns
integer_features = X.select_dtypes(exclude="object").columns

这样修改之后,就能解决你的错误。 :)

关于python - sklearn 分类器管道所需的 'valid specification of the columns' 是什么?,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/61641852/

26 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com