- html - 出于某种原因,IE8 对我的 Sass 文件中继承的 html5 CSS 不友好?
- JMeter 在响应断言中使用 span 标签的问题
- html - 在 :hover and :active? 上具有不同效果的 CSS 动画
- html - 相对于居中的 html 内容固定的 CSS 重复背景?
我正在应用类似的编码路径 tutorial对于我自己的项目,使用 ColumnTransformer 一步传输分类变量和数值变量的值。但我被困在它的 X_test = colT.fit(X_test)
上,我不知道预期的输出应该是什么。
这是我的代码,我在 def standardize_values
函数中遇到错误
import pandas as pd
import numpy as np
import ctypes
import re
import pickle
from scipy import stats
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import Normalizer, OneHotEncoder
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import LinearRegression
from sklearn import metrics
import helper_functions.helper_functions as hf
import data_preparation as data_prep
# Main class
######################################################################
class Machine_Learning_ProjectX(data_prep.DataPreparation_ProjectX):
def __init__(self):
self.pickle_descriptive_stats_demographic = None
self.pickle_descriptive_stats_clinical = None
self.pickle_descriptive_stats_rx = None
self.pickle_descriptive_stats_csu = None
self.df_demographic = None
self.df_clinical = None
self.df_rx = None
self.df_csu = None
self.df_master = None
self.varname_cat_all = ['INDEX_RURAL_CAT', 'INDEX_SEX', 'AIDS_TAG', 'CHF_TAG', 'CKD_TAG', 'CLD_MILD_TAG', 'CLD_SEVERE_TAG',
'COPD_TAG', 'CTD_TAG', 'CVA_TAG', 'DM_MILD_TAG', 'DM_SEVERE_TAG', 'METS_TAG', 'MI_TAG', 'PUD_TAG',
'PVD_TAG', 'DEMENTIA_TAG', 'HEMIPLEGIA_TAG', 'TUMOR_TAG', 'INDEX_DIN_CAT']
self.varname_num_all = ['INDEX_AGE', 'CCI_SCORE', 'PREINDEX1YR_N_DRUGX_FG_MPR', 'PREINDEX1YR_N_DRUGX_SG_MPR', 'PREINDEX1YR_N_DRUGY_TYPICAL_MPR',
'PREINDEX1YR_N_DRUGY_ATYPICAL_MPR', 'POSTINDEX1YR_N_DRUGX_FG_MPR', 'POSTINDEX1YR_N_DRUGX_SG_MPR',
'POSTINDEX1YR_N_DRUGY_TYPICAL_MPR', 'POSTINDEX1YR_N_DRUGY_ATYPICAL_MPR',
'SUMMED_ALLCAUSE_NUM_PRE2YR', 'SUMMED_ALLCAUSE_NUM_POST2YR', 'SUMMED_ALLCAUSE_COST_PRE2YR',
'SUMMED_ALLCAUSE_COST_POST2YR', 'SUMMED_DXTARGET_NUM_PRE2YR', 'SUMMED_DXTARGET_NUM_POST2YR',
'SUMMED_DXTARGET_COST_PRE2YR', 'SUMMED_DXTARGET_COST_POST2YR', 'DAD_ALLCAUSE_NUM_PRE2YR',
'DAD_ALLCAUSE_NUM_POST2YR', 'DAD_ALLCAUSE_COST_PRE2YR', 'DAD_ALLCAUSE_COST_POST2YR',
'DAD_DXTARGET_NUM_PRE2YR', 'DAD_DXTARGET_NUM_POST2YR', 'DAD_DXTARGET_COST_PRE2YR',
'DAD_DXTARGET_COST_POST2YR', 'PC_ALLCAUSE_NUM_PRE2YR', 'PC_ALLCAUSE_NUM_POST2YR',
'PC_ALLCAUSE_COST_PRE2YR', 'PC_ALLCAUSE_COST_POST2YR', 'PC_DXTARGET_NUM_PRE2YR',
'PC_DXTARGET_NUM_POST2YR', 'PC_DXTARGET_COST_PRE2YR', 'PC_DXTARGET_COST_POST2YR',
'NACRS_ALLCAUSE_NUM_PRE2YR', 'NACRS_ALLCAUSE_NUM_POST2YR', 'NACRS_ALLCAUSE_COST_PRE2YR',
'NACRS_ALLCAUSE_COST_POST2YR', 'NACRS_DXTARGET_NUM_PRE2YR', 'NACRS_DXTARGET_NUM_POST2YR',
'NACRS_DXTARGET_COST_PRE2YR', 'NACRS_DXTARGET_COST_POST2YR']
self.varname_num_unused = ['POSTINDEX1YR_N_DRUGX_FG_MPR', 'POSTINDEX1YR_N_DRUGX_SG_MPR', 'POSTINDEX1YR_N_DRUGY_TYPICAL_MPR',
'POSTINDEX1YR_N_DRUGY_ATYPICAL_MPR', 'SUMMED_ALLCAUSE_NUM_POST2YR', 'SUMMED_ALLCAUSE_COST_POST2YR',
'SUMMED_DXTARGET_NUM_POST2YR', 'SUMMED_DXTARGET_COST_POST2YR', 'DAD_ALLCAUSE_NUM_POST2YR',
'DAD_ALLCAUSE_COST_POST2YR', 'DAD_DXTARGET_NUM_POST2YR', 'DAD_DXTARGET_COST_POST2YR', 'PC_ALLCAUSE_NUM_POST2YR',
'PC_ALLCAUSE_COST_POST2YR', 'PC_DXTARGET_NUM_POST2YR', 'PC_DXTARGET_COST_POST2YR', 'NACRS_ALLCAUSE_NUM_POST2YR',
'NACRS_ALLCAUSE_COST_POST2YR', 'NACRS_DXTARGET_NUM_POST2YR', 'NACRS_DXTARGET_COST_POST2YR']
self.varname_id = ['PHN_ENC', 'INDEX_DATE']
varname_label = ['SUMMED_ALLCAUSE_NUM_POST2YR', 'SUMMED_DXTARGET_NUM_POST2YR', 'SUMMED_ALLCAUSE_COST_POST2YR',
'SUMMED_DXTARGET_COST_POST2YR', ]
self.y_label = varname_label[0]
self.varname_import = list(set(self.varname_id+self.varname_cat_all+self.varname_num_all)-set(self.varname_num_unused))+[self.y_label]
self.result_dict_ml = {}
def ml_steps(self):
self.import_references()
self.import_pickle_descriptive_stats_demographic(on_switch=True,
import_dir=self.result_dir,
import_filename='JAHIP_V2_SubjectGroup_DescriptiveStats_Demographic.pickle')
self.import_pickle_descriptive_stats_clinical(on_switch=True,
import_dir=self.result_dir,
import_filename='JAHIP_V2_SubjectGroup_DescriptiveStats_Clinical.pickle')
self.import_pickle_descriptive_stats_rx(on_switch=True,
import_dir=self.result_dir,
import_filename='JAHIP_V2_SubjectGroup_DescriptiveStats_Rx.pickle')
self.import_pickle_descriptive_stats_csu(on_switch=True,
import_dir=self.result_dir,
import_filename='JAHIP_V2_SubjectGroup_DescriptiveStats_CSU.pickle')
self.import_df_demographic(on_switch=True,
import_dir=self.result_dir,
import_filename='JAHIP_V2_SubjectGroup_DF_Demographic_SubjectLevel.csv')
self.import_df_clinical(on_switch=True,
import_dir=self.result_dir,
import_filename='JAHIP_V2_SubjectGroup_DF_Clinical_SubjectLevel.csv')
self.import_df_rx(on_switch=True,
import_dir=self.result_dir,
import_filename='JAHIP_V2_SubjectGroup_DF_Rx_SubjectLevel.csv')
self.import_df_csu(on_switch=True,
import_dir=self.result_dir,
import_filename='JAHIP_V2_SubjectGroup_DF_CSU_SubjectLevel.csv')
self.merge_dfs(on_switch=True)
self.split_into_training_and_test_sets(on_switch=True)
self.generate_new_features(on_switch=False)
self.handle_missing_values(on_switch=True)
self.standardize_values(on_switch=True)
self.ml_pipeline(on_switch=True)
def import_references(self):
super().__init__()
super()._pandas_output_setting()
super().dir_name()
super().file_name()
super().constant_var()
super().import_ref_data()
# Decorators
def on_or_off(func):
def wrapper(self, *args, on_switch=False, **kwargs):
if on_switch:
func(self, *args, on_switch=on_switch, **kwargs)
return wrapper
# Core class functions
@on_or_off
def import_pickle_descriptive_stats_demographic(self, on_switch, import_dir=None, import_filename=None):
with open(import_dir+import_filename, 'rb') as handle:
self.pickle_descriptive_stats_demographic = pickle.load(handle)
@on_or_off
def import_pickle_descriptive_stats_clinical(self, on_switch, import_dir=None, import_filename=None):
with open(import_dir+import_filename, 'rb') as handle:
self.pickle_descriptive_stats_clinical = pickle.load(handle)
@on_or_off
def import_pickle_descriptive_stats_rx(self, on_switch, import_dir=None, import_filename=None):
with open(import_dir+import_filename, 'rb') as handle:
self.pickle_descriptive_stats_rx = pickle.load(handle)
@on_or_off
def import_pickle_descriptive_stats_csu(self, on_switch, import_dir=None, import_filename=None):
with open(import_dir+import_filename, 'rb') as handle:
self.pickle_descriptive_stats_csu = pickle.load(handle)
@on_or_off
def import_df_demographic(self, on_switch, import_dir=None, import_filename=None):
self.df_demographic = pd.read_csv(import_dir+import_filename, dtype={'PHN_ENC':'str'})
@on_or_off
def import_df_clinical(self, on_switch, import_dir=None, import_filename=None):
self.df_clinical = pd.read_csv(import_dir+import_filename, dtype={'PHN_ENC':'str'})
@on_or_off
def import_df_rx(self, on_switch, import_dir=None, import_filename=None):
self.df_rx = pd.read_csv(import_dir+import_filename, dtype={'PHN_ENC':'str'})
@on_or_off
def import_df_csu(self, on_switch, import_dir=None, import_filename=None):
self.df_csu = pd.read_csv(import_dir+import_filename, dtype={'PHN_ENC':'str'})
@on_or_off
def merge_dfs(self, on_switch):
self.df_master = self.df_demographic.copy()
self.df_master = self.df_master.merge(self.df_clinical, on='PHN_ENC', how='outer')
self.df_master = self.df_master.merge(self.df_rx, on='PHN_ENC', how='outer')
self.df_master = self.df_master.merge(self.df_csu, on='PHN_ENC', how='outer')
assert (len(self.df_master)==self.df_master['PHN_ENC'].nunique()), 'Error: Same subject appears on multiple rows.'
# Remove duplicated columns
self.df_master = self.df_master.loc[:,~self.df_master.columns.str.contains('_y', case=True)]
self.df_master.columns = self.df_master.columns.str.replace('_x', '')
self.df_master = self.df_master.loc[:,~self.df_master.columns.duplicated()]
# Remove unused columns
self.df_master = self.df_master.loc[:, ~self.df_master.columns.str.contains('^Unnamed')]
self.df_master = self.df_master.drop(['temp'], axis=1)
# Retain only needed columns
self.df_master = self.df_master[self.varname_import]
@on_or_off
def split_into_training_and_test_sets(self, on_switch):
self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(self.df_master, self.df_master[self.y_label],
test_size=0.3, random_state=888)
self.X_train = self.X_train.drop(['PHN_ENC', 'INDEX_DATE'], axis=1)
self.X_test = self.X_test.drop(['PHN_ENC', 'INDEX_DATE'], axis=1)
@on_or_off
def generate_new_features(self, on_switch):
pass
@on_or_off
def handle_missing_values(self, on_switch):
self.X_train = self.X_train.apply(lambda x:x.fillna(x.value_counts().index[0]))
self.X_test = self.X_test.apply(lambda x:x.fillna(x.value_counts().index[0]))
self.y_train = self.y_train.fillna(0)
self.y_test = self.y_test.fillna(0)
@on_or_off
def standardize_values(self, on_switch):
colT = ColumnTransformer(
[ ('DUMMY_COL', OneHotEncoder(categories=[['URBAN', 'RURAL'],
['M', 'F'],
['AIDS', 'NON-AIDS'],
['CHF', 'NON-CHF'],
['CKD', 'NON-CKD'],
['CLD_MILD', 'NON-CLD_MILD'],
['CLD_SEVERE', 'NON-CLD_SEVERE'],
['COPD', 'NON-COPD'],
['CTD', 'NON-CTD'],
['CVA', 'NON-CVA'],
['DM_MILD', 'NON-DM_MILD'],
['DM_SEVERE', 'NON-DM_SEVERE'],
['METS', "NON-METS"],
['MI', 'NON-MI'],
['PUD', 'NON-PUD'],
['PVD', 'NON-PVD'],
['DEMENTIA', 'NON-DEMENTIA'],
['HEMIPLEGIA', 'NON-HEMIPLEGIA'],
['TUMOR', 'NON-TUMOR'],
['XX', 'YY', 'ZZ'],
]),
self.varname_cat_all),
('NORM_COL', Normalizer(norm='l1'),
list(set(self.varname_num_all)-set(self.varname_num_unused)))
])
print(self.X_train.shape) # (920, 43)
print(self.X_test.shape) # (395, 43)
self.X_train = colT.fit_transform(self.X_train)
self.X_test = colT.fit(self.X_test)
print(self.X_train.shape) # (920, 63)
print(self.X_test) # Printing some weird output "ColumnTransformer..."
print(self.X_test.shape) # AttributeError: 'ColumnTransformer' object has no attribute 'shape'
@on_or_off
def ml_pipeline(self, on_switch):
regressor = LinearRegression()
regressor.fit(self.X_train, self.y_train) # training the algorithm
#y_pred = regressor.predict(self.X_test) # doesn't work
# Main function
######################################################################
def main():
x = Machine_Learning_ProjectX()
x.ml_steps()
if __name__ == '__main__':
main()
# Output below
(920, 43)
(395, 43)
(920, 63)
ColumnTransformer(n_jobs=None, remainder='drop', sparse_threshold=0.3,
transformer_weights=None,
transformers=[('DUMMY_COL', OneHotEncoder(categorical_features=None,
categories=[['URBAN', 'RURAL'], ['M', 'F'], ['AIDS', 'NON-AIDS'], ['CHF', 'NON-CHF'], ['CKD', 'NON-CKD'], ['CLD_MILD', 'NON-CLD_MILD'], ['CLD_SEVERE', 'NON-CLD_SEVERE'], ['COPD', 'NON-COPD'], ['CTD', 'NON-CTD'], ['CVA', 'NON..._DXTARGET_NUM_PRE2YR', 'PREINDEX1YR_N_DRUGY_TYPICAL_MPR', 'INDEX_AGE', 'NACRS_ALLCAUSE_NUM_PRE2YR'])])Traceback (most recent call last):
... line 212, in standardize_values
print(self.X_test.shape)
AttributeError: 'ColumnTransformer' object has no attribute 'shape'
最佳答案
<小时/>
<小时/>The author of the tutorial has made a mistake.
self.X_train = colT.fit_transform(self.X_train)
self.X_test = colT.fit(self.X_test)
这里 self.X_train
是 .fit_transform
的输出方法,所以它是 numpy
目的。另一方面,self.X_test
是 .fit
的输出方法,它是一个模型对象,没有 .shape
属性!
您需要:
self.X_train = colT.fit_transform(self.X_train)
self.X_test = colT.transform(self.X_test)
P.S:查看文章末尾本教程评论中其他人的说法。
关于python-3.x - 属性错误: 'ColumnTransformer' object has no attribute 'shape' in Python Scikit-learn,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/56779111/
关于 this页面,我看到以下代码: if ((attributes & FileAttributes.Hidden) == FileAttributes.Hidden) 但我不明白为什么会变成这样。
函数pthread_mutex_init允许您指定指向属性的指针。但是我还没有找到关于pthread属性是什么的很好的解释。我一直只是提供NULL。这个论点有用吗? 该文档,对于那些忘记它的人: PT
我们有一个 xml 节点“item”,其属性为“style”,即“Header1”。但是,这种风格可以改变。我们有一个名为 Header1 的属性集,它定义了它在 PDF 中的外观,通过 xsl:fo
我的任务是在用户点击它时从输入框中删除占位符并使标签可见。如果用户未在其中再次填写任何内容,请放回占位符并使标签不可见。 我可以隐藏它但不能重新分配它。我试过 element.setAttribute
我从文章中编写代码,并且有: public IActionResult Create([Bind(Include="Imie,Nazwisko,Stanowisko,Wiek")] Pracownik
你能给我解释一下以下属性吗? 1) [MonoTouch.Foundation.Register("SomeClass")] 这个属性是否只用于向IB注册类?以编程方式扩展 iOS 类时是否必须使用此
我正在编写一个 C++ 程序,在调试时我在执行以下函数: int CClass::do_something() { ... // I've put a breakpoint here } 我的 C
我已经在 polymer 0.5 中构建了我的应用程序。 现在我已经将它更新到 polymer 1.0。 对于响应式布局,我使用了一个布局属性,它使用 Polymer 0.5 中布局属性的自定义逻辑。
我是使用 Jade 的新手——到目前为止它很棒。 但是我需要发生的一件事是具有“itemscope”属性的元素: 我的 Jade 符是: header(itemscope, itemtype='ht
我正在研究一个厨师实现,有时在过去的地方使用了 attribute.set,attribute.default 会这样做。为了解决这个问题,我对 Chef 属性优先范式非常熟悉。我知道“正常”属性(使
我经常看到html data-attribute (s) 将特定值/参数添加到 html 元素,例如使用它们将按钮“链接”到要打开的模式对话框等的 Bootstrap。 现在,我看到一个几乎著名的
假设如下: def create_new_salt self.salt = self.object_id.to_s + rand.to_s end 为什么使用“ self ”更好。而不是实例变量“
根据我的理解,Backbone.js 模型的属性应该通过以下方式声明为有点私有(private)的成员变量 this.set({ attributeName: attributeValue }) //
我有一个看起来像下面的XML文档: ... ... ... ...
我正在实现一个 JSF 组件,需要有条件地添加一些属性。这个问题类似于之前的 JSF: p:dataTable with f:attribute results in "argument type m
我正在尝试将应用程序发布到 Android 电子市场,但出现以下错误: W/ResourceType(16964): No known package when getting value for r
抱歉这么具体的应用程序,但我注意到另一篇关于 Maya 开发的回答很好的帖子。 我刚刚为 Maya 编写了一个插件节点。它只是根据湍流函数杀死一堆粒子。湍流由许多可在属性编辑器中调整的属性驱动。 在属
我在 html 元素中的数据属性为 Update .它具有数据属性的 bool 值。 跟下面的元素Update有什么区别吗?因为数据属性用双引号引起来。 html是否支持 bool 值? 最佳答案 b
我正在尝试为企业库 5.0 的异常处理 block 创建自定义异常处理程序。据我了解,我需要使用属性开始上课“[ConfigurationElementType(typeof(CustomHandle
我找不到这两个选择器之间的区别。两者似乎都做同样的事情,即根据包含给定字符串的特定属性值选择标签。 对于 [attribute~=value] :http://www.w3schools.com/cs
我是一名优秀的程序员,十分优秀!