holdtom
# Short answer: you need to import these from sklearn:
#     from sklearn.base import BaseEstimator, TransformerMixin
# I also tried to reproduce it and ran into a few other problems, but I found
# the solution in this answer: "Sklearn_pandas in a pipeline returns TypeError".
# Here is my full code:

# Import modules
import pandas as pd
import xgboost as xgb
from sklearn.base import BaseEstimator, TransformerMixin
# DictVectorizer and Pipeline are both used below; without these two imports
# the script fails with NameError when the pipeline is built.
from sklearn.feature_extraction import DictVectorizer
from sklearn.model_selection import cross_val_score, RandomizedSearchCV
from sklearn.pipeline import FeatureUnion, Pipeline
# NOTE(review): `Imputer` (sklearn.preprocessing) and `CategoricalImputer`
# (sklearn_pandas) appear to exist only in older releases of those libraries;
# newer releases offer sklearn.impute.SimpleImputer instead -- confirm the
# installed versions before running.
from sklearn.preprocessing import Imputer, StandardScaler
from sklearn_pandas import DataFrameMapper, CategoricalImputer

# Create list of column names for kidney data: kidney_cols
kidney_cols = [
    'age', 'bp', 'sg', 'al', 'su', 'rbc', 'pc', 'pcc', 'ba', 'bgr', 'bu',
    'sc', 'sod', 'pot', 'hemo', 'pcv', 'wc', 'rc', 'htn', 'dm', 'cad',
    'appet', 'pe', 'ane', 'label',
]

# Load dataset: df_kidney ('?' marks a missing value in the raw file)
df_kidney = pd.read_csv(
    'chronic_kidney_disease.csv', names=kidney_cols, na_values='?'
)

# Replace label values with 0 (ckd) and 1 (notckd).
# Assign the result back instead of calling replace(..., inplace=True) on the
# column selection: the chained in-place form is not guaranteed to write
# through to df_kidney in recent pandas versions.
df_kidney['label'] = df_kidney['label'].replace({'ckd': 0, 'notckd': 1})

# Define X and y: every column except the last is a feature, 'label' is target
X = df_kidney.iloc[:, :-1]
y = df_kidney['label'].values

# Define new column order for X: col_order (numeric columns first, then the
# categorical ones)
col_order = [
    'age', 'bp', 'sg', 'al', 'su', 'bgr', 'bu', 'sc', 'sod', 'pot', 'hemo',
    'pcv', 'wc', 'rc', 'rbc', 'pc', 'pcc', 'ba', 'htn', 'dm', 'cad', 'appet',
    'pe', 'ane',
]

# Rearrange columns of X
X = X[col_order]

# Create a boolean mask for categorical columns
categorical_feature_mask = X.dtypes == object

# Get a list of categorical column names
categorical_columns = X.columns[categorical_feature_mask].tolist()

# Get a list of non-categorical column names
non_categorical_columns = X.columns[~categorical_feature_mask].tolist()

# Create empty list to hold column imputers: transformers
transformers = []

# Create numeric imputers and add to list of transformers.
# The selector is a one-element list ([num_col]) so the mapper hands the
# transformers a 2-D array, which Imputer/StandardScaler expect.
transformers.extend(
    [
        ([num_col], [Imputer(strategy='median'), StandardScaler()])
        for num_col in non_categorical_columns
    ]
)

# Create categorical imputers and add to list of transformers.
# The selector is a plain string here so CategoricalImputer receives a 1-D
# array.
transformers.extend(
    [(cat_col, [CategoricalImputer()]) for cat_col in categorical_columns]
)

# Use list of transformers to create a DataFrameMapper object
numeric_categorical_union = DataFrameMapper(
    transformers, input_df=True, df_out=True
)


# Define Dictifier class to turn df into dictionary as part of pipeline
class Dictifier(BaseEstimator, TransformerMixin):
    """Stateless transformer that converts a DataFrame into row dictionaries.

    It sits between the DataFrameMapper (which emits a DataFrame because of
    ``df_out=True``) and the DictVectorizer step that follows it in the
    pipeline.
    """

    def fit(self, X, y=None):
        """Do nothing and return ``self``; there is nothing to learn."""
        return self

    def transform(self, X):
        """Return ``X.to_dict('records')``: one dict per row of ``X``."""
        return X.to_dict('records')


# Create full pipeline
pipeline = Pipeline([
    ('featureunion', numeric_categorical_union),
    ('dictifier', Dictifier()),
    ('vectorizer', DictVectorizer(sort=False)),
    ('clf', xgb.XGBClassifier(max_depth=3)),
])

# Perform cross-validation
cross_val_scores = cross_val_score(pipeline, X, y, scoring='roc_auc', cv=3)