将 FeatureUnion 输出转换为字典，以便进行 DictVectorizer

简短的回答是，您需要从 sklearn 导入这些：

from sklearn.base import BaseEstimator, TransformerMixin

我还尝试复制它并遇到了一些其他问题，但我在这个答案中找到了解决方案：Sklearn_pandas 在管道中返回 TypeError

以下是我的完整代码：

# Import modules
import pandas as pd
from sklearn_pandas import DataFrameMapper, CategoricalImputer
from sklearn.preprocessing import Imputer, StandardScaler
from sklearn.pipeline import FeatureUnion
from sklearn.model_selection import cross_val_score, RandomizedSearchCV
from sklearn.base import BaseEstimator, TransformerMixin
import xgboost as xgb

# Create list of column names for kidney data: kidney_cols
kidney_cols = ['age', 'bp', 'sg', 'al', 'su', 'rbc', 'pc', 'pcc', 'ba', 'bgr',
               'bu', 'sc', 'sod', 'pot', 'hemo', 'pcv', 'wc', 'rc', 'htn', 'dm',
               'cad', 'appet', 'pe', 'ane', 'label']

# Load dataset: df_kidney
df_kidney = pd.read_csv('chronic_kidney_disease.csv', names=kidney_cols,
                        na_values='?')

# Replace label values with 0 (ckd) and 1
df_kidney['label'].replace({'ckd':0, 'notckd':1}, inplace=True)

# Define X and y: X, y
X, y = df_kidney.iloc[:, :-1], df_kidney['label'].values

# Define new column order for X: col_order
col_order = ['age', 'bp', 'sg', 'al', 'su', 'bgr', 'bu', 'sc', 'sod', 'pot',
             'hemo', 'pcv', 'wc', 'rc', 'rbc', 'pc', 'pcc', 'ba', 'htn', 'dm',
             'cad', 'appet', 'pe', 'ane']

# Rearrange columns of X
X = X[col_order]

# Create a boolean mask for categorical columns
categorical_feature_mask = X.dtypes == object

# Get a list of categorical column names
categorical_columns = X.columns[categorical_feature_mask].tolist()

# Get a list of non-categorical column names
non_categorical_columns = X.columns[~categorical_feature_mask].tolist()

# Create empty list to hold column imputers: transformers
transformers = []

# Create numeric imputers and add to list of transformers
transformers.extend([([num_col], [Imputer(strategy='median'),
                                  StandardScaler()]) for num_col
                     in non_categorical_columns])

# Create categorical imputers and add to list of transformers
transformers.extend([(cat_col, [CategoricalImputer()]) for cat_col in
                     categorical_columns])

# Use list of transformers to create a DataFrameMapper object
numeric_categorical_union = DataFrameMapper(transformers, input_df=True,
                                            df_out=True)

# Define Dictifier class to turn df into dictionary as part of pipeline
class Dictifier(BaseEstimator, TransformerMixin):
    def fit(self, X, y=None):
        return self

    def transform(self, X):
        return X.to_dict('records')

# Create full pipeline
pipeline = Pipeline([('featureunion', numeric_categorical_union),
                     ('dictifier', Dictifier()),
                     ('vectorizer', DictVectorizer(sort=False)),
                     ('clf', xgb.XGBClassifier(max_depth=3))])

# Perform cross-validation
cross_val_scores = cross_val_score(pipeline, X, y, scoring='roc_auc', cv=3)

将 FeatureUnion 输出转换为字典，以便进行 DictVectorizer

1回答