我有一个代码示例:一个包含两个组件(PCA 和随机森林)的 sklearn 管道。我想利用管道的中间结果来增强模型的可解释性。我知道可以用 .get_params() 查看中间步骤,但是否可以保存或提取拟合后的中间结果,以便做进一步的操作?具体来说,我想对管道中的 PCA 使用它的附加功能(见代码中的 1.1 和 1.2 部分)。
from sklearn.datasets import load_breast_cancer
import numpy as np
import pandas as pd
from sklearn.decomposition import FastICA, PCA
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
# Load the breast-cancer dataset and assemble features plus label into one
# DataFrame; the label is appended as the last column, named "target".
cancer = load_breast_cancer()
data = np.column_stack((cancer.data, cancer.target))
columns = np.concatenate((cancer.feature_names, ["target"]))
df = pd.DataFrame(data=data, columns=columns)

# Columns 0-29 form the feature matrix; column 30 is the label vector.
X = df.iloc[:, :30].to_numpy()
Y = df.iloc[:, 30].to_numpy()

# Hold out 25% of the rows for testing; the fixed seed makes the split
# reproducible between runs.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.25,
    random_state=0,
)
# Create a pipeline: reduce the features to n_comp principal components,
# then classify the projected data with a random forest.
n_comp = 12
clf = Pipeline([('pca', PCA(n_comp)), ('RandomForest', RandomForestClassifier(n_estimators=100))])
clf.fit(X_train, Y_train)
# Evaluate the pipeline.
# The predictions must be computed before they can be scored; without this
# line Y_pred is undefined and classification_report raises NameError.
Y_pred = clf.predict(X_test)
cr = classification_report(Y_test, Y_pred)
print(cr)
# See the intermediate steps of the pipeline.
# get_params() maps each step name to its estimator object; the same object
# is also reachable as clf.named_steps['pca'] (or clf['pca']), which is the
# handle to use for inspecting the PCA step after fit().
print(clf.get_params()['pca'])
##1.1 Create a PCA outside of the pipeline.
# NOTE(review): this PCA is fitted on all of X with 10 components, whereas
# the pipeline's PCA is fitted on X_train with n_comp = 12 components —
# confirm the difference is intentional before comparing the two.
pca = PCA(n_components=10)
principalComponents = pca.fit_transform(X)
##1.2 Some explainability on the PCA outside of the pipeline.
# Print explicitly: a bare expression statement is evaluated and discarded
# when the file runs as a script, so nothing would be shown.
print(pca.explained_variance_ratio_)
智慧大石
相关分类