Iris(鸢尾花)经典数据集
预处理:将数据随机切分为训练集和测试集
clf.fit:用训练集训练分类器
clf.predict:对测试集进行预测,计算准确率,验证分类器的性能
验证可使用准确率 metrics.accuracy_score
或混淆矩阵 metrics.confusion_matrix
# -*- coding: utf-8 -*-
import numpy as np
import pandas as pd
# 机器学习分3个步骤:数据预处理、数据建模、结果验证
def main(dot_path="./data/tree.dot"):
    """Train and evaluate an entropy-based decision tree on the iris dataset.

    The workflow follows three stages: pre-processing (load the data and
    split it into training and test sets), modelling (fit a decision tree
    classifier), and verification (print the accuracy and the confusion
    matrix). The fitted tree is finally exported in Graphviz DOT format.

    Args:
        dot_path: Destination of the exported DOT file. Missing parent
            directories are created before writing.
    """
    # Imports stay function-local, as in the original, so that importing
    # this module does not itself require scikit-learn.
    from pathlib import Path

    from sklearn import metrics, tree
    from sklearn.datasets import load_iris
    from sklearn.model_selection import train_test_split

    # --- Pre-processing ---
    # Per the original notes: 150 samples with 4 feature values each, and
    # a target that takes the three class labels 0, 1 and 2.
    iris = load_iris()
    print(iris)
    print(len(iris["data"]))  # number of samples before splitting

    # Hold out 20% of the samples as the test set; the fixed random_state
    # makes the random split reproducible.
    train_data, test_data, train_target, test_target = train_test_split(
        iris.data,
        iris.target,
        test_size=0.2,
        random_state=1,
    )

    # --- Model ---
    # Decision tree classifier using entropy as the split criterion.
    clf = tree.DecisionTreeClassifier(criterion="entropy")
    clf.fit(train_data, train_target)
    y_pred = clf.predict(test_data)

    # --- Verify ---
    # 1) accuracy, 2) confusion matrix.
    print(metrics.accuracy_score(y_true=test_target, y_pred=y_pred))
    print(metrics.confusion_matrix(y_true=test_target, y_pred=y_pred))

    # --- Export ---
    # Create the output directory first: open() would otherwise raise
    # FileNotFoundError when "./data" does not exist yet.
    Path(dot_path).parent.mkdir(parents=True, exist_ok=True)
    with open(dot_path, "w", encoding="utf-8") as fw:
        tree.export_graphviz(clf, out_file=fw)
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
决策树示例:

# Decision tree example as a flat script: pre-process, model, verify, export.
import os

from sklearn import metrics, tree
from sklearn.datasets import load_iris
# train_test_split lives in sklearn.model_selection; the old
# sklearn.cross_validation module was removed in scikit-learn 0.20.
from sklearn.model_selection import train_test_split

# --- Pre-processing ---
iris = load_iris()
# Split into training and test sets: 20% held out, reproducible split.
train_data, test_data, train_target, test_target = train_test_split(
    iris.data,
    iris.target,
    test_size=0.2,
    random_state=1,
)

# --- Model: classifier ---
clf = tree.DecisionTreeClassifier(criterion="entropy")
clf.fit(train_data, train_target)
y_pred = clf.predict(test_data)

# --- Verify ---
# Accuracy on the held-out test set.
print(metrics.accuracy_score(y_true=test_target, y_pred=y_pred))
# Confusion matrix: rows are the true labels, columns the predicted labels.
print(metrics.confusion_matrix(y_true=test_target, y_pred=y_pred))

# --- Write the result to a file ---
# The original opened an empty path (""), which always fails; use the same
# location as the main() example and make sure its directory exists.
dot_path = "./data/tree.dot"
os.makedirs(os.path.dirname(dot_path), exist_ok=True)
with open(dot_path, "w", encoding="utf-8") as fw:
    tree.export_graphviz(clf, out_file=fw)