import numpy as np
import matplotlib.pyplot as plt
import warnings
# Silence runtime warnings, e.g. overflow in np.exp inside sigmoid
warnings.filterwarnings('ignore')
data = []
label = []
for line in open('testSet.txt').readlines():
    tmpLine = line.strip().split()
    data.append([1.0, float(tmpLine[0]), float(tmpLine[1])])  # first two columns are features, plus a 1.0 bias term
    label.append(int(tmpLine[2]))  # third column is the label
# Before conversion, data and label are plain Python lists.
# Converting them to matrices is a key step: it enables the vectorized math below.
data = np.matrix(data)
label = np.matrix(label).transpose()
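# Optional sketch (assumption: the Machine Learning in Action testSet.txt may
# not be on hand). This hypothetical helper builds synthetic data in the same
# (1.0, x1, x2) / {0,1}-label layout so the rest of the script still runs;
# the cluster centers are illustrative values, not from the real dataset.
def make_synthetic_data(m=100, seed=0):
    rng = np.random.default_rng(seed)
    half = m // 2
    pos = rng.normal(loc=[2.0, 2.0], scale=1.0, size=(half, 2))       # class-1 cluster
    neg = rng.normal(loc=[-2.0, -2.0], scale=1.0, size=(m - half, 2))  # class-0 cluster
    feats = np.vstack([pos, neg])
    d = np.matrix(np.hstack([np.ones((m, 1)), feats]))  # prepend the bias column
    l = np.matrix([1] * half + [0] * (m - half)).transpose()
    return d, l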
def sigmoid(x):
    return 1 / (1 + np.exp(-x))
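# Aside (a sketch, not part of the original code): np.exp(-x) overflows for
# large negative x, emitting the RuntimeWarnings silenced at the top of the
# file. A numerically safer variant simply clips the input first:
def sigmoid_stable(x):
    x = np.clip(x, -500, 500)  # keep exp() within float64 range
    return 1 / (1 + np.exp(-x))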
# Stochastic gradient descent
# data  : feature matrix
# label : labels
# alpha : learning rate
# epoch : number of passes over the data
# Both np.random.shuffle and np.random.permutation reshuffle an array.
# The difference: shuffle works in place, reordering the original array and
# returning nothing, while permutation leaves the original untouched and
# returns a new shuffled copy (see the small demo after this function).
def SGD(data, label, alpha, epoch):
    m, n = np.shape(data)
    # m : number of rows (samples)
    # n : number of columns (features)
    weights = np.ones((n, 1))  # n rows, 1 column
    for i in range(epoch):
        for j in np.random.permutation(m):  # visit samples in a fresh random order
            h = sigmoid(np.dot(data[j], weights))
            error = h - label[j]  # prediction minus ground truth
            weights = weights - (alpha * error * data[j]).transpose()
    return weights
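# Quick demo of the shuffle/permutation distinction noted above (throwaway
# array; purely illustrative):
tmp = np.arange(5)
new_order = np.random.permutation(tmp)  # returns a shuffled copy; tmp is unchanged
np.random.shuffle(tmp)                  # shuffles tmp in place; returns None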
# Batch gradient descent
# data  : feature matrix
# label : labels
# alpha : learning rate
# epoch : number of iterations
def BSG(data, label, alpha, epoch):
    m, n = np.shape(data)
    weights = np.ones((n, 1))
    for i in range(epoch):
        h = sigmoid(np.dot(data, weights))  # predictions for all samples at once
        error = h - label
        weights = weights - np.dot(data.T, error) * alpha / m  # average gradient over the whole batch
    return weights
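# Where the batch update comes from (a sketch of the standard derivation):
# with h = sigmoid(X @ w) and the log-loss L = -[y*log(h) + (1-y)*log(1-h)],
# the per-sample gradient is (h - y) * x, so averaging over all m samples
# gives grad = X.T @ (h - y) / m -- exactly the data.T / m term used above.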
# Mini-batch gradient descent
# data      : feature matrix
# label     : labels
# alpha     : learning rate
# epoch     : number of iterations
# miniBatch : size of one mini-batch
def miniBSG(data, label, alpha, epoch, miniBatch):
    m, n = np.shape(data)
    weights = np.ones((n, 1))
    for i in range(epoch):
        perm = np.random.permutation(m)  # reshuffle the sample indices each epoch
        # np.array_split takes the NUMBER of chunks, so divide m by the batch
        # size to get batches of (roughly) miniBatch samples each
        miniBatch_arr = np.array_split(perm, max(1, m // miniBatch))
        for mini in miniBatch_arr:
            h = sigmoid(np.dot(data[mini], weights))
            error = h - label[mini]
            weights = weights - np.dot(data[mini].T, error) * alpha / len(mini)
    return weights
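# Note (illustrative): np.array_split's second argument is the number of
# chunks, not the chunk size, hence the m // miniBatch above. For example,
# np.array_split(np.arange(100), 100 // 10) yields ten index arrays of 10.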
def plotLine(data, label, weights):
    m = np.shape(data)[0]
    x1, y1 = [], []  # points with label 1
    x2, y2 = [], []  # points with label 0
    for i in range(m):
        if label[i] == 1:
            x1.append(data[i, 1])
            y1.append(data[i, 2])
        else:
            x2.append(data[i, 1])
            y2.append(data[i, 2])
    plt.figure(figsize=(10, 5))
    plt.scatter(x1, y1, c='r', marker='o')
    plt.scatter(x2, y2, c='g', marker='x')
    # Decision boundary: w0 + w1*x1 + w2*x2 = 0  =>  x2 = (-w0 - w1*x1) / w2
    x = np.arange(-4, 4, 0.1)
    y = ((-weights[0] - weights[1] * x) / weights[2]).T
    plt.plot(x, y)
    plt.xlabel('X1')
    plt.ylabel('X2')
    plt.show()
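# Example usage (a sketch; the learning rates and epoch counts below are
# illustrative guesses, not tuned values from the original post):
w_sgd = SGD(data, label, alpha=0.01, epoch=50)
w_bgd = BSG(data, label, alpha=0.01, epoch=500)
w_mini = miniBSG(data, label, alpha=0.01, epoch=100, miniBatch=10)
plotLine(data, label, w_sgd)  # the other weight vectors plot the same way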
Logistic regression + gradient descent
Dataset source: Machine Learning in Action (机器学习实战)
Once I've studied this thoroughly, I'll post an updated, complete version of the code.