import numpy as np
import matplotlib.pyplot as plt
import warnings
# Silence runtime warnings, e.g. overflow in np.exp inside sigmoid
warnings.filterwarnings('ignore')
data = []
label = []
for line in open('testSet.txt').readlines():
    tmpLine = line.strip().split()
    data.append([1.0, float(tmpLine[0]), float(tmpLine[1])])  # first two columns are features, plus a 1.0 bias term
    label.append(int(tmpLine[2]))  # third column is the label
# Before conversion, data and label are plain Python lists.
# Converting them to matrices is a key step: it enables the vectorized math below.
data = np.matrix(data)
label = np.matrix(label).transpose()
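# Optional sketch (assumption: the Machine Learning in Action testSet.txt may
# not be on hand). This hypothetical helper builds synthetic data in the same
# (1.0, x1, x2) / {0,1}-label layout so the rest of the script still runs;
# the cluster centers are illustrative values, not from the real dataset.
def make_synthetic_data(m=100, seed=0):
    rng = np.random.default_rng(seed)
    half = m // 2
    pos = rng.normal(loc=[2.0, 2.0], scale=1.0, size=(half, 2))       # class-1 cluster
    neg = rng.normal(loc=[-2.0, -2.0], scale=1.0, size=(m - half, 2))  # class-0 cluster
    feats = np.vstack([pos, neg])
    d = np.matrix(np.hstack([np.ones((m, 1)), feats]))  # prepend the bias column
    l = np.matrix([1] * half + [0] * (m - half)).transpose()
    return d, l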
def sigmoid(x):
    return 1 / (1 + np.exp(-x))
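# Aside (a sketch, not part of the original code): np.exp(-x) overflows for
# large negative x, emitting the RuntimeWarnings silenced at the top of the
# file. A numerically safer variant simply clips the input first:
def sigmoid_stable(x):
    x = np.clip(x, -500, 500)  # keep exp() within float64 range
    return 1 / (1 + np.exp(-x))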
# Stochastic gradient descent
# data  : feature matrix
# label : labels
# alpha : learning rate
# epoch : number of passes over the data
# Both np.random.shuffle and np.random.permutation reshuffle an array.
# The difference: shuffle works in place, reordering the original array and
# returning nothing, while permutation leaves the original untouched and
# returns a new shuffled copy (see the small demo after this function).
def SGD(data, label, alpha, epoch):
    m, n = np.shape(data)
    # m : number of rows (samples)
    # n : number of columns (features)
    weights = np.ones((n, 1))  # n rows, 1 column
    for i in range(epoch):
        for j in np.random.permutation(m):  # visit samples in a fresh random order
            h = sigmoid(np.dot(data[j], weights))
            error = h - label[j]  # prediction minus ground truth
            weights = weights - (alpha * error * data[j]).transpose()
    return weights
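# Quick demo of the shuffle/permutation distinction noted above (throwaway
# array; purely illustrative):
tmp = np.arange(5)
new_order = np.random.permutation(tmp)  # returns a shuffled copy; tmp is unchanged
np.random.shuffle(tmp)                  # shuffles tmp in place; returns None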
# Batch gradient descent
# data  : feature matrix
# label : labels
# alpha : learning rate
# epoch : number of iterations
def BSG(data, label, alpha, epoch):
    m, n = np.shape(data)
    weights = np.ones((n, 1))
    for i in range(epoch):
        h = sigmoid(np.dot(data, weights))  # predictions for all samples at once
        error = h - label
        weights = weights - np.dot(data.T, error) * alpha / m  # average gradient over the whole batch
    return weights
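# Where the batch update comes from (a sketch of the standard derivation):
# with h = sigmoid(X @ w) and the log-loss L = -[y*log(h) + (1-y)*log(1-h)],
# the per-sample gradient is (h - y) * x, so averaging over all m samples
# gives grad = X.T @ (h - y) / m -- exactly the data.T / m term used above.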
# Mini-batch gradient descent
# data      : feature matrix
# label     : labels
# alpha     : learning rate
# epoch     : number of iterations
# miniBatch : size of one mini-batch
def miniBSG(data, label, alpha, epoch, miniBatch):
    m, n = np.shape(data)
    weights = np.ones((n, 1))
    for i in range(epoch):
        perm = np.random.permutation(m)  # reshuffle the sample indices each epoch
        # np.array_split takes the NUMBER of chunks, so divide m by the batch
        # size to get batches of (roughly) miniBatch samples each
        miniBatch_arr = np.array_split(perm, max(1, m // miniBatch))
        for mini in miniBatch_arr:
            h = sigmoid(np.dot(data[mini], weights))
            error = h - label[mini]
            weights = weights - np.dot(data[mini].T, error) * alpha / len(mini)
    return weights
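# Note (illustrative): np.array_split's second argument is the number of
# chunks, not the chunk size, hence the m // miniBatch above. For example,
# np.array_split(np.arange(100), 100 // 10) yields ten index arrays of 10.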
def plotLine(data, label, weights):
    m = np.shape(data)[0]
    x1, y1 = [], []  # points with label 1
    x2, y2 = [], []  # points with label 0
    for i in range(m):
        if label[i] == 1:
            x1.append(data[i, 1])
            y1.append(data[i, 2])
        else:
            x2.append(data[i, 1])
            y2.append(data[i, 2])
    plt.figure(figsize=(10, 5))
    plt.scatter(x1, y1, c='r', marker='o')
    plt.scatter(x2, y2, c='g', marker='x')
    # Decision boundary: w0 + w1*x1 + w2*x2 = 0  =>  x2 = (-w0 - w1*x1) / w2
    x = np.arange(-4, 4, 0.1)
    y = ((-weights[0] - weights[1] * x) / weights[2]).T
    plt.plot(x, y)
    plt.xlabel('X1')
    plt.ylabel('X2')
    plt.show()
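# Example usage (a sketch; the learning rates and epoch counts below are
# illustrative guesses, not tuned values from the original post):
w_sgd = SGD(data, label, alpha=0.01, epoch=50)
w_bgd = BSG(data, label, alpha=0.01, epoch=500)
w_mini = miniBSG(data, label, alpha=0.01, epoch=100, miniBatch=10)
plotLine(data, label, w_sgd)  # the other weight vectors plot the same way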
Logistic regression + gradient descent
Dataset source: Machine Learning in Action (机器学习实战)
Once I've studied this thoroughly, I'll post an updated, complete version of the code.