
Analysis and Implementation of the Logistic Regression Algorithm

Introduction to Logistic Regression
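Logistic regression is a linear classifier for binary labels: it feeds a linear combination of the features through the sigmoid function and reads the output as the probability of the positive class. In the usual notation (matching the `sigmoid` function in the code below):

$$h_w(x) = \sigma(w^\top x) = \frac{1}{1 + e^{-w^\top x}}$$

An output above the 0.5 threshold is classified as 1, otherwise as 0.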

LR: building the loss function with maximum likelihood estimation
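Each label $y_i \in \{0, 1\}$ is treated as a Bernoulli trial that succeeds with probability $h_w(x_i)$, so the log-likelihood of $m$ independent samples is the standard objective:

$$\ell(w) = \sum_{i=1}^{m} \Big[\, y_i \log h_w(x_i) + (1 - y_i) \log\big(1 - h_w(x_i)\big) \Big]$$

Maximizing $\ell(w)$ is equivalent to minimizing the cross-entropy loss $-\ell(w)$, which is why the parameters are found below by gradient ascent rather than descent.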




Solving for the optimal parameters with gradient ascent

1. The traditional solution method

2. The gradient ascent algorithm (the update rule is derived below)
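Setting the gradient of $\ell(w)$ to zero has no closed-form solution, so the weights are instead moved iteratively along the gradient. With $X$ the $m \times n$ feature matrix and $y$ the label column vector, the standard derivation gives

$$\nabla_w \ell(w) = X^\top \big(y - \sigma(Xw)\big), \qquad w \leftarrow w + \alpha\, X^\top \big(y - \sigma(Xw)\big),$$

which is exactly the pair of lines `grad = dataMatrix.transpose() * error` and `weights = weights + alpha * grad` in `getW` below.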



Pseudocode implementation
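A sketch of the procedure, consistent with the implementation that follows:

    initialize every component of W to 1
    repeat max_loop times:
        y_pre = sigmoid(X * W)            # predictions for the whole data set
        error = y - y_pre                 # true labels minus predictions
        W = W + alpha * X.T * error       # step along the gradient
    return W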

Code implementation of logistic regression
Dataset
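As `loadData` below assumes, each line of trainSet.txt and testSet.txt holds two tab-separated feature values followed by a 0/1 class label; a hypothetical line would look like `1.5<TAB>-0.3<TAB>1`. A constant 1.0 is prepended to every sample, so the bias term is folded into the weight vector and each sample becomes [1, x1, x2].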


from numpy import *

# fn: name of the data file
def loadData(fn):
    dataMat = []
    labelMat = []
    fi = open(fn)
    for line in fi:
        fd = line.strip().split('\t')
        # the string fields must be converted to float
        # [1.0, float(fd[0]), float(fd[1])] corresponds to [1, x1, x2]
        dataMat.append([1.0, float(fd[0]), float(fd[1])])
        labelMat.append(int(fd[2]))
    fi.close()
    return dataMat, labelMat

# learn W with gradient ascent
def getW(dataMat, labelMat):
    # matrix multiplication is needed, so convert the lists to matrices first
    dataMatrix = mat(dataMat)                # (400, 3)
    # wrong: labelMatrix = mat(labelMat) -- converting the list gives shape (1, 400)
    labelMatrix = mat(labelMat).transpose()  # after transpose: (400, 1)
    # get the row and column counts of the features, to size W
    m, n = shape(dataMatrix)                 # 400, 3
    # knowing the number of feature columns, initialize W to all ones
    weights = ones((n, 1))                   # (3, 1)
    alpha = 0.01
    max_loop = 200
    for i in range(max_loop):
        y_pre = sigmoid(dataMatrix * weights)    # (400, 1)
        # error = true value - predicted value
        error = labelMatrix - y_pre              # (400, 1)
        # gradient: X^T * error
        # wrong: grad = dataMatrix * error -- multiplying (400, 3) by (400, 1) raises an error
        # transpose: (3, 400) * (400, 1) = (3, 1)
        grad = dataMatrix.transpose() * error    # (3, 1)
        weights = weights + alpha * grad
    return weights

def sigmoid(z):
    return 1.0 / (1 + exp(-z))
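One caveat worth noting: for a large negative z, `exp(-z)` overflows a float and numpy emits a runtime warning (the result still saturates at 0). A numerically stable variant, offered here as an optional refinement rather than part of the original:

    def sigmoid_stable(z):
        # split on the sign of z so every exponent is non-positive
        z = asarray(z, dtype=float)
        out = empty_like(z)
        pos = z >= 0
        out[pos] = 1.0 / (1.0 + exp(-z[pos]))
        ez = exp(z[~pos])              # safe: z[~pos] < 0
        out[~pos] = ez / (1.0 + ez)
        return out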
if __name__ == '__main__':
    # example decision boundary: 2*x1 - x2 - 4 = 0
    # -4*1 + 2*x1 - x2 = [1, x1, x2] * [-4, 2, -1].T
    # x: (400, 3)  y: (400, 1)  w: (3, 1)

    x, y = loadData('trainSet.txt')
    tx, ty = loadData('testSet.txt')
    w = getW(x, y)
    y_pre = sigmoid(mat(tx) * w)   # (400, 1)
    # threshold: 0.5
    # sign(): maps values to -1 and 1
    # (sign(y_pre - 0.5) + 1) / 2: maps the result to 0 and 1
    y_pre_label = (sign(y_pre - 0.5) + 1) / 2

    # ----- metric computation -----
    # ----- precision, recall, F1_score -----
    # precision = TP / (TP + FP)
    # recall    = TP / (TP + FN)
    # F1        = 2pr / (p + r)
    # the true test labels ty are needed here
    tp = 0
    tn = 0
    fp = 0
    fn = 0
    for k in range(len(ty)):
        if y_pre_label[k] == 1 and ty[k] == 1:
            tp += 1
        if y_pre_label[k] == 0 and ty[k] == 0:
            tn += 1
        if y_pre_label[k] == 1 and ty[k] == 0:
            fp += 1
        if y_pre_label[k] == 0 and ty[k] == 1:
            fn += 1
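The loop above only accumulates the four counts; the metrics promised in the comments still have to be computed. A minimal completion appended at the end of the main block, following those formulas (the zero-denominator guards are an addition, not in the original):

    precision = tp / float(tp + fp) if tp + fp else 0.0
    recall = tp / float(tp + fn) if tp + fn else 0.0
    f1 = 2 * precision * recall / (precision + recall) if precision + recall else 0.0
    print('precision: %.3f  recall: %.3f  F1: %.3f' % (precision, recall, f1))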