手记

线性回归基础代码

# Use linear model to model this data.
from sklearn.linear_model import LinearRegression
import numpy as np

lr=LinearRegression()
lr.fit(pga.distance[:,np.newaxis],pga['accuracy'])  # Another way is using pga[['distance']]
theta0=lr.intercept_
theta1=lr.coef_
print(theta0)
print(theta1)


#calculating cost-function for each theta1
#计算平均累积误差
def cost(x,y,theta0,theta1):
    J=0
    for i in range(len(x)):
        mse=(x[i]*theta1+theta0-y[i])**2
        J+=mse
    return J/(2*len(x))

theta0=100
theta1s = np.linspace(-3,2,197)
costs=[]
for theta1 in theta1s:
    costs.append(cost(pga['distance'],pga['accuracy'],theta0,theta1))
plt.plot(theta1s,costs)
plt.show()
print(pga.distance)


#调整theta
def partial_cost_theta0(x,y,theta0,theta1):
    #我们的模型是线性拟合函数时:y=theta1*x + theta0,而不是sigmoid函数,当非线性时我们可以用sigmoid
    #直接多整个x series操作,省的一个一个计算,最终求sum 再平均
    h=theta1*x+theta0  
    diff=(h-y)
    partial=diff.sum()/len(diff)
    return partial
partial0=partial_cost_theta0(pga.distance,pga.accuracy,1,1)

def partial_cost_theta1(x,y,theta0,theta1):
    #我们的模型是线性拟合函数:y=theta1*x + theta0,而不是sigmoid函数,当非线性时我们可以用sigmoid
    h=theta1*x+theta0
    diff=(h-y)*x
    partial=diff.sum()/len(diff)
    return partial
partial1=partial_cost_theta1(pga.distance,pga.accuracy,0,5)
print(partial0)
print(partial1)


def gradient_descent(x,y,alpha=0.1,theta0=0,theta1=0):  #设置默认参数
    #计算成本
    #调整权值
    #计算错误代价,判断是否收敛或者达到最大迭代次数
    most_iterations=1000
    convergence_thres=0.000001 
   
    c=cost(x,y,theta0,theta1)
    costs=[c]
    cost_pre=c+convergence_thres+1.0
    
    counter=0
    while( (np.abs(c-cost_pre)>convergence_thres) & (counter<most_iterations) ):
        update0=alpha*partial_cost_theta0(x,y,theta0,theta1)
        update1=alpha*partial_cost_theta1(x,y,theta0,theta1)
        
        theta0-=update0
        theta1-=update1

        cost_pre=c
        c=cost(x,y,theta0,theta1)
        costs.append(c)
        counter+=1
    return  {'theta0': theta0, 'theta1': theta1, "costs": costs}

print("Theta1 =", gradient_descent(pga.distance, pga.accuracy)['theta1'])
costs=gradient_descent(pga.distance,pga.accuracy,alpha=.01)['cost']
print(gradient_descent(pga.distance, pga.accuracy,alpha=.01)['theta1'])
plt.scatter(range(len(costs)),costs)
plt.show()

数据集 :
复制下面数据,保存为: pga.csv

distance,accuracy
290.3,59.5
302.1,54.7
287.1,62.4
282.7,65.4
299.1,52.8
300.2,51.1
300.9,58.3
279.5,73.9
287.8,67.6
284.7,67.2
296.7,60
283.3,59.4
284,72.2
292,62.1
282.6,66.5
287.9,60.9
279.2,67.3
291.7,64.8
289.9,58.1
289.8,61.7
298.8,56.4
280.8,60.5
294.9,57.5
287.5,61.8
282.7,56
277.7,72.5
270.5,71.7
285.2,66
315.1,55.2
281.9,67.6
293.3,58.2
286,59.9
285.6,58.2
289.9,65.7
277.5,59
293.6,56.8
301.1,65.4
300.8,63.4
287.4,67.3
281.8,72.6
277.4,63.1
279.1,66.5
287.4,66.4
280.9,62.3
287.8,57.2
261.4,69.2
272.6,69.4
291.3,65.3
294.2,52.8
285.5,49
287.9,61.1
282.2,65.6
301.3,58.2
276.2,61.7
281.6,68.1
275.5,61.2
309.7,53.1
287.7,56.4
291.6,56.9
284.1,65
299.6,57.5
282.7,60
271.5,72
292.1,58.2
295,59.4
274.9,69
273.6,68.7
299.9,60.1
279.9,74
289.9,66
283.6,59.8
310.3,52.4
291.7,65.6
284.2,63.2
295,53.5
298.6,55.1
297.4,60.4
299.7,67.7
284.4,69.7
286.4,72.4
285.9,66.9
297.6,54.3
272.5,62
277,66.2
287.6,60.9
280.4,69.4
280,63.7
295.4,52.8
274.4,68.8
286.5,73.1
287.7,65.2
291.5,65.9
279,69.4
299,65.2
290.1,69.1
288.9,67.9
288.8,68.2
283.2,61
293.2,58.4
285.3,67.3
284.1,65.7
281.4,67.7
286.1,61.4
284.9,62.3
284.8,68.1
296,62
282.9,71.8
280.9,67.8
291.2,62
292.8,62.2
291,61.9
285.7,62.4
283.9,62.9
298.4,61.5
285.1,65.3
286.1,60.1
283.1,65.4
289.4,58.3
284.6,70.7
296.6,62.3
295.9,64.9
295.2,62.8
293.9,54.5
275,65.5
286.8,69.5
291.1,64.4
284.8,62.5
283.7,59.5
295.4,66.9
291.8,62.7
274.9,72.3
302.9,61.2
272.1,80.4
274.9,74.9
296.3,59.4
286.2,58.8
294.2,63.3
284.1,66.5
299.2,62.4
275.4,71
273.2,70.9
281.6,65.9
295.7,55.3
287.1,56.8
287.7,66.9
296.7,53.7
282.2,64.2
291.7,65.6
281.6,73.4
311,56.2
278.6,64.7
288,65.7
276.7,72.1
292,62
286.4,69.9
292.7,65.7
294.2,62.9
278.6,59.6
283.1,69.2
284.1,66
278.6,73.6
291.1,60.4
294.6,59.4
274.3,70.5
274,57.1
283.8,62.7
272.7,66.9
303.2,58.3
282,70.4
281.9,61
287,59.9
293.5,63.8
283.6,56.3
296.9,55.3
290.9,58.2
303,58.1
292.8,61.1
281.1,65
293,61.1
284,66.5
279.8,66.7
292.9,65.4
284,66.9
282,64.5
280.6,64
287.7,63.4
287.7,63.4
298.3,59.5
299.6,53.4
291.3,62.5
295.2,61.4
288,62.4
297.8,59.5
286,62.6
285.3,66.2
286.9,63.4
275.1,73.7
0人推荐
随时随地看视频
慕课网APP