手记

机器学习-多类逻辑回归-手动添加隐藏层

了解神经网络原理的同学们应该都知道,在一定范围内增加隐藏层可以增强模型的表达能力,最终预测结果的准确度通常也会随之提高(但并非层数越多越好,过深的网络可能过拟合或难以训练),同时计算量也会变大。在上一篇的基础上,我们手动添加一个隐藏层,代码如下(主要参考自《多层感知机 — 从0开始》):

from mxnet import gluon

from mxnet import ndarray as nd

import matplotlib.pyplot as plt

import mxnet as mx

from mxnet import autograd

  

def transform(data, label):
    """Convert one sample to float32, dividing the pixel data by 255.

    Args:
        data: array-like exposing ``astype``; holds the raw image values.
        label: array-like exposing ``astype``; holds the class id(s).

    Returns:
        A ``(data, label)`` tuple, both cast to ``float32``; ``data`` is
        additionally divided by 255.
    """
    pixels = data.astype('float32') / 255
    target = label.astype('float32')
    return pixels, target

  

mnist_train = gluon.data.vision.FashionMNIST(train=True, transform=transform)

mnist_test = gluon.data.vision.FashionMNIST(train=False, transform=transform)

  

def show_images(images):
    """Display a batch of images side by side in a single row.

    Args:
        images: batch whose first axis indexes the images. Each item must
            support ``reshape((28, 28))`` and ``asnumpy()``.

    Returns:
        None. Opens a matplotlib window via ``plt.show()``; an empty batch
        is a no-op.
    """
    n = images.shape[0]
    if n == 0:
        # plt.subplots rejects a grid with zero columns; nothing to draw.
        return

    # squeeze=False always yields a 2-D array of axes. With the default,
    # a single image (n == 1) returns a bare Axes object that cannot be
    # indexed, which made the original loop fail.
    _, figs = plt.subplots(1, n, figsize=(15, 15), squeeze=False)
    for i, ax in enumerate(figs[0]):
        ax.imshow(images[i].reshape((28, 28)).asnumpy())
        # Hide tick marks and labels; they carry no meaning for pictures.
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
    plt.show()

 

def get_text_labels(label):
    """Translate numeric class ids into their display names.

    Args:
        label: iterable of class ids; each is converted with ``int()`` and
            used as an index into the ten category names.

    Returns:
        A list with one category name per entry of ``label``.
    """
    names = (
        'T 恤', '长 裤', '套头衫', '裙 子', '外 套',
        '凉 鞋', '衬 衣', '运动鞋', '包 包', '短 靴',
    )
    return [names[idx] for idx in map(int, label)]

  

# Take the first ten training samples for a quick visual sanity check.
data, label = mnist_train[0:10]



print('example shape: ', data.shape, 'label:', label)

show_images(data)

print(get_text_labels(label))



# Number of samples per mini-batch, shared by both loaders below and by the
# learning-rate scaling in the training loop.
batch_size = 256

# Training batches are shuffled; test batches keep the dataset order.
train_data = gluon.data.DataLoader(mnist_train, batch_size, shuffle=True)

test_data = gluon.data.DataLoader(mnist_test, batch_size, shuffle=False)

  

# Input width: 784 = 28 * 28, the image size used by show_images(); net()
# flattens every sample to this many features.
num_inputs = 784

# One output per clothing category listed in get_text_labels().
num_outputs = 10



# Add one hidden layer with 256 units

num_hidden = 256

# Value passed as `scale` to nd.random_normal when initializing the weights.
weight_scale = .01



# Parameters of the input layer (input -> hidden)

W1 = nd.random_normal(shape=(num_inputs, num_hidden), scale=weight_scale)

b1 = nd.zeros(num_hidden)



# Parameters of the hidden layer (hidden -> output)

W2 = nd.random_normal(shape=(num_hidden, num_outputs), scale=weight_scale)

b2 = nd.zeros(num_outputs)



# The extra layer means more parameters to train

params = [W1, b1, W2, b2]



# Allocate gradient storage so autograd can fill in param.grad.
for param in params:

    param.attach_grad()

     

# Activation function
def relu(X):
    """Return the element-wise maximum of ``X`` and 0 (rectified linear unit)."""
    rectified = nd.maximum(X, 0)
    return rectified

  

# Model forward pass
def net(X):
    """Run the one-hidden-layer network on a batch.

    Args:
        X: input batch; reshaped to ``(-1, num_inputs)`` before use.

    Returns:
        The output-layer values ``relu(X.W1 + b1).W2 + b2``. No softmax is
        applied here.
    """
    flat = X.reshape((-1, num_inputs))
    # Input -> hidden layer, followed by the ReLU activation.
    hidden = relu(nd.dot(flat, W1) + b1)
    # Hidden -> output layer.
    return nd.dot(hidden, W2) + b2

  

# Loss function that combines softmax and cross-entropy in one object; it is
# applied directly to the raw outputs of net() in the training loop.

softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()

 

# Gradient descent
def SGD(params, lr):
    """Apply one in-place gradient-descent step to every parameter.

    Args:
        params: iterable of arrays, each exposing its gradient as ``.grad``.
        lr: step size multiplied with each gradient.
    """
    for weight in params:
        step = lr * weight.grad
        # Slice assignment writes into the existing array instead of
        # rebinding the name, so the caller's parameter is updated.
        weight[:] = weight - step

     

def accuracy(output, label):
    """Return the fraction of rows whose arg-max along axis 1 equals ``label``.

    Args:
        output: per-class scores, one row per sample.
        label: expected class ids, compared element-wise with the arg-max.

    Returns:
        The mean of the element-wise matches, as a Python scalar.
    """
    predicted = output.argmax(axis=1)
    hits = predicted == label
    return nd.mean(hits).asscalar()

  

def _get_batch(batch):
    """Extract ``(data, label)`` from a batch.

    Args:
        batch: either an ``mx.io.DataBatch`` (first entries of ``.data`` and
            ``.label`` are used) or a two-item ``(data, label)`` sequence.

    Returns:
        A ``(data, label)`` tuple.
    """
    if not isinstance(batch, mx.io.DataBatch):
        # Unpacking, rather than returning `batch` as-is, still enforces
        # that exactly two items are present.
        data, label = batch
        return data, label
    return batch.data[0], batch.label[0]

  

def evaluate_accuracy(data_iterator, net):
    """Compute the mean per-batch accuracy of ``net`` over an iterator.

    Args:
        data_iterator: iterable of batches accepted by ``_get_batch``. An
            ``mx.io.MXDataIter`` is reset before iteration.
        net: callable mapping a data batch to per-class scores.

    Returns:
        The unweighted average of the per-batch accuracies, or ``0.0`` when
        the iterator yields no batches.
    """
    if isinstance(data_iterator, mx.io.MXDataIter):
        data_iterator.reset()

    acc = 0.
    num_batches = 0
    for batch in data_iterator:
        data, label = _get_batch(batch)
        acc += accuracy(net(data), label)
        num_batches += 1

    if num_batches == 0:
        # The original divided by (i + 1) using the loop index, which is
        # unbound for an empty iterator and raised NameError.
        return 0.
    return acc / num_batches

  

# Base step size; it is divided by batch_size at the SGD call below.
learning_rate = .5



# Train for five passes over the training data, reporting metrics per epoch.
for epoch in range(5):

    # Running sums of per-batch mean loss and accuracy for this epoch.
    train_loss = 0.

    train_acc = 0.

    for data, label in train_data:

        # Record the forward pass so autograd can compute gradients.
        with autograd.record():

            output = net(data)

            # Use the combined softmax + cross-entropy loss

            loss = softmax_cross_entropy(output, label)

        loss.backward()

        # NOTE(review): the step is scaled by the nominal batch_size; if the
        # last batch is smaller, its step is slightly too small - confirm OK.
        SGD(params, learning_rate / batch_size)

        train_loss += nd.mean(loss).asscalar()

        train_acc += accuracy(output, label)



    test_acc = evaluate_accuracy(test_data, net)

    # len(train_data) is used as the number of batches for averaging.
    print("Epoch %d. Loss: %f, Train acc %f, Test acc %f" % (

        epoch, train_loss / len(train_data), train_acc / len(train_data), test_acc))

 

# Show the first ten test samples with their true labels.
data, label = mnist_test[0:10]

show_images(data)

print('true labels')

print(get_text_labels(label))



# The predicted class is the index of the largest output for each sample.
predicted_labels = net(data).argmax(axis=1)

print('predicted labels')

print(get_text_labels(predicted_labels.asnumpy()))

 有变的地方,都加了注释,主要改动点有5个:

1. 手动添加了1个隐藏层,该层有256个节点

2. 多了一层,所以参数也变多了

3. 计算y=wx+b模型时,就要一层层来算了

4. 将softmax与交叉熵CrossEntropy合并成了一个损失函数(这样不必单独计算softmax再取对数、也不必单独对softmax求导,可以避免指数运算上溢和对接近0的数取对数的问题,数值上更稳定)

5. 另外激活函数换成了收敛速度更快的relu(参考:Deep learning系列(七)激活函数 )

运行效果:

相对原始纯手动版本,准确率提升了不少!

 

tips:类似的思路,我们可以再手动添加第2层隐藏层,关键代码参考下面

...

 

# Add one hidden layer with 256 units

num_hidden1 = 256

weight_scale1 = .01



# Add a second hidden layer with 512 units

num_hidden2 = 512

weight_scale2 = .01



# Parameters of the input layer (input -> first hidden layer)

W1 = nd.random_normal(shape=(num_inputs, num_hidden1), scale=weight_scale1)

b1 = nd.zeros(num_hidden1)



# Parameters of the hidden layers (first hidden -> second hidden -> output)
# NOTE(review): W2 is initialized with weight_scale1 and W3 with
# weight_scale2; both are .01 here, so this has no effect - confirm intent.

W2 = nd.random_normal(shape=(num_hidden1, num_hidden2), scale=weight_scale1)

b2 = nd.zeros(num_hidden2)



W3 = nd.random_normal(shape=(num_hidden2, num_outputs), scale=weight_scale2)

b3 = nd.zeros(num_outputs)



# Even more parameters with the second hidden layer

params = [W1, b1, W2, b2, W3, b3]

 

...

 

# Model forward pass
def net(X):
    """Run the two-hidden-layer network on a batch.

    Args:
        X: input batch; reshaped to ``(-1, num_inputs)`` before use.

    Returns:
        The output-layer values computed from the second hidden layer. No
        softmax is applied here.
    """
    flat = X.reshape((-1, num_inputs))
    # Input -> first hidden layer -> second hidden layer, ReLU after each.
    first_hidden = relu(nd.dot(flat, W1) + b1)
    second_hidden = relu(nd.dot(first_hidden, W2) + b2)
    # Second hidden layer -> output layer.
    return nd.dot(second_hidden, W3) + b3

 

0人推荐
随时随地看视频
慕课网APP