4.2.5 Inception-mobile_net实战
Inception-Net
Inception Net的思想是分组卷积,上一层分成几组卷积,卷积完成之后在把分组的结果拼接起来
可以进行扩展,每个组有很多层,这里只实现基本的分组卷积
# 定义 Inception-Net的分组结构def inception_block(x, output_channel_for_each_path, name): """inception block implementation""" """ Args: - x: 输入数据 - output_channel_for_each_path: 每组的输出通道数目 eg: [10,20,30] - name: 每组的卷积命名 """ # variable_scope 在这个scope下命名不会有冲突 conv1 = 'conv1' => scope_name/conv1 with tf.variable_scope(name): conv1_1 = tf.layers.conv2d(x, output_channel_for_each_path[0], (1, 1), strides = (1,1), padding = 'same', activation = tf.nn.relu, name = 'conv1_1') conv3_3 = tf.layers.conv2d(x, output_channel_for_each_path[1], (3, 3), strides = (1,1), padding = 'same', activation = tf.nn.relu, name = 'conv3_3') conv5_5 = tf.layers.conv2d(x, output_channel_for_each_path[0], (5, 5), strides = (1,1), padding = 'same', activation = tf.nn.relu, name = 'conv5_5') max_pooling = tf.layers.max_pooling2d(x, (2,2), (2,2), name = 'max_pooling') # max_pooling 会使得图像变小,所以需要padding max_pooling_shape = max_pooling.get_shape().as_list()[1:] input_shape = x.get_shape().as_list()[1:] width_padding = (input_shape[0] - max_pooling_shape[0]) // 2 height_padding = (input_shape[1] - max_pooling_shape[1]) // 2 padded_pooling = tf.pad(max_pooling, [[0,0], [width_padding,width_padding], [height_padding,height_padding], [0,0]]) # 在第四个维度(通道数)上做拼接 concat_layer = tf.concat( [conv1_1, conv3_3, conv5_5, padded_pooling], axis = 3) return concat_layer x = tf.placeholder(tf.float32, [None, 3072]) y = tf.placeholder(tf.int64, [None])# 将向量变成具有三通道的图片的格式x_image = tf.reshape(x, [-1,3,32,32])# 32*32x_image = tf.transpose(x_image, perm = [0, 2, 3, 1])# 先经过一个普通的卷积层和池化层# conv1:神经元图,feature map,输出图像conv1 = tf.layers.conv2d(x_image, 32, # output channel number (3,3), # kernal size padding = 'same', # same 代表输出图像的大小没有变化,valid 代表不做padding activation = tf.nn.relu, name = 'conv1')# 16*16pooling1 = tf.layers.max_pooling2d(conv1, (2, 2), # kernal size (2, 2), # stride name = 'pool1' # name为了给这一层做一个命名,这样会让图打印出来的时候会是一个有意义的图 )# 经过两个个分组卷积inception_2a = inception_block(pooling1, [16, 16, 16], name = 'inception_2a') inception_2b = inception_block(inception_2a, [16, 16, 16], name = 'inception_2b')# 接一个池化pooling2 = tf.layers.max_pooling2d(inception_2b, (2, 2), (2, 2), name = 'pool2' )# 再经过两个分组卷积核一个池化inception_3a = inception_block(pooling2, [16, 16, 16], name = 'inception_3a') inception_3b = inception_block(inception_3a, [16, 16, 16], name = 'inception_3b') pooling3 = tf.layers.max_pooling2d(inception_3b, (2, 2), (2, 2), name = 'pool3' )# [None, 4*4*42] 将三通道的图形转换成矩阵flatten = tf.layers.flatten(pooling3) y_ = tf.layers.dense(flatten, 10)# 交叉熵loss = tf.losses.sparse_softmax_cross_entropy(labels=y, logits=y_)# y_-> softmax# y -> one_hot# loss = ylogy_# boolpredict = tf.argmax(y_, 1)# [1,0,1,1,1,0,0,0]correct_prediction = tf.equal(predict, y) accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float64))with tf.name_scope('train_op'): train_op = tf.train.AdamOptimizer(1e-3).minimize(loss)
Mobile-Net
Mobile Net 的基本结构 深度可分类的卷积 -> BN ->RELU-> 1*1 的卷积 -> BN -> RELU
这里BN先不加,这是下节课的内容
image.png
def separable_conv_block(x, output_channel_number, name): """separable_conv block implementation""" """ Args: - x: 输入数据 - output_channel_number: 经过深度可分离卷积之后,再经过1*1 的卷积生成的通道数目 - name: 每组的卷积命名 """ # variable_scope 在这个scope下命名不会有冲突 conv1 = 'conv1' => scope_name/conv1 with tf.variable_scope(name): input_channel = x.get_shape().as_list()[-1] # 将x 在 第四个维度(axis+1) 上 拆分成 input_channel 份 # channel_wise_x: [channel1, channel2, ...] channel_wise_x = tf.split(x, input_channel, axis = 3) output_channels = [] for i in range(len(channel_wise_x)): output_channel = tf.layers.conv2d(channel_wise_x[i], 1, (3,3), strides = (1,1), padding = 'same', activation = tf.nn.relu, name = 'conv_%d' % i) output_channels.append(output_channel) concat_layers = tf.concat(output_channels, axis = 3) conv1_1 = tf.layers.conv2d(concat_layers, output_channel_number, (1,1), strides = (1,1), padding = 'same', activation = tf.nn.relu, name = 'conv1_1') return conv1_1 x = tf.placeholder(tf.float32, [None, 3072]) y = tf.placeholder(tf.int64, [None])# 将向量变成具有三通道的图片的格式x_image = tf.reshape(x, [-1,3,32,32])# 32*32x_image = tf.transpose(x_image, perm = [0, 2, 3, 1])# conv1:神经元图,feature map,输出图像conv1 = tf.layers.conv2d(x_image, 32, # output channel number (3,3), # kernal size padding = 'same', # same 代表输出图像的大小没有变化,valid 代表不做padding activation = tf.nn.relu, name = 'conv1')# 16*16pooling1 = tf.layers.max_pooling2d(conv1, (2, 2), # kernal size (2, 2), # stride name = 'pool1' # name为了给这一层做一个命名,这样会让图打印出来的时候会是一个有意义的图 ) separable_2a = separable_conv_block(pooling1, 32, name = 'separable_2a') separable_2b = separable_conv_block(separable_2a, 32, name = 'separable_2b') pooling2 = tf.layers.max_pooling2d(separable_2b, (2, 2), (2, 2), name = 'pool2' ) separable_3a = separable_conv_block(pooling2, 32, name = 'separable_3a') separable_3b = separable_conv_block(separable_3a, 32, name = 'separable_3b') pooling3 = tf.layers.max_pooling2d(separable_3b, (2, 2), (2, 2), name = 'pool3')# [None, 4*4*42] 将三通道的图形转换成矩阵flatten = tf.layers.flatten(pooling3) y_ = tf.layers.dense(flatten, 10)# 交叉熵loss = tf.losses.sparse_softmax_cross_entropy(labels=y, logits=y_)# y_-> softmax# y -> one_hot# loss = ylogy_# boolpredict = tf.argmax(y_, 1)# [1,0,1,1,1,0,0,0]correct_prediction = tf.equal(predict, y) accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float64))with tf.name_scope('train_op'): train_op = tf.train.AdamOptimizer(1e-3).minimize(loss)
这里的准确率是10000次百分之60,这是因为mobile net 的 参数减小和计算率减小影响了准确率。
这里的训练我们都使用的是一万次训练,真正的神经网络训练远不止于此,可能会达到100万次的规模
作者:Meet相识_bfa5
链接:https://www.jianshu.com/p/5a5bd59116fc