RPN
关于Faster R-CNN中RPN(Region Proposal Network)的介绍, 大家可以自己看paper或者找点论坛看看, Medium, CSDN, 知乎, 包括简书都有大量的资料做介绍。本文只站在源码的角度给你介绍每一步的实现, 所以就不阐述原理了, 见谅~~
代码入口
lib/model/train_val.py# Construct the computation graph lr, train_op = self.construct_graph(sess)
lr是学习率, train_op是训练网络的一系列操作。
让我们走进construct_graph函数
# lib/model/train_val.py
def construct_graph(self, sess):
    """Build the training graph inside ``sess.graph`` and return ``(lr, train_op)``.

    Seeds TensorFlow, asks ``self.net`` for the 'TRAIN' architecture, wires a
    momentum optimizer to ``layers['total_loss']`` and creates the saver and
    the two summary writers.

    Side effects: sets ``self.optimizer``, ``self.saver``, ``self.writer``
    and ``self.valwriter``.

    Returns:
        lr: the non-trainable learning-rate variable.
        train_op: the op returned by ``apply_gradients``.
    """
    # NOTE(review): the excerpt this was restored from had lost its line
    # breaks; the nesting below is reconstructed -- confirm against the
    # upstream file.
    with sess.graph.as_default():
        # Seed first so every random op created afterwards is affected.
        tf.set_random_seed(cfg.RNG_SEED)

        outputs = self.net.create_architecture(
            'TRAIN', self.imdb.num_classes, tag='default',
            anchor_scales=cfg.ANCHOR_SCALES,
            anchor_ratios=cfg.ANCHOR_RATIOS)
        total_loss = outputs['total_loss']

        lr = tf.Variable(cfg.TRAIN.LEARNING_RATE, trainable=False)
        self.optimizer = tf.train.MomentumOptimizer(lr, cfg.TRAIN.MOMENTUM)

        grads_and_vars = self.optimizer.compute_gradients(total_loss)
        if cfg.TRAIN.DOUBLE_BIAS:
            # Bias gradients are scaled by 2; all other gradients pass
            # through untouched.
            with tf.variable_scope('Gradient_Mult'):
                grads_and_vars = [
                    (tf.multiply(g, 2.) if '/biases:' in v.name else g, v)
                    for g, v in grads_and_vars
                ]
        train_op = self.optimizer.apply_gradients(grads_and_vars)

        # Snapshots are managed by the caller, hence the very large
        # max_to_keep.
        self.saver = tf.train.Saver(max_to_keep=100000)
        # Training summaries go to tbdir (with the graph), validation
        # summaries go to tbvaldir.
        self.writer = tf.summary.FileWriter(self.tbdir, sess.graph)
        self.valwriter = tf.summary.FileWriter(self.tbvaldir)

    return lr, train_op
代码其实将流程阐述的非常清楚,我再废话给大家总结一下~~
给tensorflow设置随机种子seed(固定种子后, 权重初始化等随机操作在每次运行时结果一致, 便于复现实验; 更多细节可以百度一下)
建立一个计算图computational graph(重点,下面介绍)
定义了一个执行Momentum算法的优化器
accumulation = momentum * accumulation + gradient
variable -= learning_rate * accumulation
计算损失loss对各个变量的梯度: self.optimizer.compute_gradients(loss), 返回(梯度, 变量)对的列表gvs
将梯度应用于变量self.optimizer.apply_gradients(gvs), 返回值就是train_op(如果cfg.TRAIN.DOUBLE_BIAS为真, 会先把名字中带'/biases:'的变量的梯度乘以2, 再调用apply_gradients)
定义Saver(用于保存模型快照snapshot), 以及writer和valwriter(分别把训练和验证的信息写入tensorboard)
然后走进create_architecture函数
# lib/nets/network.py
def create_architecture(self, mode, num_classes, tag=None,
                        anchor_scales=(8, 16, 32), anchor_ratios=(0.5, 1, 2)):
    """Create the input placeholders, build the network and attach its outputs.

    Args:
        mode: 'TRAIN' or 'TEST'. 'TEST' un-normalizes the predicted box
            deltas; any other value takes the loss/summary branch.
        num_classes: stored as ``self._num_classes``.
        tag: stored as ``self._tag``; must not be None.
        anchor_scales: anchor scales; their count is ``self._num_scales``.
        anchor_ratios: anchor ratios; their count is ``self._num_ratios``.

    Returns:
        dict with key 'rois', plus every entry of ``self._losses`` (non-TEST
        branch only) and every entry of ``self._predictions``.

    Raises:
        AssertionError: if ``tag`` is None.
    """
    # NOTE(review): the excerpt this was restored from had lost its line
    # breaks; the nesting below (in particular that all summary code lives
    # in the non-TEST branch) is reconstructed -- confirm against the
    # upstream file.
    self._image = tf.placeholder(tf.float32, shape=[1, None, None, 3])
    self._im_info = tf.placeholder(tf.float32, shape=[3])
    self._gt_boxes = tf.placeholder(tf.float32, shape=[None, 5])
    self._tag = tag

    self._num_classes = num_classes
    self._mode = mode
    self._anchor_scales = anchor_scales
    self._num_scales = len(anchor_scales)

    self._anchor_ratios = anchor_ratios
    self._num_ratios = len(anchor_ratios)

    self._num_anchors = self._num_scales * self._num_ratios

    training = mode == 'TRAIN'
    testing = mode == 'TEST'

    # Identity comparison is the correct test against the None singleton.
    assert tag is not None

    # handle most of the regularizers here
    weights_regularizer = tf.contrib.layers.l2_regularizer(cfg.TRAIN.WEIGHT_DECAY)
    if cfg.TRAIN.BIAS_DECAY:
        biases_regularizer = weights_regularizer
    else:
        biases_regularizer = tf.no_regularizer

    # list as many types of layers as possible, even if they are not used now
    with arg_scope([slim.conv2d, slim.conv2d_in_plane,
                    slim.conv2d_transpose, slim.separable_conv2d,
                    slim.fully_connected],
                   weights_regularizer=weights_regularizer,
                   biases_regularizer=biases_regularizer,
                   biases_initializer=tf.constant_initializer(0.0)):
        rois, cls_prob, bbox_pred = self._build_network(training)

    layers_to_output = {'rois': rois}

    # Every trainable variable gets a train summary later on.
    self._train_summaries.extend(tf.trainable_variables())

    if testing:
        # Tile the per-coordinate stds/means once per class, then undo the
        # normalization on the predicted deltas.
        stds = np.tile(np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS), (self._num_classes))
        means = np.tile(np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS), (self._num_classes))
        self._predictions["bbox_pred"] *= stds
        self._predictions["bbox_pred"] += means
    else:
        self._add_losses()
        layers_to_output.update(self._losses)

        val_summaries = []
        with tf.device("/cpu:0"):
            val_summaries.append(self._add_gt_image_summary())
            for key, var in self._event_summaries.items():
                val_summaries.append(tf.summary.scalar(key, var))
            for key, var in self._score_summaries.items():
                self._add_score_summary(key, var)
            for var in self._act_summaries:
                self._add_act_summary(var)
            for var in self._train_summaries:
                self._add_train_summary(var)

        # merge_all() must run after every summary above has been created.
        self._summary_op = tf.summary.merge_all()
        self._summary_op_val = tf.summary.merge(val_summaries)

    layers_to_output.update(self._predictions)

    return layers_to_output
很多人(包括我自己)对tensorflow还不是很熟悉,所以这里还是给大家概括一下程序流程
给network的成员变量赋值
定义权重weights的正则化器regularizer(L2正则); 如果cfg.TRAIN.BIAS_DECAY为真, 偏置biases也使用同一个正则化器, 否则偏置不做正则
建立网络self._build_network(training) (重点)
定义损失函数, 包括RPN class loss, RPN bbox loss,整个RCNN网络的class loss和最终确定的物体边框bbox loss, 细节可以看这个函数_add_losses
更新一下tensorboard用得到的参数
然后我们了解一下_build_network函数
# lib/nets/network.py
def _build_network(self, is_training=True):
    """Assemble head -> RPN -> RoI pooling -> tail -> region classification.

    Args:
        is_training: forwarded to the head, RPN, tail and classification
            builders.

    Returns:
        Tuple ``(rois, cls_prob, bbox_pred)``.

    Raises:
        NotImplementedError: if ``cfg.POOLING_MODE`` is not 'crop'.
    """
    # NOTE(review): the excerpt this was restored from had lost its line
    # breaks; the nesting below is reconstructed -- confirm against the
    # upstream file.

    # Pick the initializer family once; both initializers share mean 0 and
    # differ only in stddev.
    if cfg.TRAIN.TRUNCATED:
        make_initializer = tf.truncated_normal_initializer
    else:
        make_initializer = tf.random_normal_initializer
    initializer = make_initializer(mean=0.0, stddev=0.01)
    initializer_bbox = make_initializer(mean=0.0, stddev=0.001)

    net_conv = self._image_to_head(is_training)
    with tf.variable_scope(self._scope, self._scope):
        # Anchors first, then the region proposal network on the head
        # features.
        self._anchor_component()
        rois = self._region_proposal(net_conv, is_training, initializer)
        # Only crop-based RoI pooling is supported.
        if cfg.POOLING_MODE != 'crop':
            raise NotImplementedError
        pool5 = self._crop_pool_layer(net_conv, rois, "pool5")

    fc7 = self._head_to_tail(pool5, is_training)
    with tf.variable_scope(self._scope, self._scope):
        cls_prob, bbox_pred = self._region_classification(
            fc7, is_training, initializer, initializer_bbox)

    # Every prediction recorded so far also becomes a score summary.
    self._score_summaries.update(self._predictions)

    return rois, cls_prob, bbox_pred
作者:油腻小年轻
链接:https://www.jianshu.com/p/6797b29c91bf