Faster RCNN源码解析(2)-原创手记-慕课网

RPN

关于Faster R-CNN中RPN的介绍大家可以自己看paper或者找点论坛看看, medium, CSDN, 知乎, 包括简书都有大量的资料做介绍, 本文只站在源码的角度给你介绍每一步的实现, 所以就不阐述原理了，见谅~~
代码入口

lib/model/train_val.py
    # Construct the computation graph
    lr, train_op = self.construct_graph(sess)

lr是学习率, train_op是训练网络的一系列操作。
让我们走进construct_graph函数

lib/model/train_val.py
  def construct_graph(self, sess):
    with sess.graph.as_default():      # Set the random seed for tensorflow
      tf.set_random_seed(cfg.RNG_SEED)      # Build the main computation graph
      layers = self.net.create_architecture('TRAIN', self.imdb.num_classes, tag='default',
                                            anchor_scales=cfg.ANCHOR_SCALES,
                                            anchor_ratios=cfg.ANCHOR_RATIOS)      # Define the loss
      loss = layers['total_loss']      # Set learning rate and momentum
      lr = tf.Variable(cfg.TRAIN.LEARNING_RATE, trainable=False)      self.optimizer = tf.train.MomentumOptimizer(lr, cfg.TRAIN.MOMENTUM)      # Compute the gradients with regard to the loss
      gvs = self.optimizer.compute_gradients(loss)      # Double the gradient of the bias if set
      if cfg.TRAIN.DOUBLE_BIAS:
        final_gvs = []
        with tf.variable_scope('Gradient_Mult') as scope:          for grad, var in gvs:
            scale = 1.
            if cfg.TRAIN.DOUBLE_BIAS and '/biases:' in var.name:
              scale *= 2.
            if not np.allclose(scale, 1.0):
              grad = tf.multiply(grad, scale)
            final_gvs.append((grad, var))
        train_op = self.optimizer.apply_gradients(final_gvs)      else:
        train_op = self.optimizer.apply_gradients(gvs)      # We will handle the snapshots ourselves
      self.saver = tf.train.Saver(max_to_keep=100000)      # Write the train and validation information to tensorboard
      self.writer = tf.summary.FileWriter(self.tbdir, sess.graph)      self.valwriter = tf.summary.FileWriter(self.tbvaldir)    return lr, train_op

代码其实将流程阐述的非常清楚，我再废话给大家总结一下~~

给tensorflow设置随机种子seed(为啥要这样，可以百度一下)
建立一个计算图computational graph(重点，下面介绍)
定义了一个执行Momentum算法的优化器

accumulation = momentum * accumulation + gradient
variable -= learning_rate * accumulation

计算损失(loss)对各个参数的梯度self.optimizer.compute_gradients(loss)
将梯度应用于变量self.optimizer.apply_gradients(gvs), 返回值就是train_op
定义Saver(用于快照-缓存), writer, valwriter(把信息及时传入tensorboard)

然后走进create_architecture函数

lib/nets/network.py
  def create_architecture(self, mode, num_classes, tag=None,
                          anchor_scales=(8, 16, 32), anchor_ratios=(0.5, 1, 2)):    self._image = tf.placeholder(tf.float32, shape=[1, None, None, 3])    self._im_info = tf.placeholder(tf.float32, shape=[3])    self._gt_boxes = tf.placeholder(tf.float32, shape=[None, 5])    self._tag = tag    self._num_classes = num_classes    self._mode = mode    self._anchor_scales = anchor_scales    self._num_scales = len(anchor_scales)    self._anchor_ratios = anchor_ratios    self._num_ratios = len(anchor_ratios)    self._num_anchors = self._num_scales * self._num_ratios

    training = mode == 'TRAIN'
    testing = mode == 'TEST'

    assert tag != None    # handle most of the regularizers here
    weights_regularizer = tf.contrib.layers.l2_regularizer(cfg.TRAIN.WEIGHT_DECAY)    if cfg.TRAIN.BIAS_DECAY:
      biases_regularizer = weights_regularizer    else:
      biases_regularizer = tf.no_regularizer    # list as many types of layers as possible, even if they are not used now
    with arg_scope([slim.conv2d, slim.conv2d_in_plane, \
                    slim.conv2d_transpose, slim.separable_conv2d, slim.fully_connected], 
                    weights_regularizer=weights_regularizer,
                    biases_regularizer=biases_regularizer, 
                    biases_initializer=tf.constant_initializer(0.0)): 
      rois, cls_prob, bbox_pred = self._build_network(training)

    layers_to_output = {'rois': rois}    for var in tf.trainable_variables():      self._train_summaries.append(var)    if testing:
      stds = np.tile(np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS), (self._num_classes))
      means = np.tile(np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS), (self._num_classes))      self._predictions["bbox_pred"] *= stds      self._predictions["bbox_pred"] += means    else:      self._add_losses()
      layers_to_output.update(self._losses)

      val_summaries = []
      with tf.device("/cpu:0"):
        val_summaries.append(self._add_gt_image_summary())        for key, var in self._event_summaries.items():
          val_summaries.append(tf.summary.scalar(key, var))        for key, var in self._score_summaries.items():          self._add_score_summary(key, var)        for var in self._act_summaries:          self._add_act_summary(var)        for var in self._train_summaries:          self._add_train_summary(var)      self._summary_op = tf.summary.merge_all()      self._summary_op_val = tf.summary.merge(val_summaries)

    layers_to_output.update(self._predictions)    return layers_to_output

很多人(包括我自己)对tensorflow还不是很熟悉，所以这里还是给大家概括一下程序流程

给network的成员变量赋值
定义权重weights的正则regularizer
建立网络self._build_network(training) (重点)
定义损失函数, 包括RPN class loss, RPN bbox loss,整个RCNN网络的class loss和最终确定的物体边框bbox loss, 细节可以看这个函数_add_losses
更新一下tensorboard用得到的参数

然后我们了解一下_build_network函数

lib/nets/network.py
  def _build_network(self, is_training=True):    # select initializers
    if cfg.TRAIN.TRUNCATED:
      initializer = tf.truncated_normal_initializer(mean=0.0, stddev=0.01)
      initializer_bbox = tf.truncated_normal_initializer(mean=0.0, stddev=0.001)    else:
      initializer = tf.random_normal_initializer(mean=0.0, stddev=0.01)
      initializer_bbox = tf.random_normal_initializer(mean=0.0, stddev=0.001)

    net_conv = self._image_to_head(is_training)
    with tf.variable_scope(self._scope, self._scope):      # build the anchors for the image
      self._anchor_component()      # region proposal network
      rois = self._region_proposal(net_conv, is_training, initializer)      # region of interest pooling
      if cfg.POOLING_MODE == 'crop':
        pool5 = self._crop_pool_layer(net_conv, rois, "pool5")      else:
        raise NotImplementedError

    fc7 = self._head_to_tail(pool5, is_training)
    with tf.variable_scope(self._scope, self._scope):      # region classification
      cls_prob, bbox_pred = self._region_classification(fc7, is_training, 
                                                        initializer, initializer_bbox)    self._score_summaries.update(self._predictions)    return rois, cls_prob, bbox_pred

作者：油腻小年轻
链接：https://www.jianshu.com/p/6797b29c91bf