FPN中的proposal_layer(网络中的proposal)
最后有nms的实现,看看
layer {
name: 'proposal'
type: 'Python'
bottom: 'im_info'
bottom: 'rpn_bbox_pred/p2'
bottom: 'rpn_bbox_pred/p3'
bottom: 'rpn_bbox_pred/p4'
bottom: 'rpn_bbox_pred/p5'
bottom: 'rpn_bbox_pred/p6'  # rpn_bbox_pred: offsets (proposal vs. anchor) produced by a conv layer; see rpn_data for details
bottom: 'fpn_out_reshape/p2'
bottom: 'fpn_out_reshape/p3'
bottom: 'fpn_out_reshape/p4'
bottom: 'fpn_out_reshape/p5'
bottom: 'fpn_out_reshape/p6'  # fpn_out_reshape: the cls score produced by the softmax layer fpn_out
top: 'rpn_rois'
python_param {
module: 'rpn.proposal_layer'
layer: 'ProposalLayer'
param_str: "'feat_stride': 4,8,16,32,64"
}
}
# --------------------------------------------------------
# Faster R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick and Sean Bell
# --------------------------------------------------------
import caffe
import numpy as np
import yaml
from fast_rcnn.config import cfg
from generate_anchors import generate_anchors
from fast_rcnn.bbox_transform import bbox_transform_inv, clip_boxes
from fast_rcnn.nms_wrapper import nms
import numpy.random as npr
# Set to True to make ProposalLayer.setup print its parsed layer parameters.
DEBUG = False
def vis_all_detection(im_array, detections, class_names, scale):
    """
    Draw every detection box on one image and save the figure under ``vis/``.

    The image is first passed through ``transform_inverse`` together with a
    fixed three-value array (presumably per-channel pixel means; that helper
    is defined outside this view -- TODO confirm). The output file is named
    after the mean pixel value of the resulting image.

    :param im_array: [b=1 c h w] in rgb
    :param detections: array indexed by row; the first four entries of each
        row are used as x1, y1, x2, y2
    :param class_names: list of names in imdb (not used by this function)
    :param scale: visualize the scaled image (not used by this function)
    :return: None
    """
    import random

    # Select the non-interactive backend before pyplot is imported.
    import matplotlib
    matplotlib.use('Agg')
    import matplotlib.pyplot as plt
    from matplotlib.pyplot import savefig

    channel_values = np.array([103.06, 115.9, 123.15])
    image = transform_inverse(im_array, channel_values)
    plt.imshow(image)

    num_boxes = detections.shape[0]
    for row_idx in range(num_boxes):
        # A fresh random RGB colour for every box.
        edge_color = tuple(random.random() for _ in range(3))
        box = detections[row_idx]
        left, top_edge = box[0], box[1]
        outline = plt.Rectangle(
            (left, top_edge),
            box[2] - left,
            box[3] - top_edge,
            fill=False,
            edgecolor=edge_color,
            linewidth=3.5,
        )
        plt.gca().add_patch(outline)
        # Class-name / score labels are intentionally not drawn.

    plt.show()

    # The mean pixel value doubles as the output file name.
    out_path = 'vis/' + str(np.mean(image)) + '.png'
    savefig(out_path)

    # Reset pyplot state so the next call starts from a clean figure.
    plt.clf()
    plt.cla()
    plt.close(0)
class ProposalLayer(caffe.Layer):
    """
    Outputs object detection proposals by applying estimated bounding-box
    transformations to a set of regular boxes (called "anchors").

    Bottom blobs, in the order this layer indexes them:
      bottom[0]      im_info
      bottom[1..5]   bbox deltas for strides 4, 8, 16, 32, 64
      bottom[6..10]  class scores for strides 4, 8, 16, 32, 64
    Top blobs:
      top[0]         rois, one row per proposal: (batch index, x1, y1, x2, y2)
    """

    def setup(self, bottom, top):
        """Parse the layer parameters and give the top blobs an initial shape."""
        # Parse the layer parameter string, which must be valid YAML.
        # safe_load replaces the Loader-less yaml.load: the parameter is a
        # plain mapping, and safe_load neither constructs arbitrary objects
        # nor needs an explicit Loader argument.
        layer_params = yaml.safe_load(self.param_str_)
        self._feat_stride = [int(i) for i in layer_params['feat_stride'].split(',')]
        # Author's note: defaults to 2 ** np.arange(4, 6).
        self._scales = cfg.FPNRSCALES
        # Author's note: defaults to [0.5, 1, 2].
        self._ratios = cfg.FPNRATIOS
        self._min_sizes = 16
        self._num_anchors = len(self._scales) * len(self._ratios)
        self._output_score = False

        if DEBUG:
            # Anchors are generated per stride inside forward(), so only the
            # parameters they are built from can be shown here.
            print('feat_stride: {}'.format(self._feat_stride))
            print('anchor scales: {}'.format(self._scales))
            print('anchor ratios: {}'.format(self._ratios))

        # rois blob: holds R regions of interest, each is a 5-tuple
        # (n, x1, y1, x2, y2) specifying an image batch index n and a
        # rectangle (x1, y1, x2, y2)
        top[0].reshape(1, 5)

        # scores blob: holds scores for R regions of interest
        if len(top) > 1:
            top[1].reshape(1, 1, 1, 1)

    def forward(self, bottom, top):
        """
        Build the RoI blob for one image.

        For every feature stride: generate shifted anchors, apply the
        predicted deltas, clip to the image and drop boxes that are too
        small. Proposals from all strides are then merged, sorted by score,
        truncated to the pre-NMS limit, passed through NMS, and finally
        trimmed or padded to the post-NMS limit before being written to
        top[0].

        Raises ValueError when the bbox-delta blob holds more than one image.
        """
        cfg_key = str(self.phase)  # either 'TRAIN' or 'TEST'
        # Author's notes on the defaults: 12000 / 6000 (train / test),
        # 2000 / 1000, and 0.7 respectively.
        pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
        post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
        nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
        min_size = self._min_sizes

        # 'im_info' blob. Author's note: it comes from roi_data_layer's
        # minibatch.py as [[im_blob.shape[2], im_blob.shape[3], im_scales[0]]],
        # i.e. (height, width, scale).
        im_info = bottom[0].data[0, :]

        # NOTE(review): in the reviewed text this raise was unconditional,
        # which made the rest of forward() unreachable. The guard is restored
        # on the batch dimension of the first bbox-delta blob -- confirm
        # against the upstream implementation.
        if bottom[1].data.shape[0] > 1:
            raise ValueError("Sorry, multiple images each device is not implemented")

        # Class scores per stride (author's note: fpn_out_reshape is the cls
        # score produced by the softmax layer fpn_out).
        cls_prob_dict = {
            'stride64': bottom[10].data,  # 'fpn_out_reshape/p6'
            'stride32': bottom[9].data,   # 'fpn_out_reshape/p5'
            'stride16': bottom[8].data,   # 'fpn_out_reshape/p4'
            'stride8': bottom[7].data,    # 'fpn_out_reshape/p3'
            'stride4': bottom[6].data,    # 'fpn_out_reshape/p2'
        }
        # Predicted box offsets per stride, used below to build the proposals.
        bbox_pred_dict = {
            'stride64': bottom[5].data,   # 'rpn_bbox_pred/p6'
            'stride32': bottom[4].data,   # 'rpn_bbox_pred/p5'
            'stride16': bottom[3].data,   # 'rpn_bbox_pred/p4'
            'stride8': bottom[2].data,    # 'rpn_bbox_pred/p3'
            'stride4': bottom[1].data,    # 'rpn_bbox_pred/p2'
        }

        proposal_list = []
        score_list = []
        for s in self._feat_stride:
            stride = int(s)

            # Base anchors at the first cell of this level.
            # NOTE(review): this line was replaced by a free-text note in the
            # reviewed text; restored from the imported generate_anchors and
            # the enumeration comments below -- confirm the keyword names.
            sub_anchors = generate_anchors(base_size=stride,
                                           scales=self._scales,
                                           ratios=self._ratios)

            # the first set of _num_anchors channels are bg probs
            # the second set are the fg probs, which we want
            scores = cls_prob_dict['stride' + str(s)][:, self._num_anchors:, :, :]
            # Author's note: the deltas are (centre, width/height) offsets
            # predicted by rpn_bbox_pred; combined with the anchors they give
            # the proposals via bbox_transform_inv.
            bbox_deltas = bbox_pred_dict['stride' + str(s)]

            # 1. Generate proposals from bbox_deltas and shifted anchors
            # use real image size instead of padded feature map sizes
            # (image size / stride = feature-map size at this level)
            height, width = int(im_info[0] / stride), int(im_info[1] / stride)

            # Enumerate all shifts: every feature-map cell times the stride
            # gives the matching position in the input image.
            shift_x = np.arange(0, width) * stride
            shift_y = np.arange(0, height) * stride
            shift_x, shift_y = np.meshgrid(shift_x, shift_y)
            shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                                shift_x.ravel(), shift_y.ravel())).transpose()

            # Enumerate all shifted anchors:
            #
            # add A anchors (1, A, 4) to
            # cell K shifts (K, 1, 4) to get
            # shift anchors (K, A, 4)
            # reshape to (K*A, 4) shifted anchors
            A = self._num_anchors
            K = shifts.shape[0]
            # NOTE(review): restored line, written to match the comment above.
            anchors = (sub_anchors.reshape((1, A, 4)) +
                       shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
            anchors = anchors.reshape((K * A, 4))

            # Transpose and reshape predicted bbox transformations to get them
            # into the same order as the anchors:
            #
            # bbox deltas will be (1, 4 * A, H, W) format
            # transpose to (1, H, W, 4 * A)
            # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
            # in slowest to fastest order
            bbox_deltas = _clip_pad(bbox_deltas, (height, width))
            bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

            # Same story for the scores:
            #
            # scores are (1, A, H, W) format
            # transpose to (1, H, W, A)
            # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
            scores = _clip_pad(scores, (height, width))
            scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

            # Convert anchors into proposals via bbox transformations
            # (anchor + offset = predicted box).
            # NOTE(review): restored line -- confirm.
            proposals = bbox_transform_inv(anchors, bbox_deltas)

            # 2. clip predicted boxes to image
            # NOTE(review): restored line -- confirm.
            proposals = clip_boxes(proposals, im_info[:2])

            # 3. remove predicted boxes with either height or width < threshold
            # (NOTE: convert min_size to input image scale stored in im_info[2])
            keep = _filter_boxes(proposals, min_size * im_info[2])
            proposals = proposals[keep, :]
            scores = scores[keep]

            proposal_list.append(proposals)
            score_list.append(scores)

        proposals = np.vstack(proposal_list)
        scores = np.vstack(score_list)

        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take top pre_nms_topN (e.g. 6000)
        order = scores.ravel().argsort()[::-1]
        if pre_nms_topN > 0:
            order = order[:pre_nms_topN]
        proposals = proposals[order, :]
        scores = scores[order]

        # 6. apply nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 300)
        # 8. return the top proposals (-> RoIs top)
        det = np.hstack((proposals, scores)).astype(np.float32)
        keep = nms(det, nms_thresh)
        if post_nms_topN > 0:
            keep = keep[:post_nms_topN]

        # Trim the excess above, pad the shortfall here, so that the output
        # size remains unchanged.
        if len(keep) < post_nms_topN:
            if len(keep) == 0:
                # Nothing survived NMS, so there is nothing to sample padding
                # from: emit post_nms_topN placeholder boxes (0, 0, 16, 16).
                placeholder = np.zeros((post_nms_topN, 4), dtype=np.float32)
                placeholder[:, 2] = 16
                placeholder[:, 3] = 16
                self._write_rois(top, placeholder)
                return
            # Pad by re-sampling (with replacement) from the surviving boxes.
            pad = npr.choice(keep, size=post_nms_topN - len(keep))
            keep = np.hstack((keep, pad))
        proposals = proposals[keep, :]

        # Scores are not emitted: forward() only ever writes top[0].
        self._write_rois(top, proposals)

    def _write_rois(self, top, proposals):
        """Prepend an all-zero batch-index column to proposals and store them in top[0]."""
        # Our RPN implementation only supports a single input image, so all
        # batch inds are 0
        batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
        blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
        top[0].reshape(*(blob.shape))
        top[0].data[...] = blob

    def backward(self, top, propagate_down, bottom):
        """This layer does not propagate gradients."""
        pass

    def reshape(self, bottom, top):
        """Reshaping happens during the call to forward."""
        pass
def _filter_boxes(boxes, min_size):
    """Return the row indices of boxes whose width and height are both >= min_size.

    Side lengths are measured inclusively, i.e. ``x2 - x1 + 1`` and
    ``y2 - y1 + 1`` for rows laid out as (x1, y1, x2, y2).
    """
    widths = boxes[:, 2] - boxes[:, 0] + 1
    heights = boxes[:, 3] - boxes[:, 1] + 1
    large_enough = np.logical_and(widths >= min_size, heights >= min_size)
    return np.flatnonzero(large_enough)
def _clip_pad(tensor, pad_shape):
    """
    Crop the two trailing (spatial) axes of a tensor down to a target size.

    :param tensor: [n, c, H, W]
    :param pad_shape: [h, w]
    :return: a cropped copy when h < H or w < W; otherwise the input object
        itself, unchanged
    """
    full_h, full_w = tensor.shape[2:]
    target_h, target_w = pad_shape
    # Nothing to cut away when the target is at least as large on both axes.
    if target_h >= full_h and target_w >= full_w:
        return tensor
    return tensor[:, :, :target_h, :target_w].copy()
def bbox_transform_inv(boxes, deltas):
    """Apply (dx, dy, dw, dh) deltas to boxes and return the predicted boxes.

    ``boxes`` rows are (x1, y1, x2, y2). ``deltas`` holds one or more
    consecutive groups of four columns per row; each group is decoded
    against the same box. The result has the shape and dtype of ``deltas``.
    An empty ``boxes`` array yields an empty result with ``deltas``'
    column count.
    """
    if boxes.shape[0] == 0:
        return np.zeros((0, deltas.shape[1]), dtype=deltas.dtype)

    boxes = boxes.astype(deltas.dtype, copy=False)

    # Box extents (inclusive pixel convention) and centres, kept as column
    # vectors so they broadcast across every group of four delta columns.
    box_w = (boxes[:, 2] - boxes[:, 0] + 1.0)[:, np.newaxis]
    box_h = (boxes[:, 3] - boxes[:, 1] + 1.0)[:, np.newaxis]
    center_x = boxes[:, 0][:, np.newaxis] + 0.5 * box_w
    center_y = boxes[:, 1][:, np.newaxis] + 0.5 * box_h

    # Centres shift proportionally to the box size; sizes scale exponentially.
    new_cx = deltas[:, 0::4] * box_w + center_x
    new_cy = deltas[:, 1::4] * box_h + center_y
    new_w = np.exp(deltas[:, 2::4]) * box_w
    new_h = np.exp(deltas[:, 3::4]) * box_h

    result = np.zeros(deltas.shape, dtype=deltas.dtype)
    corner_columns = (
        new_cx - 0.5 * new_w,  # x1
        new_cy - 0.5 * new_h,  # y1
        new_cx + 0.5 * new_w,  # x2
        new_cy + 0.5 * new_h,  # y2
    )
    for offset, values in enumerate(corner_columns):
        result[:, offset::4] = values
    return result
def py_cpu_nms(dets, thresh):
    """Pure Python NMS baseline.

    Greedy non-maximum suppression: repeatedly keep the highest-scoring
    remaining box and discard every other remaining box whose IoU with it
    exceeds ``thresh``.

    :param dets: array with rows (x1, y1, x2, y2, score)
    :param thresh: IoU threshold; boxes with overlap <= thresh survive
    :return: list of kept row indices into ``dets``, highest score first
    """
    x1 = dets[:, 0]
    y1 = dets[:, 1]
    x2 = dets[:, 2]
    y2 = dets[:, 3]
    scores = dets[:, 4]

    # Inclusive pixel convention: a box from x1 to x2 is x2 - x1 + 1 wide.
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]

    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        rest = order[1:]

        # Intersect the kept box with every remaining box.
        xx1 = np.maximum(x1[i], x1[rest])
        yy1 = np.maximum(y1[i], y1[rest])
        xx2 = np.minimum(x2[i], x2[rest])
        yy2 = np.minimum(y2[i], y2[rest])

        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        ovr = inter / (areas[i] + areas[rest] - inter)

        # ovr was computed against order[1:], so its positions are shifted by
        # one relative to order; add 1 to map the survivors back.
        inds = np.where(ovr <= thresh)[0]
        order = order[inds + 1]

    return keep