faster-rcnn anchor生成
def generate_anchor_base(base_size=16, ratios=(0.5, 1, 2),
                         anchor_scales=(8, 16, 32)):
    """Build the base anchors, one per (ratio, scale) combination.

    Every anchor is centred on ``(base_size / 2, base_size / 2)``, has an
    area of ``(base_size * scale) ** 2`` and a height/width ratio equal to
    ``ratio``.

    Args:
        base_size: Side length of the reference square, in pixels.
        ratios: Sequence of height-to-width ratios.
        anchor_scales: Sequence of multipliers applied to ``base_size``.

    Returns:
        A float32 array of shape ``(len(ratios) * len(anchor_scales), 4)``.
        Each row is ``(y_min, x_min, y_max, x_max)``; row
        ``i * len(anchor_scales) + j`` belongs to ``ratios[i]`` and
        ``anchor_scales[j]``.
    """
    center_y = base_size / 2.
    center_x = base_size / 2.

    num_scales = len(anchor_scales)
    anchor_base = np.zeros((len(ratios) * num_scales, 4), dtype=np.float32)
    for i, ratio in enumerate(ratios):
        for j, scale in enumerate(anchor_scales):
            # Solve h * w == side ** 2 together with h / w == ratio.
            side = base_size * scale
            h = side * np.sqrt(ratio)
            w = side * np.sqrt(1. / ratio)

            row = i * num_scales + j
            anchor_base[row, 0] = center_y - h / 2.
            anchor_base[row, 1] = center_x - w / 2.
            anchor_base[row, 2] = center_y + h / 2.
            anchor_base[row, 3] = center_x + w / 2.
    return anchor_base
def __test():
    """Draw the base anchors one at a time and save the frames to 'yang.gif'.

    Each frame is a copy of the canvas after one more anchor rectangle has
    been drawn, so the GIF shows the anchors accumulating.
    """
    import cv2
    import imageio

    anchors = generate_anchor_base()

    # Values drawn from [0, 1) all truncate to 0 when cast to uint8, so the
    # canvas is simply a black 600x800 image; allocate it directly.
    img = np.zeros((600, 800), dtype=np.uint8)
    print(img.shape)

    frames = []
    for y_min, x_min, y_max, x_max in anchors:  # redraw the base anchors by hand
        # OpenCV expects integer pixel coordinates given as (x, y) points,
        # while the anchors are float32 rows of (y_min, x_min, y_max, x_max).
        top_left = (int(round(float(x_min))), int(round(float(y_min))))
        bottom_right = (int(round(float(x_max))), int(round(float(y_max))))
        cv2.rectangle(img, top_left, bottom_right,
                      color=(255, 0, 0), thickness=3)
        frames.append(img.copy())  # one GIF frame per anchor drawn

    imageio.mimsave('yang.gif', frames, 'GIF', duration=0.5)
这里得到h,w的公式是
(base_size*anchor_scale)**2 = w*h
h/w = ratio
解这两个式子可以得到 h = base_size*anchor_scale*sqrt(ratio),w = base_size*anchor_scale*sqrt(1/ratio),如代码所示。
生成的anchor base的shape是[9,4],可以用opencv画出来看一下,可以看出它们是中心都位于图像左上角(8,8)处的9个anchor。要得到所有的anchor,需要让它在图上滑动,下面的代码用来产生一张图上的所有anchor
def _enumerate_shifted_anchor(anchor_base, feat_stride, height, width):
    """Replicate the base anchors at every cell of a height x width grid.

    Args:
        anchor_base: Array of A base anchors, reshaped to ``(1, A, 4)`` here,
            with rows ``(y_min, x_min, y_max, x_max)``.
        feat_stride: Step, in pixels, between neighbouring grid cells.
        height: Number of grid rows.
        width: Number of grid columns.

    Returns:
        A float32 array of shape ``(K * A, 4)`` with ``K = height * width``.
        Cells are visited row by row (x varies fastest); within a cell the
        anchors keep the order of ``anchor_base``.
    """
    import numpy as xp

    # Pixel offset of every grid cell along each axis.
    offsets_y = xp.arange(0, height * feat_stride, feat_stride)
    offsets_x = xp.arange(0, width * feat_stride, feat_stride)
    grid_x, grid_y = xp.meshgrid(offsets_x, offsets_y)

    # One (y, x, y, x) row per cell so it can be added to a box directly.
    flat_y = grid_y.ravel()
    flat_x = grid_x.ravel()
    cell_shift = xp.stack((flat_y, flat_x, flat_y, flat_x), axis=1)

    num_anchors = anchor_base.shape[0]
    num_cells = cell_shift.shape[0]

    # Broadcast (1, A, 4) against (K, 1, 4) to get every cell/anchor pair.
    per_anchor = anchor_base.reshape((1, num_anchors, 4))
    per_cell = cell_shift.reshape((num_cells, 1, 4))
    shifted = per_anchor + per_cell

    return shifted.reshape((num_cells * num_anchors, 4)).astype(np.float32)
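这里是把每个位置的坐标偏移加到anchor_base上,我们可以一步一步看一下是怎么来的。
这里假定测试数据图片为[600, 800],经过4次池化(下采样16倍)后变为[38,50],base_size=16,feat_stride=16,height=38,width=50。注意:下面列出的输出中shift_x有38个值、shift_y有50个值,对应的是以height=50、width=38调用的结果;若按height=38、width=50调用,两者的取值范围会互换。
首先,meshgrid是用来生成一片区域中的网格的,这里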
shift_x=array([ 0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192,
208, 224, 240, 256, 272, 288, 304, 320, 336, 352, 368, 384, 400,
416, 432, 448, 464, 480, 496, 512, 528, 544, 560, 576, 592])
shift_y=array([ 0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192,
208, 224, 240, 256, 272, 288, 304, 320, 336, 352, 368, 384, 400,
416, 432, 448, 464, 480, 496, 512, 528, 544, 560, 576, 592, 608,
624, 640, 656, 672, 688, 704, 720, 736, 752, 768, 784])
meshgrid之后分别为
[array([[ 0, 16, 32, ..., 560, 576, 592],
[ 0, 16, 32, ..., 560, 576, 592],
[ 0, 16, 32, ..., 560, 576, 592],
...,
[ 0, 16, 32, ..., 560, 576, 592],
[ 0, 16, 32, ..., 560, 576, 592],
[ 0, 16, 32, ..., 560, 576, 592]]),
array([[ 0, 0, 0, ..., 0, 0, 0],
[ 16, 16, 16, ..., 16, 16, 16],
[ 32, 32, 32, ..., 32, 32, 32],
...,
[752, 752, 752, ..., 752, 752, 752],
[768, 768, 768, ..., 768, 768, 768],
[784, 784, 784, ..., 784, 784, 784]])]
产生两个数组,一个是x坐标一个是y坐标,通过这个坐标就可以得到图片上面的所有点
经过stack,再reshape和transpose之后(shape由[1900,4]变为[1900,1,4])
array([[[ 0, 0, 0, 0]],
[[ 0, 16, 0, 16]],
[[ 0, 32, 0, 32]],
...,
[[784, 560, 784, 560]],
[[784, 576, 784, 576]],
[[784, 592, 784, 592]]])
该数组shape为[1900,1,4],anchor_base的shape为[1,9,4],它们相加时numpy会通过广播(broadcasting)自动把两者都扩展为[1900,9,4]
逐元素相加得到的结果,再reshape成[1900*9,4]=[17100,4],就是最后的结果
把得到的anchor画出来,这里只画了前100个anchor。按照代码中的排列顺序(x变化最快),可以看出它是在原来9个anchor的基础上,先沿x方向(向右)扩展,一行结束后再沿y方向(向下)扩展