faster-rcnn anchor生成

def generate_anchor_base(base_size=16, ratios=(0.5, 1, 2),
                         anchor_scales=(8, 16, 32)):
    """Build the base anchor boxes centred on a single ``base_size`` cell.

    One anchor is produced for every (ratio, scale) pair. Each anchor has
    area ``(base_size * scale) ** 2`` and height/width ratio ``ratio``:

        h = base_size * scale * sqrt(ratio)
        w = base_size * scale * sqrt(1 / ratio)

    Args:
        base_size: Side length of the reference cell; all anchors are
            centred at ``(base_size / 2, base_size / 2)``.
        ratios: Sequence of height/width ratios.
        anchor_scales: Sequence of scale multipliers applied to
            ``base_size``.

    Returns:
        A ``float32`` array of shape ``(len(ratios) * len(anchor_scales), 4)``
        whose rows are ``(y_min, x_min, y_max, x_max)``. Rows are ordered
        ratio-major: the row for ``ratios[i]`` and ``anchor_scales[j]`` is at
        index ``i * len(anchor_scales) + j``.
    """
    center_y = base_size / 2.
    center_x = base_size / 2.

    # Column vector of ratios against a row vector of side lengths, so the
    # broadcast product is laid out ratio-major, matching the row order
    # documented above.
    ratio_col = np.asarray(ratios, dtype=np.float64).reshape(-1, 1)
    side_row = base_size * np.asarray(
        anchor_scales, dtype=np.float64).reshape(1, -1)

    heights = (side_row * np.sqrt(ratio_col)).ravel()
    widths = (side_row * np.sqrt(1. / ratio_col)).ravel()

    anchor_base = np.stack((center_y - heights / 2.,
                            center_x - widths / 2.,
                            center_y + heights / 2.,
                            center_x + widths / 2.), axis=1)
    return anchor_base.astype(np.float32)

def __test():
    """Draw the base anchors one by one and save the frames to ``yang.gif``.

    Each anchor returned by ``generate_anchor_base()`` is drawn onto the same
    600x800 single-channel canvas; a copy of the canvas is kept after every
    rectangle so the GIF shows the anchors accumulating.
    """
    import cv2
    import imageio

    anchors = generate_anchor_base()

    # Blank canvas. (Casting uniform [0, 1) noise to uint8 truncates every
    # pixel to 0, so an explicit zero image is the same picture.)
    img = np.zeros((600, 800), dtype=np.uint8)
    print(img.shape)

    frames = []
    for y_min, x_min, y_max, x_max in anchors:  # restore the anchors by hand
        # OpenCV drawing functions need integer pixel coordinates, while the
        # anchors are float32, so round and convert before drawing.
        top_left = (int(round(x_min)), int(round(y_min)))
        bottom_right = (int(round(x_max)), int(round(y_max)))
        cv2.rectangle(img, top_left, bottom_right,
                      color=(255, 0, 0), thickness=3)
        frames.append(img.copy())  # one GIF frame per anchor drawn so far
        # To preview interactively instead, show `img` with cv2.imshow and
        # wait on cv2.waitKey(0) here.

    imageio.mimsave('yang.gif', frames, 'GIF', duration=0.5)
    cv2.destroyAllWindows()

这里得到h，w的公式是

(base_size*anchor_scale)**2=w*h

h/w = ratio（即 h = base_size*anchor_scale*sqrt(ratio)，w = base_size*anchor_scale*sqrt(1/ratio)）

通过解这个式子可以得到w，h 如代码。

生成的anchor base的shape是[9,4]，可以用opencv画出来看一下。可以看出它们是中心位于图像左上角(8,8)处的9个anchor。要得到所有的anchor，需要让这组anchor在图上滑动，下面的代码用来产生一张图上的所有anchor

def _enumerate_shifted_anchor(anchor_base, feat_stride, height, width):
    """Replicate ``anchor_base`` at every cell of a ``height`` x ``width`` grid.

    Grid cells are spaced ``feat_stride`` apart along both axes, starting at
    0. Every base anchor is translated by each cell's ``(y, x)`` offset.

    Args:
        anchor_base: Array of shape ``(A, 4)`` with rows
            ``(y_min, x_min, y_max, x_max)``.
        feat_stride: Spacing between neighbouring grid cells.
        height: Number of grid rows.
        width: Number of grid columns.

    Returns:
        A ``float32`` array of shape ``(K * A, 4)`` with ``K = height * width``.
        Grid cells are visited row by row (x varies fastest), and the ``A``
        anchors of one cell are stored contiguously.
    """
    import numpy as xp

    ys = xp.arange(0, height * feat_stride, feat_stride)
    xs = xp.arange(0, width * feat_stride, feat_stride)
    grid_x, grid_y = xp.meshgrid(xs, ys)

    flat_y = grid_y.ravel()
    flat_x = grid_x.ravel()
    # One (y, x, y, x) offset per grid cell, so the same translation is
    # applied to both corners of a box.
    offsets = xp.stack((flat_y, flat_x, flat_y, flat_x), axis=1)

    num_base = anchor_base.shape[0]
    num_cells = offsets.shape[0]

    # (K, 1, 4) + (1, A, 4) broadcasts to (K, A, 4).
    shifted = offsets.reshape((num_cells, 1, 4)) + \
        anchor_base.reshape((1, num_base, 4))
    return shifted.reshape((num_cells * num_base, 4)).astype(np.float32)

这里是把坐标+到anchor上，我们可以一步一步看一下是怎么来的。

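这里假定测试数据图片为[600, 800]，经过4次2×2池化（共下采样16倍）后特征图变为[38,50]，即 base_size=16,feat_stride=16,height=38,width=50。注意：下面打印出来的shift_x有38个值、shift_y有50个值，说明示例输出实际是按height=50、width=38运行得到的（x、y与上述设定正好对调），阅读时请留意。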

首先，meshgrid是用来生成一片区域中的网格坐标的，这里

shift_x=array([ 0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192,
208, 224, 240, 256, 272, 288, 304, 320, 336, 352, 368, 384, 400,
416, 432, 448, 464, 480, 496, 512, 528, 544, 560, 576, 592])

shift_y=array([ 0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192,
208, 224, 240, 256, 272, 288, 304, 320, 336, 352, 368, 384, 400,
416, 432, 448, 464, 480, 496, 512, 528, 544, 560, 576, 592, 608,
624, 640, 656, 672, 688, 704, 720, 736, 752, 768, 784])

meshgrid之后分别为

[array([[ 0, 16, 32, ..., 560, 576, 592],
[ 0, 16, 32, ..., 560, 576, 592],
[ 0, 16, 32, ..., 560, 576, 592],
...,
[ 0, 16, 32, ..., 560, 576, 592],
[ 0, 16, 32, ..., 560, 576, 592],
[ 0, 16, 32, ..., 560, 576, 592]]),
array([[ 0, 0, 0, ..., 0, 0, 0],
[ 16, 16, 16, ..., 16, 16, 16],
[ 32, 32, 32, ..., 32, 32, 32],
...,
[752, 752, 752, ..., 752, 752, 752],
[768, 768, 768, ..., 768, 768, 768],
[784, 784, 784, ..., 784, 784, 784]])]

产生两个数组，一个是x坐标一个是y坐标，通过这个坐标就可以得到图片上面的所有点

经过stack、reshape和transpose之后

array([[[ 0, 0, 0, 0]],

[[ 0, 16, 0, 16]],

[[ 0, 32, 0, 32]],

...,
[[784, 560, 784, 560]],

[[784, 576, 784, 576]],

[[784, 592, 784, 592]]])

该数组的shape为[1900,1,4]，anchor_base经过reshape后为[1,9,4]。两者相加时，numpy会通过广播(broadcasting)自动把它们都扩展为[1900,9,4]

广播后逐元素相加，再reshape为[1900*9,4]，就是最后的结果

把得到的anchor画出来，这里只画了前100个anchor，可以看出它在原来9个anchor基础上，在往下扩展，然后再向右扩展

faster-rcnn anchor生成

相关推荐