通過下面這段程式碼拿到anchors以後

1
2
3
4
5
6
7
8
9
10
# Anchors
if mode == "training":
anchors = self.get_anchors(config.IMAGE_SHAPE)
# Duplicate across the batch dimension because Keras requires it
# TODO: can this be optimized to avoid duplicating the anchors?
anchors = np.broadcast_to(anchors, (config.BATCH_SIZE,) + anchors.shape)
# A hack to get around Keras's bad support for constants
anchors = KL.Lambda(lambda x: tf.Variable(anchors), name="anchors")(input_image)
else:
anchors = input_anchors

這裡總算要進行proposal layer計算了
將所有anchors還有相應對的分數跟偏移量當作輸入
而前面的東西都是class init的時候的設定

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
rpn_class_logits, rpn_class, rpn_bbox = outputs
# Generate proposals
# Proposals are [batch, N, (y1, x1, y2, x2)] in normalized coordinates
# and zero padded.
# POST_NMS_ROIS_INFERENCE = 1000
# POST_NMS_ROIS_TRAINING = 2000
proposal_count = config.POST_NMS_ROIS_TRAINING if mode == "training"\
else config.POST_NMS_ROIS_INFERENCE
rpn_rois = ProposalLayer(
proposal_count=proposal_count,
nms_threshold=config.RPN_NMS_THRESHOLD,
name="ROI",
config=config)([rpn_class, rpn_bbox, anchors]
)
if mode == "training":
# Class ID mask to mark class IDs supported by the dataset the image
# came from.
active_class_ids = KL.Lambda(
lambda x: parse_image_meta_graph(x)["active_class_ids"]
)(input_image_meta)
if not config.USE_RPN_ROIS:
# Ignore predicted ROIs and use ROIs provided as an input.
input_rois = KL.Input(shape=[config.POST_NMS_ROIS_TRAINING, 4],
name="input_roi", dtype=np.int32)
# Normalize coordinates
target_rois = KL.Lambda(lambda x: norm_boxes_graph(
x, K.shape(input_image)[1:3]))(input_rois)
else:
target_rois = rpn_rois

其中parse_image_meta_graph

1
2
3
4
5
6
7
8
return {
"image_id": image_id,
"original_image_shape": original_image_shape,
"image_shape": image_shape,
"window": window,
"scale": scale,
"active_class_ids": active_class_ids,
}

Proposal Layer

rpn_class:所有像素點BG/FG的機率值。
rpn_bbox:所有像素點對應anchor上的4個偏移值[dy, dx, log(dh), log(dw)]。
anchors: 剛剛通過預先生成的有序anchor列表,注意這裡有序表示feature_map上像素點生成的anchor以及該像素點生成的rpn_class和rpn_bbox是對應的(看paper看起來是這樣,但這樣理解對嗎, 有點不確定)
scores和deltas都是RPN中得到的

最終ProposalLayer會return一個經過bbox regression以及NMS過濾後anchor boxes set(稱為roi或proposal),至此已經完成了RPN啦~
init內super的用法可參考 你不知道的 super

要注意的事情是, 其實對於inference來說 這裡已經有RPN的結果了, 但對於training來說還少了一些東西, 就是跟grondtruth的比較還有等等, 所以才有後續的DetectionTargetLayer

另外Python中,如果在創建class的時候寫了call(), 那麼該class實例化出實例後, 實例名()就是調用call()。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
class ProposalLayer(KE.Layer):
"""Receives anchor scores and selects a subset to pass as proposals
to the second stage. Filtering is done based on anchor scores and
non-max suppression to remove overlaps. It also applies bounding
box refinement deltas to anchors.
Inputs:
rpn_probs: [batch, anchors, (bg prob, fg prob)]
rpn_bbox: [batch, anchors, (dy, dx, log(dh), log(dw))]
anchors: [batch, (y1, x1, y2, x2)] anchors in normalized coordinates
Returns:
Proposals in normalized coordinates [batch, rois, (y1, x1, y2, x2)]
"""
def __init__(self, proposal_count, nms_threshold, config=None, **kwargs):
super(ProposalLayer, self).__init__(**kwargs)
self.config = config
self.proposal_count = proposal_count
self.nms_threshold = nms_threshold
def call(self, inputs):
###實現了將傳入的anchors,及其scores、deltas進行topK的推薦和nms的推薦,最終輸出
###數量為proposal_counts的proposals。其中的scores和deltas都是RPN網絡中得到的
# Box Scores. Use the foreground class confidence. [Batch, num_rois, 1]
scores = inputs[0][:, :, 1]
# Box deltas [batch, num_rois, 4]
deltas = inputs[1]
deltas = deltas * np.reshape(self.config.RPN_BBOX_STD_DEV, [1, 1, 4])
# Anchors
anchors = inputs[2]
# Improve performance by trimming to top anchors by score
# and doing the rest on the smaller subset.
pre_nms_limit = tf.minimum(6000, tf.shape(anchors)[1])
ix = tf.nn.top_k(scores, pre_nms_limit, sorted=True,
name="top_anchors").indices
scores = utils.batch_slice([scores, ix], lambda x, y: tf.gather(x, y),
self.config.IMAGES_PER_GPU)
deltas = utils.batch_slice([deltas, ix], lambda x, y: tf.gather(x, y),
self.config.IMAGES_PER_GPU)
pre_nms_anchors = utils.batch_slice([anchors, ix], lambda a, x: tf.gather(a, x),
self.config.IMAGES_PER_GPU,
names=["pre_nms_anchors"])
# Apply deltas to anchors to get refined anchors.
# [batch, N, (y1, x1, y2, x2)]
##利用deltas在anchors上,得到精煉後的boxs
boxes = utils.batch_slice([pre_nms_anchors, deltas],
lambda x, y: apply_box_deltas_graph(x, y),
self.config.IMAGES_PER_GPU,
names=["refined_anchors"])
# normalized coordinates就是對應原圖的百分比坐標
        # 下面的作用:防止修正後的anchor坐標超出了邊界即0<=x,y<=1
# Clip to image boundaries. Since we're in normalized coordinates,
# clip to 0..1 range. [batch, N, (y1, x1, y2, x2)]
window = np.array([0, 0, 1, 1], dtype=np.float32)
boxes = utils.batch_slice(boxes,
lambda x: clip_boxes_graph(x, window),
self.config.IMAGES_PER_GPU,
names=["refined_anchors_clipped"])
# Filter out small boxes
# According to Xinlei Chen's paper, this reduces detection accuracy
# for small objects, so we're skipping it.
# Non-max suppression
def nms(boxes, scores):
indices = tf.image.non_max_suppression(
boxes, scores, self.proposal_count,
self.nms_threshold, name="rpn_non_max_suppression")
proposals = tf.gather(boxes, indices)
# Pad if needed
padding = tf.maximum(self.proposal_count - tf.shape(proposals)[0], 0)
##利用deltas在anchors上,得到精化的boxs
proposals = tf.pad(proposals, [(0, padding), (0, 0)])
return proposals
proposals = utils.batch_slice([boxes, scores], nms,
self.config.IMAGES_PER_GPU)
return proposals
def compute_output_shape(self, input_shape):
return (None, self.proposal_count, 4)