object_discovery.py
# Copyright 2021 - Valeo Comfort and Driving Assistance - Oriane Siméoni @ valeo.ai
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import torch
import scipy
import scipy.ndimage
import numpy as np

from datasets import bbox_iou


def lost(feats, dims, scales, init_image_size, k_patches=100):
    """
    Implementation of the LOST method.
    Inputs
        feats: the pixel/patch features of an image
        dims: dimension of the map from which the features are used
        scales: from image to map scale
        init_image_size: size of the image
        k_patches: number of patches retrieved and compared to the seed during seed expansion
    Outputs
        pred: box predictions
        A: affinity (patch similarity) matrix
        scores: lowest degree scores for all patches
        seed: selected patch corresponding to an object
    """
    # Compute the patch similarity matrix
    A = (feats @ feats.transpose(1, 2)).squeeze()

    # Compute the inverse degree centrality measure per patch
    sorted_patches, scores = patch_scoring(A)

    # Select the initial seed
    seed = sorted_patches[0]

    # Seed expansion
    potentials = sorted_patches[:k_patches]
    similars = potentials[A[seed, potentials] > 0.0]
    M = torch.sum(A[similars, :], dim=0)

    # Box extraction
    pred, _ = detect_box(
        M, seed, dims, scales=scales, initial_im_size=init_image_size[1:]
    )

    return np.asarray(pred), A, scores, seed


def patch_scoring(M, threshold=0.):
    """
    Patch scoring based on the inverse degree.
    """
    # Cloning important
    A = M.clone()

    # Zero diagonal
    A.fill_diagonal_(0)

    # Make sure symmetric and non-negative
    A[A < 0] = 0
    C = A + A.t()

    # Sort patches by inverse degree
    cent = -torch.sum(A > threshold, dim=1).type(torch.float32)
    sel = torch.argsort(cent, descending=True)

    return sel, cent


def detect_box(A, seed, dims, initial_im_size=None, scales=None):
    """
    Extract a box corresponding to the seed patch. Among the connected components
    extracted from the affinity matrix, select the one containing the seed patch.
    """
    w_featmap, h_featmap = dims
    correl = A.reshape(w_featmap, h_featmap).float()

    # Compute connected components
    labeled_array, num_features = scipy.ndimage.label(correl.cpu().numpy() > 0.0)

    # Find the connected component corresponding to the initial seed
    cc = labeled_array[np.unravel_index(seed.cpu().numpy(), (w_featmap, h_featmap))]

    # Should not happen with LOST
    if cc == 0:
        raise ValueError("The seed is in the background component.")

    # Find box
    mask = np.where(labeled_array == cc)

    # Add +1 because the max is excluded
    ymin, ymax = min(mask[0]), max(mask[0]) + 1
    xmin, xmax = min(mask[1]), max(mask[1]) + 1

    # Rescale to image size
    r_xmin, r_xmax = scales[1] * xmin, scales[1] * xmax
    r_ymin, r_ymax = scales[0] * ymin, scales[0] * ymax
    pred = [r_xmin, r_ymin, r_xmax, r_ymax]

    # Check that the box does not exceed the image size (used when padding)
    if initial_im_size:
        pred[2] = min(pred[2], initial_im_size[1])
        pred[3] = min(pred[3], initial_im_size[0])

    # Coordinate predictions in the feature space
    # Axes are ordered differently than in image space
    pred_feats = [ymin, xmin, ymax, xmax]

    return pred, pred_feats


def dino_seg(attn, dims, patch_size, head=0):
    """
    Extraction of boxes based on the DINO segmentation method proposed in
    https://github.com/facebookresearch/dino.
    Modified from https://github.com/facebookresearch/dino/blob/main/visualize_attention.py
    """
    w_featmap, h_featmap = dims
    nh = attn.shape[1]
    official_th = 0.6

    # We keep only the output patch attention
    # Get the attentions corresponding to the [CLS] token
    attentions = attn[0, :, 0, 1:].reshape(nh, -1)

    # We keep only a certain percentage of the mass
    val, idx = torch.sort(attentions)
    val /= torch.sum(val, dim=1, keepdim=True)
    cumval = torch.cumsum(val, dim=1)
    th_attn = cumval > (1 - official_th)
    idx2 = torch.argsort(idx)
    for h in range(nh):
        th_attn[h] = th_attn[h][idx2[h]]
    th_attn = th_attn.reshape(nh, w_featmap, h_featmap).float()

    # Connected components
    labeled_array, num_features = scipy.ndimage.label(th_attn[head].cpu().numpy())

    # Find the biggest component
    size_components = [np.sum(labeled_array == c) for c in range(np.max(labeled_array))]

    if len(size_components) > 1:
        # Select the biggest component, avoiding component 0 which corresponds to the background
        biggest_component = np.argmax(size_components[1:]) + 1
    else:
        # Case of a single component
        biggest_component = 0

    # Mask corresponding to the selected connected component
    mask = np.where(labeled_array == biggest_component)

    # Add +1 because the max is excluded
    ymin, ymax = min(mask[0]), max(mask[0]) + 1
    xmin, xmax = min(mask[1]), max(mask[1]) + 1

    # Rescale to the image
    r_xmin, r_xmax = xmin * patch_size, xmax * patch_size
    r_ymin, r_ymax = ymin * patch_size, ymax * patch_size
    pred = [r_xmin, r_ymin, r_xmax, r_ymax]

    return pred
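

if __name__ == "__main__":
    # Minimal smoke test with random tensors -- an illustrative sketch only, not the
    # original evaluation pipeline. The shapes are assumptions: `feats` is taken to be
    # a [1, N, d] tensor of patch features (e.g. from a ViT backbone) with
    # N = w_featmap * h_featmap, `scales` the patch size along each image axis, and
    # `init_image_size` the (C, H, W) shape of the input image. Running this file
    # standalone still requires the repository's `datasets` module (imported above).
    patch_size = 16
    w_featmap, h_featmap = 14, 14
    d = 384

    # Non-negative random features, so every affinity is positive and the seed
    # cannot fall into the background component.
    feats = torch.rand(1, w_featmap * h_featmap, d)
    init_image_size = (3, w_featmap * patch_size, h_featmap * patch_size)

    pred, A, scores, seed = lost(
        feats,
        dims=[w_featmap, h_featmap],
        scales=[patch_size, patch_size],
        init_image_size=init_image_size,
    )
    print("LOST box [xmin, ymin, xmax, ymax]:", pred, "- seed patch:", int(seed))

    # Same idea for the DINO-attention baseline: `attn` is assumed to be the
    # [1, num_heads, N + 1, N + 1] self-attention of a block, with the [CLS] token first.
    nh = 6
    attn = torch.rand(1, nh, w_featmap * h_featmap + 1, w_featmap * h_featmap + 1)
    print("DINO-seg box [xmin, ymin, xmax, ymax]:", dino_seg(attn, (w_featmap, h_featmap), patch_size))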