preprocessing.py
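
"""Preprocessing utilities for a YOLO-style detector: encoding and decoding
of (S, S, B*5 + C) label grids, image (un)normalization, and
bounding-box-aware augmentations built on albumentations."""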
import math
import random

import cv2
import numpy as np
import torch
from albumentations import (
    BboxParams,
    Compose,
    HorizontalFlip,
    PadIfNeeded,
    RandomSizedBBoxSafeCrop,
    RandomSizedCrop,
    Resize,
    Rotate,
    VerticalFlip,
)
from torchvision import transforms


class Preprocessing:
    """Encodes and decodes YOLO-style (S, S, B*5 + C) labels and applies
    image and bounding-box augmentations."""

    def __init__(self, S, B, C, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225],
                 brightness=0, saturation=0, contrast=0, hue=0):
        self.S, self.B, self.C = S, B, C
        self.mean = mean
        self.std = std
        # Inverse parameters so that unnormalize(normalize(x)) == x.
        self.inv_mean = [-m / s for m, s in zip(mean, std)]
        self.inv_std = [1 / s for s in std]
        self.normalize_transform = transforms.Normalize(mean=self.mean,
                                                        std=self.std)
        self.unnormalize_transform = transforms.Normalize(mean=self.inv_mean,
                                                          std=self.inv_std)
        self.brightness = brightness
        self.saturation = saturation
        self.contrast = contrast
        self.hue = hue
        # ColorJitter's positional order is (brightness, contrast,
        # saturation, hue), so pass keywords explicitly to avoid
        # silently swapping saturation and contrast.
        self.color_transform = transforms.ColorJitter(
            brightness=brightness, saturation=saturation,
            contrast=contrast, hue=hue)
        self.ToPILImage = transforms.ToPILImage(mode='RGB')
        self.ToTensor = transforms.ToTensor()

    def encode_labels(self, label):
        """
        label: list of boxes (xmin, ymin, xmax, ymax, class) with
        coordinates relative to the image size (range [0, 1]).
        Returns a (S, S, B*5 + C) numpy array.
        """
        S = self.S
        B = self.B
        C = self.C
        ground_truth = np.zeros((S, S, B * 5 + C))
        for xmin, ymin, xmax, ymax, cla in label:
            x = (xmin + xmax) / 2
            y = (ymin + ymax) / 2
            w = xmax - xmin
            h = ymax - ymin
            # Compute the grid row and column for this bbox; clamp to the
            # last cell so a centre lying exactly on the right/bottom edge
            # (x == 1.0) does not index out of bounds.
            cell_size = 1. / S  # relative size of one grid cell
            grid_x = min(math.floor(x / cell_size), S - 1)
            grid_y = min(math.floor(y / cell_size), S - 1)
            # x, y relative to the cell's top-left corner
            x = x - (grid_x * cell_size)
            y = y - (grid_y * cell_size)
            # Normalize x and y with respect to cell_size
            # so that they are in the range [0, 1]
            x = x / cell_size
            y = y / cell_size
            confidence = 1.
            encoded_class = self.encode_class(int(cla), C)
            # Repeat x,y,w,h,confidence for each of the B boxes, then
            # append the one-hot class vector: length = B * 5 + C.
            target = ([x, y, w, h, confidence] * B) + encoded_class
            ground_truth[grid_y, grid_x, :] = target
        return ground_truth
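
    # Worked example (hypothetical numbers): with S = 7, a box whose centre
    # is at (0.5, 0.5) lands in cell (3, 3), since floor(0.5 / (1/7)) = 3.
    # Its cell-relative offsets are (0.5 - 3/7) * 7 = 0.5, so the encoded
    # x and y are both 0.5.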

    def decode_label(self, label, img_h, img_w, obj_thresh=0.25):
        """
        label: torch tensor (S, S, B*5 + C)
        Returns a list of boxes, each represented by
        [xmin, ymin, xmax, ymax, class_number, confidence]
        in pixel coordinates.
        """
        # Copy so that decoding does not mutate the caller's tensor
        # (the numpy array would otherwise share its memory).
        np_labels = label.cpu().numpy().copy()
        boxes = []  # x1, y1, x2, y2, class_number, confidence
        for i in range(self.S):
            for j in range(self.S):
                # Keep the more confident of the two predicted boxes.
                # NOTE: the 5:10 and 10: slices assume B == 2.
                bbox1, bbox2 = np_labels[i, j, :5], np_labels[i, j, 5:10]
                if bbox1[4] > bbox2[4]:
                    box = bbox1
                else:
                    box = bbox2
                if box[4] > obj_thresh:
                    # x and y relative to the image instead of the grid cell
                    cell_size = 1. / self.S
                    x, y = box[:2]
                    x *= cell_size
                    y *= cell_size
                    cell_xmin = j * cell_size
                    cell_ymin = i * cell_size
                    x = x + cell_xmin
                    y = y + cell_ymin
                    box[:2] = [x, y]
                    # Scale x, y, w, h from relative [0, 1] coordinates
                    # to pixel coordinates.
                    x, y, w, h = box[:4]
                    x, w = x * img_w, w * img_w
                    y, h = y * img_h, h * img_h
                    # From x,y,w,h to x1,y1,x2,y2 (top-left, bottom-right)
                    xmin = x - w / 2
                    xmax = x + w / 2
                    ymin = y - h / 2
                    ymax = y + h / 2
                    xmin, xmax, ymin, ymax = [
                        int(v) for v in [xmin, xmax, ymin, ymax]]
                    class_number = np.argmax(np_labels[i, j, 10:])
                    confidence = box[4]
                    boxes.append(
                        [xmin, ymin, xmax, ymax, class_number, confidence])
        return boxes

    def post_process_image(self, torch_img):
        # Un-normalize back to the [0, 255] range
        img = self.unnormalize(torch_img)
        img = self.channel_first_to_channel_last(img)
        # Convert to a numpy image; clip before the uint8 cast so that
        # values pushed slightly outside [0, 255] do not wrap around.
        np_img = img.cpu().numpy()
        np_img = np.clip(np_img, 0, 255).astype(np.uint8)
        np_img = self.RGB2BGR(np_img)
        return np_img

    def encode_class(self, a_class, num_class):
        encoded_class = [0] * num_class
        encoded_class[a_class] = 1
        return encoded_class

    def channel_first_to_channel_last(self, img):
        return img.permute(1, 2, 0)

    def BGR2RGB(self, img):
        """
        img: Numpy array
        """
        return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    def RGB2BGR(self, img):
        """
        img: Numpy array
        """
        return cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

    def normalize(self, imgs):
        """
        imgs: Torch image(s) in RGB format with values in [0, 1].
        """
        if len(imgs.shape) == 3:
            # one image
            return self.normalize_transform(imgs)
        else:
            # batch of images
            ret = torch.zeros(imgs.shape)
            for i in range(imgs.shape[0]):
                ret[i] = self.normalize_transform(imgs[i])
            return ret

    def unnormalize(self, imgs):
        if len(imgs.shape) == 3:
            # one image
            ret = self.unnormalize_transform(imgs)
        else:
            # batch of images: apply the inverse transform to each one
            ret = torch.zeros(imgs.shape)
            for i in range(imgs.shape[0]):
                ret[i] = self.unnormalize_transform(imgs[i])
        ret *= 255.
        return ret

    def apply_random_transforms(self, images, labels):
        return self.random_shrink(images, labels)

    def random_color_transform(self, img):
        """
        img: Numpy image (RGB).
        Randomly changes the brightness, saturation,
        contrast and hue of the given image.
        """
        pil_img = self.ToPILImage(img)
        pil_img = self.color_transform(pil_img)
        return np.array(pil_img)

    def random_shrink(self, img, labels):
        """Shrink the image by a random amount (up to 100 pixels),
        then pad it back to its original size."""
        height = img.shape[0]
        width = img.shape[1]
        shrink_size = round(random.random() * 100)
        resize = Resize(height - shrink_size, width - shrink_size,
                        always_apply=False, p=1)
        img, labels = self._transform(img, labels, [resize])
        pad = PadIfNeeded(min_height=height, min_width=width,
                          border_mode=cv2.BORDER_REFLECT_101, value=None,
                          mask_value=None, always_apply=False, p=1.0)
        return self._transform(img, labels, [pad])

    def vertical_flip(self, img, labels):
        return self._transform(img, labels, [VerticalFlip(p=1)])

    def horizontal_flip(self, img, labels):
        return self._transform(img, labels, [HorizontalFlip(p=1)])

    def random_crop(self, img, labels):
        height = img.shape[0]
        width = img.shape[1]
        # min_max_height expects a (min, max) tuple, not a single int.
        crop = RandomSizedCrop((300, height), height, width,
                               always_apply=False, p=1.0)
        # crop = RandomSizedBBoxSafeCrop(height=height, width=width, p=1)
        return self._transform(img, labels, [crop])

    def random_rotate(self, img, labels):
        angle = 20  # rotate by a random angle in [-angle, angle]
        border_mode = cv2.BORDER_CONSTANT  # library default is BORDER_REFLECT_101 (4)
        rotation = Rotate(limit=angle, border_mode=border_mode,
                          value=None, mask_value=None,
                          always_apply=False, p=1)
        return self._transform(img, labels, [rotation])

    def _transform(self, img, labels, transformations):
        aug = self._compose(transformations)
        annotations = self._format_annotation(img, labels)
        augmented_ann = aug(**annotations)
        return self._unformat_annotation(augmented_ann)

    def _format_annotation(self, image, label):
        """
        image: Numpy image in RGB format.
        label: List of bounding boxes in the format
        xmin, ymin, xmax, ymax, class number.
        """
        bboxes = []
        class_ids = []
        for xyxyc in label:
            bboxes.append(xyxyc[:4])
            class_ids.append(xyxyc[4])
        return {'image': image, 'bboxes': bboxes, 'class_ids': class_ids}

    def _unformat_annotation(self, annotations):
        image = annotations['image']
        label = []
        bboxes = annotations['bboxes']
        class_ids = annotations['class_ids']
        for bbox, class_id in zip(bboxes, class_ids):
            label.append(list(bbox) + [class_id])
        return image, label

    def _compose(self, aug, min_area=0., min_visibility=0.):
        # 'albumentations' format: xmin, ymin, xmax, ymax, all relative
        # to the image size (range [0, 1]).
        bbox_params = BboxParams(format='albumentations',
                                 min_area=min_area,
                                 min_visibility=min_visibility,
                                 label_fields=['class_ids'])
        return Compose(aug, bbox_params=bbox_params)
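

# A minimal usage sketch, not part of the class above: the S/B/C values,
# image size and the single box below are hypothetical. It round-trips a
# label through encode_labels/decode_label and applies one augmentation.
if __name__ == "__main__":
    prep = Preprocessing(S=7, B=2, C=20)

    # One box in relative coordinates: xmin, ymin, xmax, ymax, class index.
    label = [(0.1, 0.2, 0.5, 0.6, 3)]
    encoded = prep.encode_labels(label)  # (7, 7, 2*5 + 20) numpy array
    decoded = prep.decode_label(torch.from_numpy(encoded),
                                img_h=448, img_w=448)
    print(decoded)  # roughly [[44, 89, 224, 268, 3, 1.0]], up to rounding

    # Bounding-box-aware augmentation on a dummy image.
    img = np.zeros((448, 448, 3), dtype=np.uint8)
    flipped_img, flipped_label = prep.horizontal_flip(img, label)
    print(flipped_label)  # box mirrored horizontally: x range becomes [0.5, 0.9]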