-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathvl_phow.py
executable file
·160 lines (140 loc) · 6.28 KB
/
vl_phow.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
from scipy import shape, dstack, sqrt, floor, array, mean, ones, vstack, hstack, ndarray
from vlfeat import vl_rgb2gray, vl_imsmooth, vl_dsift
from sys import maxint
"""
Python rewrite of https://github.com/vlfeat/vlfeat/blob/master/toolbox/sift/vl_phow.m
### notice no hsv support atm
### comments are largely copied from the code
"""
def vl_phow(im,
            verbose=True,
            fast=True,
            sizes=None,
            step=2,
            color='rgb',
            floatdescriptors=False,
            magnif=6,
            windowsize=1.5,
            contrastthreshold=0.005):
    """Extract PHOW features (dense SIFT at several bin sizes) from an image.

    Parameters
    ----------
    im : array-like
        2-D (single channel) or H x W x 3 image; it is converted to float32.
    verbose : bool
        Print a short summary; the flag is also forwarded to vl_dsift.
    fast : bool
        Forwarded to vl_dsift.
    sizes : list, ndarray or scalar, optional
        Spatial bin sizes to extract at. Defaults to [4, 6, 8, 10].
    step : int
        Forwarded to vl_dsift as the sampling step.
    color : str
        One of 'gray', 'rgb' or 'opponent'.
    floatdescriptors, windowsize
        Stored in the option objects but not forwarded to vl_dsift here.
    magnif : number
        The smoothing sigma used for a bin size is ``size / magnif``.
    contrastthreshold : float
        Descriptors whose contrast is below this value are set to zero.

    Returns
    -------
    frames_all : ndarray
        For every bin size, the first three frame rows of the first channel
        plus one extra row holding the bin size, concatenated column-wise
        over all sizes.
    descrs_all : ndarray
        Per-size descriptor matrices (channels stacked along the first axis),
        concatenated column-wise over all sizes.

    Raises
    ------
    ValueError
        If the image is neither 2-D nor H x W x 3, if ``color`` is not
        recognized, or if ``sizes`` is empty.
    """
    if sizes is None:
        # Build the default per call instead of sharing one mutable list.
        sizes = [4, 6, 8, 10]
    opts = Options(verbose, fast, sizes, step, color, floatdescriptors,
                   magnif, windowsize, contrastthreshold)
    dsiftOpts = DSiftOptions(opts)

    # make sure image is float, otherwise segfault
    im = array(im, 'float32')

    # Validate the image layout; `and` short-circuits, so imageSize[2] is
    # only read for 3-D input.
    imageSize = shape(im)
    if im.ndim not in (2, 3) or (im.ndim == 3 and imageSize[2] != 3):
        raise ValueError("Image data in unknown format/shape")

    if opts.color == 'gray':
        numChannels = 1
        # Only a 3-channel image needs converting (the MATLAB reference
        # tests size(im, 3) > 1); a 2-D image is already gray.
        if im.ndim == 3:
            im = vl_rgb2gray(im)
    elif opts.color in ('rgb', 'opponent'):
        numChannels = 3
        if im.ndim == 2:
            im = dstack([im, im, im])
        if opts.color == 'opponent':
            # from https://github.com/vlfeat/vlfeat/blob/master/toolbox/sift/vl_phow.m
            # Note that the mean differs from the standard definition of
            # opponent space and is the regular intensity (for compatibility
            # with the contrast thresholding).
            # Note also that the mean is added back to the other two
            # components with a small multiplier for monochromatic regions.
            mu = 0.3 * im[:, :, 0] + 0.59 * im[:, :, 1] + 0.11 * im[:, :, 2]
            alpha = 0.01
            im = dstack([mu,
                         (im[:, :, 0] - im[:, :, 1]) / sqrt(2) + alpha * mu,
                         (im[:, :, 0] + im[:, :, 1] - 2 * im[:, :, 2]) / sqrt(6) + alpha * mu])
    else:
        raise ValueError('Color option ' + str(opts.color) + ' not recognized')

    if len(opts.sizes) == 0:
        raise ValueError('sizes must contain at least one spatial bin size')

    if opts.verbose:
        print('{0}: color space: {1}'.format('vl_phow', opts.color))
        print('{0}: image size: {1} x {2}'.format('vl_phow', imageSize[0], imageSize[1]))
        print('{0}: sizes: [{1}]'.format('vl_phow', opts.sizes))

    largest_size = max(opts.sizes)  # loop invariant, used for the alignment offset
    frames_all = []
    descrs_all = []
    for requested_size in opts.sizes:
        # from https://github.com/vlfeat/vlfeat/blob/master/toolbox/sift/vl_phow.m
        # Recall from VL_DSIFT() that the first descriptor for scale SIZE has
        # center located at XC = XMIN + 3/2 SIZE (the Y coordinate is
        # similar). It is convenient to align the descriptors at different
        # scales so that they have the same geometric centers. For the
        # maximum size we pick XMIN = 1 and we get centers starting from
        # XC = 1 + 3/2 MAX(OPTS.SIZES). For any other scale we pick XMIN so
        # that XMIN + 3/2 SIZE = 1 + 3/2 MAX(OPTS.SIZES).
        # In practice, the offset must be integer ('bounds'), so the
        # alignment works properly only if all OPTS.SIZES are even or odd.
        off = floor(3.0 / 2 * (largest_size - requested_size)) + 1

        # smooth the image to the appropriate scale based on the size
        # of the SIFT bins
        sigma = requested_size / float(opts.magnif)
        ims = vl_imsmooth(im, sigma)

        # vl_dsift does not accept numpy.int64 or similar
        size_of_spatial_bins = int(requested_size)

        # extract dense SIFT features from all channels
        frames = []
        descrs = []
        for k in range(numChannels):
            # A gray image has no channel axis to slice.
            # NOTE(review): assumes vl_imsmooth keeps the input's
            # dimensionality -- confirm against the vlfeat wrapper.
            plane = ims if ims.ndim == 2 else ims[:, :, k]
            f_temp, d_temp = vl_dsift(data=plane,
                                      step=dsiftOpts.step,
                                      size=size_of_spatial_bins,
                                      fast=dsiftOpts.fast,
                                      verbose=dsiftOpts.verbose,
                                      norm=dsiftOpts.norm,
                                      bounds=[off, off, maxint, maxint])
            frames.append(f_temp)
            descrs.append(d_temp)
        frames = array(frames)
        descrs = array(descrs)
        # Stack the per-channel descriptors along the first axis.
        d_new_shape = [descrs.shape[0] * descrs.shape[1], descrs.shape[2]]
        descrs = descrs.reshape(d_new_shape)

        # remove low contrast descriptors
        # note that for color descriptors the V component is
        # thresholded
        if opts.color == 'rgb':
            contrast = mean([frames[0][2, :], frames[1][2, :], frames[2][2, :]], 0)
        else:
            # 'gray' and 'opponent': the first channel carries the intensity.
            contrast = frames[0][2, :]
        descrs[:, contrast < opts.contrastthreshold] = 0

        # save only x,y, and the scale
        frames_temp = array(frames[0][0:3, :])
        padding = array(size_of_spatial_bins * ones(frames[0][0].shape))
        frames_all.append(vstack([frames_temp, padding]))
        descrs_all.append(array(descrs))

    frames_all = hstack(frames_all)
    descrs_all = hstack(descrs_all)
    return frames_all, descrs_all
class Options(object):
    """Container for the keyword options accepted by vl_phow.

    Every argument is stored unchanged under an attribute of the same name,
    except ``sizes``, which is normalized to an iterable sequence:

    * a list or a 1-D (or higher) ndarray is kept as is,
    * a tuple is converted to a list,
    * a 0-d ndarray is reshaped to a one-element array,
    * any other value (e.g. a plain number) is wrapped in a one-element array.
    """

    def __init__(self, verbose, fast, sizes, step, color,
                 floatdescriptors, magnif, windowsize,
                 contrastthreshold):
        self.verbose = verbose
        self.fast = fast

        if isinstance(sizes, tuple):
            # Wrapping a tuple with array([sizes]) would give a 2-D array
            # whose single "size" is the whole tuple.
            sizes = list(sizes)
        elif isinstance(sizes, ndarray):
            if sizes.ndim == 0:
                # A 0-d array cannot be iterated; make it a 1-element array.
                sizes = sizes.reshape(1)
        elif not isinstance(sizes, list):
            sizes = array([sizes])
        self.sizes = sizes

        self.step = step
        self.color = color
        self.floatdescriptors = floatdescriptors
        self.magnif = magnif
        self.windowsize = windowsize
        self.contrastthreshold = contrastthreshold
class DSiftOptions(object):
    """Options for the dense SIFT step, derived from a PHOW options object.

    ``norm`` is always True; ``windowsize``, ``verbose``, ``fast``,
    ``floatdescriptors`` and ``step`` are copied from ``opts``.
    """

    # Attributes taken over verbatim from the PHOW options.
    _COPIED_FIELDS = ('windowsize', 'verbose', 'fast', 'floatdescriptors', 'step')

    def __init__(self, opts):
        # Not configurable: the norm is always requested.
        self.norm = True
        for field_name in self._COPIED_FIELDS:
            setattr(self, field_name, getattr(opts, field_name))
if __name__ == "__main__":
    # Demo run: extract PHOW features from a sample image expected in the
    # current working directory.
    # NOTE(review): scipy.misc.imread is not available in recent SciPy
    # releases -- confirm the SciPy version this script targets.
    from scipy.misc import imread

    im = imread('image_0001.jpg')
    # Rescale to [0, 1], assuming 8-bit pixel values.
    im_unit = array(im, 'float32') / 255.0
    frames, descrs = vl_phow(im_unit)