#!/usr/bin/python3
# Copyright (C) 2022 Infineon Technologies & pmdtechnologies ag
#
# THIS CODE AND INFORMATION ARE PROVIDED "AS IS" WITHOUT WARRANTY OF ANY
# KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
# PARTICULAR PURPOSE.
"""This sample shows how to use openCV on the depthdata we get back from either a camera or an rrf file.
The Camera's lens parameters are optionally used to remove the lens distortion and then the image is displayed using openCV windows.
Press 'd' on the keyboard to toggle the distortion while a window is selected. Press esc to exit.
Additionally this sample implements the YOLO v3 network for object detection. We convert the image to rgb and then feed this image
into the network. Then we draw bounding boxes around the found object.
"""
import argparse
import queue
import sys
import threading
import numpy as np
import cv2
# insert the path to your Royale installation here:
# note that you need to use \\ or / instead of \ on Windows
ROYALE_DIR = "C:/Program Files/royale/5.4.0.2112/python"
sys.path.append(ROYALE_DIR)
import roypy
from roypy_sample_utils import CameraOpener, add_camera_opener_options
from roypy_platform_utils import PlatformHelper
# YOLO code from: https://github.com/arunponnusamy/object-detection-opencv/blob/master/yolo_opencv.py
CLASSES = None
with open("yoloclasses.txt", 'r') as f:
    CLASSES = [line.strip() for line in f.readlines()]
COLORS = np.random.uniform(0, 255, size=(len(CLASSES), 3))
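
# the tiny YOLOv3 model files ("yolov3-tiny.weights", "yolov3-tiny.cfg") and the class
# list ("yoloclasses.txt") are expected next to this script; the weights and cfg can be
# obtained from the official darknet project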
net = cv2.dnn.readNet("yolov3-tiny.weights", "yolov3-tiny.cfg")
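
# Map the indices from getUnconnectedOutLayers() to layer names. The flat iteration
# below assumes a recent OpenCV (roughly 4.5.4 and later); older versions return
# nested arrays and would need layer_names[i[0] - 1] instead. The layer IDs are
# 1-based, hence the i - 1.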
def get_output_layers(net):
    layer_names = net.getLayerNames()
    output_layers = [layer_names[i - 1] for i in net.getUnconnectedOutLayers()]
    return output_layers

def draw_prediction(img, class_id, confidence, x, y, x_plus_w, y_plus_h):
    label = str(CLASSES[class_id]) + " : " + "{:.2f}".format(confidence)
    color = COLORS[class_id]
    cv2.rectangle(img, (x, y), (x_plus_w, y_plus_h), color, 2)
    cv2.putText(img, label, (x - 10, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
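
# Runs tiny YOLOv3 on a 3-channel image: build a 416x416 blob (pixel values scaled
# to 0..1), forward it through the network, keep detections above the confidence
# cutoff, then apply non-maximum suppression to drop overlapping boxes.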
def detectObjects(img):
    Width = img.shape[1]
    Height = img.shape[0]
    scale = 1 / 255

    blob = cv2.dnn.blobFromImage(img, scale, (416, 416), (0, 0, 0), False, crop=False)
    net.setInput(blob)
    outs = net.forward(get_output_layers(net))

    class_ids = []
    confidences = []
    boxes = []
    conf_threshold = 0.01
    nms_threshold = 0.5

    for out in outs:
        for detection in out:
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            if confidence > 0.1:
                center_x = int(detection[0] * Width)
                center_y = int(detection[1] * Height)
                w = int(detection[2] * Width)
                h = int(detection[3] * Height)
                x = center_x - w / 2
                y = center_y - h / 2
                class_ids.append(class_id)
                confidences.append(float(confidence))
                boxes.append([x, y, w, h])

    indices = cv2.dnn.NMSBoxes(boxes, confidences, conf_threshold, nms_threshold)

    for i in indices:
        box = boxes[i]
        x = box[0]
        y = box[1]
        w = box[2]
        h = box[3]
        draw_prediction(img, class_ids[i], confidences[i], round(x), round(y), round(x + w), round(y + h))

    return img
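
# A hypothetical standalone test of detectObjects on an ordinary image file:
#   img = cv2.imread("test.jpg")
#   cv2.imshow("YOLO", detectObjects(img))
#   cv2.waitKey(0)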

# OPENCV SAMPLE + INTEGRATED OBJECT DETECTION WITH YOLO
class MyListener(roypy.IDepthDataListener):
    def __init__(self, q):
        super(MyListener, self).__init__()
        self.frame = 0
        self.done = False
        self.undistortImage = True
        self.lock = threading.Lock()
        self.once = False
        self.queue = q
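
    # called by Royale on its capture thread for each new frame; we only hand the
    # point data over to the queue, and the main thread does all OpenCV GUI work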
    def onNewData(self, data):
        p = data.npoints()
        self.queue.put(p)

    def paint(self, data):
        """Called in the main thread, with data containing one of the items that was added to the
        queue in onNewData.
        """
        # mutex to lock out changes to the distortion while drawing
        self.lock.acquire()

        depth = data[:, :, 2]
        gray = data[:, :, 3]
        confidence = data[:, :, 4]

        zImage = np.zeros(depth.shape, np.float32)
        grayImage = np.zeros(depth.shape, np.float32)

        # iterate over the image and set grayImage to the adjusted gray values
        # wherever the pixel has a valid (non-zero) confidence
        xVal = 0
        yVal = 0
        for x in zImage:
            for y in x:
                if confidence[xVal][yVal] > 0:
                    grayImage[xVal, yVal] = self.adjustGrayValue(gray[xVal][yVal])
                yVal = yVal + 1
            yVal = 0
            xVal = xVal + 1
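
        # note: the per-pixel loop above is slow in pure Python; with NumPy the same
        # result could be computed in one vectorized step, e.g. using np.where on the
        # confidence array, if speed matters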
        grayImage8 = np.uint8(grayImage)

        # apply undistortion
        if self.undistortImage:
            grayImage8 = cv2.undistort(grayImage8, self.cameraMatrix, self.distortionCoefficients)

        # convert the image to RGB first, because YOLO needs 3 channels, and then detect the objects
        yoloResultImageGray = detectObjects(cv2.cvtColor(grayImage8, cv2.COLOR_GRAY2RGB))

        # finally show the images
        cv2.imshow("YOLO Objects on Gray Image", yoloResultImageGray)

        self.lock.release()
        self.done = True

    def setLensParameters(self, lensParameters):
        # Construct the camera matrix
        # (fx  0   cx)
        # (0   fy  cy)
        # (0   0   1 )
        self.cameraMatrix = np.zeros((3, 3), np.float32)
        self.cameraMatrix[0, 0] = lensParameters['fx']
        self.cameraMatrix[0, 2] = lensParameters['cx']
        self.cameraMatrix[1, 1] = lensParameters['fy']
        self.cameraMatrix[1, 2] = lensParameters['cy']
        self.cameraMatrix[2, 2] = 1

        # Construct the distortion coefficients
        # k1 k2 p1 p2 k3
        self.distortionCoefficients = np.zeros((1, 5), np.float32)
        self.distortionCoefficients[0, 0] = lensParameters['k1']
        self.distortionCoefficients[0, 1] = lensParameters['k2']
        self.distortionCoefficients[0, 2] = lensParameters['p1']
        self.distortionCoefficients[0, 3] = lensParameters['p2']
        self.distortionCoefficients[0, 4] = lensParameters['k3']

    def toggleUndistort(self):
        self.lock.acquire()
        self.undistortImage = not self.undistortImage
        self.lock.release()

    # Map the gray values from the camera to 0..255
    def adjustGrayValue(self, grayValue):
        clampedVal = min(400, grayValue)  # try different values to find the one that fits your environment best
        newGrayValue = clampedVal / 400 * 255
        return newGrayValue

def main():
    # Set the available arguments
    platformhelper = PlatformHelper()
    parser = argparse.ArgumentParser(usage=__doc__)
    add_camera_opener_options(parser)
    options = parser.parse_args()

    opener = CameraOpener(options)
    try:
        cam = opener.open_camera()
    except Exception:
        print("could not open Camera Interface")
        sys.exit(1)

    try:
        # retrieve the interface that is available for recordings
        replay = cam.asReplay()
        print("Using a recording")
        print("Framecount : ", replay.frameCount())
        print("File version : ", replay.getFileVersion())
    except SystemError:
        print("Using a live camera")

    q = queue.Queue()
    l = MyListener(q)

    cam.registerDataListener(l)
    cam.startCapture()

    lensP = cam.getLensParameters()
    l.setLensParameters(lensP)

    process_event_queue(q, l)

    cam.stopCapture()
    print("Done")
def process_event_queue(q, painter):
    while True:
        try:
            # try to retrieve an item from the queue.
            # this will block until an item can be retrieved
            # or the timeout of 1 second is hit
            if len(q.queue) == 0:
                item = q.get(True, 1)
            else:
                for i in range(0, len(q.queue)):
                    item = q.get(True, 1)
        except queue.Empty:
            # this will be thrown when the timeout is hit
            break
        else:
            painter.paint(item)
            # waitKey is required to use imshow; we wait for 1 millisecond
            currentKey = cv2.waitKey(1)
            if currentKey == ord('d'):
                painter.toggleUndistort()
            # close if escape key pressed
            if currentKey == 27:
                break

if __name__ == "__main__":
    main()
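
# Typical invocations (the exact flags come from add_camera_opener_options in the
# pmd sample utilities; --rrf is assumed here for playback, check your Royale version):
#   python sample_yolo.py                   # live camera
#   python sample_yolo.py --rrf record.rrf  # recorded data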