-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path__init__.py
329 lines (258 loc) · 11.4 KB
/
__init__.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
import math as m
import os
import numpy as np
from scipy.interpolate import interp1d
import pycuda.driver as cuda
import pycuda.autoinit
from pycuda.compiler import SourceModule
BLOCKSIZE = 256
class CUDAManipulator(object):
'''
Class for using CUDA, manipulating objects and textures conveniently.
'''
def load_texture(self, name, arr):
'''
Loads an array into a texture with a name.
Address by the name in the kernel code.
'''
tex = self.mod.get_texref(name) # x*y*z
arr = arr.astype('float32')
if len(arr.shape) == 3:
carr = arr.copy('F')
texarray = numpy3d_to_array(carr, 'F')
tex.set_array(texarray)
else:
if len(arr.shape) == 1:
arr = np.expand_dims(arr, 1)
tex.set_flags(0)
cuda.matrix_to_texref(arr, tex, order='F')
tex.set_address_mode(0, cuda.address_mode.CLAMP)
tex.set_address_mode(1, cuda.address_mode.CLAMP)
tex.set_flags(cuda.TRSF_NORMALIZED_COORDINATES)
tex.set_filter_mode(cuda.filter_mode.LINEAR)
self.textures[name] = tex
def load_constant(self, name, val):
'''
Loads a constant into memory by name in kernel code.
If val is a float, int, char, etc., it must be wrapped by
np.float32() or np.int32() or similar.
'''
cuda.memcpy_htod(self.mod.get_global(name)[0], val)
def clear_textures(self):
'''
Removes all textures from memory.
'''
self.textures = {}
def __init__(self, cuda_code, include_dirs=[]):
include_dirs += [os.environ['CUDA_HOME'] + '/samples/common/inc',
os.path.dirname(os.path.abspath(__file__))]
self.mod = SourceModule(cuda_code, no_extern_c=True,
include_dirs=include_dirs)
class Renderer(CUDAManipulator):
'''
Superclass for rendering 3D models.
Takes care of most useful functions for 3D rendering,
such as calculating view vectors, calculating start points
for individual pixels, determining whether or not in box,
splitting up large datasets along the x-axis, etc.
Everything else, e.g. iterating through the spaces
and integrating must be done by the attached CUDA module
and the parameter spec_render to render().
To see the variables provided for use in the CUDA module,
see renderer.h
'''
maxgridsize = 10000000
projection_x_size = 640 # size of the output array
projection_y_size = 640
stepsize = 0.2 # size of a step along the LOS
distance_per_pixel = 0.06 # distance (specified by axes) between pixels in the output
x_pixel_offset = 0 # if 0, the output is centered in the box center--shifts output L/R
y_pixel_offset = 0 # shifts output U/D
mod = None # CUDA kernel code
xaxis = yaxis = zaxis = ixaxis = iyaxis = izaxis = None
textures = {} # stores references to textures to prevent them being cleared
def render(self, azimuth, altitude,
consts, tables, split_tables,
spec_render, verbose=True):
'''
Renders with a view of azimuth and altitude.
Loads constants (list of tuples of name, value),
tables (list of tuples of name, value).
It splits up the rendering space along the x-axis,
loading a corresponding portion of each table in split_tables,
and rendering a portion at a time by calling spec_render.
spec_render is func with format
spec_render(self, blocksize, gridsize)
and is where the CUDA kernel is actually called.
'''
self.clear_textures()
view_x, view_y, view_vector = view_axes(azimuth, altitude)
#add axis inverse lookup tables, as well as x axis lookup table
tables.extend([('xtex', np.expand_dims(self.xaxis, 1)),
('ixtex', np.expand_dims(self.ixaxis, 1)),
('iytex', np.expand_dims(self.iyaxis, 1)),
('iztex', np.expand_dims(self.izaxis, 1))])
for tup in tables:
self.load_texture(*tup)
for tup in consts:
self.load_constant(*tup)
if verbose:
print('Loaded textures, computed emissivities')
xsplitsize = self.maxgridsize / (self.yaxis.size * self.zaxis.size)
numsplits = (self.xaxis.size + xsplitsize - 1) / xsplitsize
self.load_constant('viewVector', view_vector)
self.load_constant('viewX', view_x)
self.load_constant('viewY', view_y)
self.load_constant('ds', np.float32(self.stepsize))
self.load_constant('projectionXsize', np.int32(self.projection_x_size))
self.load_constant('projectionYsize', np.int32(self.projection_y_size))
self.load_constant('distancePerPixel', np.float32(self.distance_per_pixel))
self.load_constant('xPixelOffset', np.float32(self.x_pixel_offset))
self.load_constant('yPixelOffset', np.float32(self.y_pixel_offset))
for i in xrange(numsplits):
xstart = i * xsplitsize
if xstart + xsplitsize > self.xaxis.size:
xsplitsize = self.xaxis.size - xstart
if verbose:
print('Rendering x\'-coords ' + str(xstart) + '-' +
str(xstart + xsplitsize) + ' of ' + str(self.xaxis.size))
for tup in split_tables:
name, table = tup
self.load_texture(name, table[xstart:xstart + xsplitsize])
data_size = self.projection_x_size * self.projection_y_size
grid_size = (data_size + BLOCKSIZE - 1) / BLOCKSIZE
self.load_constant('xstart', np.int32(xstart))
self.load_constant('sliceWidth', np.int32(xsplitsize))
spec_render(self, BLOCKSIZE, grid_size)
def __init__(self, cuda_code):
'''
Initialize with a cuda kernel.
CUDA kernel should include renderer.h to utilize this
'''
super(Renderer, self).__init__(cuda_code)
def set_axes(self, xaxis, yaxis, zaxis):
'''
Sets the x, y, and z axes.
x, y, z axes are lists correlating indices (in textures)
to locations in space.
'''
self.xaxis = xaxis
self.yaxis = yaxis
self.zaxis = zaxis
self.load_constant('xmin', np.float32(xaxis.min()))
self.load_constant('xmax', np.float32(xaxis.max()))
self.load_constant('ymin', np.float32(yaxis.min()))
self.load_constant('ymax', np.float32(yaxis.max()))
self.load_constant('zmin', np.float32(zaxis.min()))
self.load_constant('zmax', np.float32(zaxis.max()))
self.load_constant('xtotalsize', np.int32(xaxis.size))
self.ixaxis = norm_inverse_axis(xaxis)
self.iyaxis = norm_inverse_axis(yaxis)
self.izaxis = norm_inverse_axis(zaxis)
class SingAxisRenderer(CUDAManipulator):
zcutoff = -1.0 # the point at which we are considered to be no longer in the chromosphere
locph = prev_lambd = float('nan')
snap = 0
projection_x_size = projection_y_size = 640
nsteps = 600
ux = uy = uz = e = r = oscdata = ka_table = opatab = None
def render(self, axis, reverse, consts, tables, split_tables,
spec_render, verbose=True):
self.clear_textures()
input_size = self.xaxis.size * self.yaxis.size * self.zaxis.size
numsplits = (input_size - 1) / self.maxgridsize + 1
if axis == 'x':
intaxis = self.xaxis
ax_id = 0
elif axis == 'y':
intaxis = self.yaxis
ax_id = 1
else:
intaxis = self.zaxis
ax_id = 2
intaxis_size = intaxis.size
splitsize = np.ceil(intaxis_size / numsplits)
consts += [('projectionXsize', np.int32(self.projection_x_size)),
('projectionYsize', np.int32(self.projection_y_size)),
('axis', np.uint8(ord(axis))),
('reverse', np.int8(reverse)),
('nsteps', np.int32(self.nsteps))]
for tup in tables:
self.load_texture(*tup)
for tup in consts:
self.load_constant(*tup)
if verbose:
print('Loaded textures, computed emissivities')
#split_tables is tables to split, table_splits is list of split tables
table_splits = {name: np.array_split(table, numsplits, ax_id) for name, table in split_tables}
table_splits['aptex'] = np.array_split(np.gradient(intaxis), numsplits)
if reverse:
for name in table_splits:
table_splits[name].reverse()
for i in xrange(numsplits):
start = i * splitsize
if start + splitsize > intaxis_size:
splitsize = intaxis_size - start
if verbose:
print('Rendering ' + axis + '-coords ' + str(start) + '-' +
str(start + splitsize) + ' of ' + str(intaxis_size))
for name, table_split in table_splits.items():
self.load_texture(name, table_split[i])
data_size = self.projection_x_size * self.projection_y_size
grid_size = (data_size + BLOCKSIZE - 1) / BLOCKSIZE
spec_render(self, BLOCKSIZE, grid_size)
def numpy3d_to_array(np_array, order=None):
'''
Method for copying a numpy array to a CUDA array
If you get a buffer error, run this method on np_array.copy('F')
'''
from pycuda.driver import Array, ArrayDescriptor3D, Memcpy3D, dtype_to_array_format
if order is None:
order = 'C' if np_array.strides[0] > np_array.strides[2] else 'F'
if order.upper() == 'C':
d, h, w = np_array.shape
elif order.upper() == "F":
w, h, d = np_array.shape
else:
raise Exception("order must be either F or C")
descr = ArrayDescriptor3D()
descr.width = w
descr.height = h
descr.depth = d
descr.format = dtype_to_array_format(np_array.dtype)
descr.num_channels = 1
descr.flags = 0
device_array = Array(descr)
copy = Memcpy3D()
copy.set_src_host(np_array)
copy.set_dst_array(device_array)
copy.width_in_bytes = copy.src_pitch = np_array.strides[1]
copy.src_height = copy.height = h
copy.depth = d
copy()
return device_array
def view_axes(azimuth, altitude):
'''
The vectors that correspond to a POV with azimuth and altitude
view_vector is the line following the LOS,
view_x and view_y are the directions you shift
in space as you move left/right
'''
altitude = sorted((altitude, 90, -90))[1]
altitude = -altitude
azimuth = azimuth % 360 - 180
azimuth = m.radians(int(azimuth))
altitude = m.radians(int(altitude))
view_vector = np.array((m.cos(altitude) * m.cos(azimuth), m.cos(altitude) * m.sin(azimuth), m.sin(altitude)))
view_x = np.array((m.sin(azimuth), -m.cos(azimuth), 0))
view_y = np.cross(view_x, view_vector)
return (view_x.astype('float32'), view_y.astype('float32'), view_vector.astype('float32'))
def norm_inverse_axis(axis):
'''
From an axis (corresponds table indices to real coordinates),
generate an inverse lookup table that will take real coordinates
(normalized to (0, 1)) and return a normalized (0, 1) lookup index
'''
axinverse = interp1d((axis - axis.min()) / np.ptp(axis),
np.linspace(0, 1, axis.size))
return axinverse(np.linspace(0, 1, axis.size * 6)).astype('float32')