//
// Python - C extension module for gpu_domain.py
//
// To compile (Python2.3):
//  gcc -c %.c -I/usr/include/python2.3 -o %.o -Wall -O
//  gcc -shared %.o -o %.so cudafun.o -L$(CUDA_INSTALL_PATH)/lib64 -lcudart -lm
//
// or use python compile.py
//
// See the module shallow_water_domain.py for more documentation on
// how to use this module
//
// Includes Cuda GPU usage:
// memory allocation in first iteration, copying, and deletion in final
// iteration;
// requires linkage of cudafun.o, CUDA library
//

---|
19 | |
---|
20 | #include "Python.h" |
---|
21 | #include "numpy/arrayobject.h" |
---|
22 | #include "math.h" |
---|
23 | #include <stdio.h> |
---|
24 | #include <stdlib.h> |
---|
25 | #include "numpy_shim.h" |
---|
26 | #include "util_ext.h" |
---|
27 | |
---|
28 | #include "cudafun.h" |
---|
29 | |
---|
//=========================================================================
// Python Glue
//=========================================================================

---|
34 | PyObject *compute_fluxes_ext_central_new_gpu(PyObject *self, PyObject *args) { |
---|
35 | /*Compute all fluxes and the timestep suitable for all volumes |
---|
36 | in domain. GPU version. |
---|
37 | |
---|
38 | Compute total flux for each conserved quantity using "flux_function_central" |
---|
39 | |
---|
40 | Fluxes across each edge are scaled by edgelengths and summed up |
---|
41 | Resulting flux is then scaled by area and stored in |
---|
42 | explicit_update for each of the three conserved quantities |
---|
43 | stage, xmomentum and ymomentum |
---|
44 | |
---|
45 | The maximal allowable speed computed by the flux_function for each volume |
---|
46 | is converted to a timestep that must not be exceeded. The minimum of |
---|
47 | those is computed as the next overall timestep. |
---|
48 | |
---|
49 | First set up data structures on GPU, then copy data, call computational kernel, |
---|
50 | copy back. |
---|
51 | |
---|
52 | Python call: |
---|
53 | timestep = compute_fluxes(timestep, domain, stage, xmom, ymom, bed) |
---|
54 | |
---|
55 | |
---|
56 | Post conditions: |
---|
57 | domain.explicit_update is reset to computed flux values |
---|
58 | returns timestep which is the largest step satisfying all volumes. |
---|
59 | |
---|
60 | |
---|
61 | */ |
---|
62 | PyObject |
---|
63 | *domain, |
---|
64 | *stage, |
---|
65 | *xmom, |
---|
66 | *ymom, |
---|
67 | *bed; |
---|
68 | |
---|
69 | PyArrayObject |
---|
70 | *neighbours, |
---|
71 | *neighbour_edges, |
---|
72 | *normals, |
---|
73 | *edgelengths, |
---|
74 | *radii, |
---|
75 | *areas, |
---|
76 | *tri_full_flag, |
---|
77 | *stage_edge_values, |
---|
78 | *xmom_edge_values, |
---|
79 | *ymom_edge_values, |
---|
80 | *bed_edge_values, |
---|
81 | *stage_boundary_values, |
---|
82 | *xmom_boundary_values, |
---|
83 | *ymom_boundary_values, |
---|
84 | *stage_explicit_update, |
---|
85 | *xmom_explicit_update, |
---|
86 | *ymom_explicit_update, |
---|
87 | *already_computed_flux, //Tracks whether the flux across an edge has already been computed |
---|
88 | *max_speed_array; //Keeps track of max speeds for each triangle |
---|
89 | |
---|
90 | static int iteration = 0; |
---|
91 | // TODO: get final_iteration |
---|
92 | int final_iter = 1; |
---|
93 | double timestep, epsilon, H0, g; |
---|
94 | int i,optimise_dry_cells = 0; |
---|
95 | |
---|
96 | // Convert Python arguments to C |
---|
97 | if (!PyArg_ParseTuple(args, "dOOOOO", ×tep, &domain, &stage, &xmom, &ymom, &bed )) { |
---|
98 | report_python_error(AT, "could not parse input arguments"); |
---|
99 | return NULL; |
---|
100 | } |
---|
101 | |
---|
102 | epsilon = get_python_double(domain,"epsilon"); |
---|
103 | H0 = get_python_double(domain,"H0"); |
---|
104 | g = get_python_double(domain,"g"); |
---|
105 | //optimise_dry_cells = get_python_integer(domain,"optimse_dry_cells"); |
---|
106 | |
---|
107 | neighbours = get_consecutive_array(domain, "neighbours"); |
---|
108 | neighbour_edges = get_consecutive_array(domain, "neighbour_edges"); |
---|
109 | normals = get_consecutive_array(domain, "normals"); |
---|
110 | edgelengths = get_consecutive_array(domain, "edgelengths"); |
---|
111 | radii = get_consecutive_array(domain, "radii"); |
---|
112 | areas = get_consecutive_array(domain, "areas"); |
---|
113 | tri_full_flag = get_consecutive_array(domain, "tri_full_flag"); |
---|
114 | already_computed_flux = get_consecutive_array(domain, "already_computed_flux"); |
---|
115 | max_speed_array = get_consecutive_array(domain, "max_speed"); |
---|
116 | |
---|
117 | stage_edge_values = get_consecutive_array(stage, "edge_values"); |
---|
118 | xmom_edge_values = get_consecutive_array(xmom, "edge_values"); |
---|
119 | ymom_edge_values = get_consecutive_array(ymom, "edge_values"); |
---|
120 | bed_edge_values = get_consecutive_array(bed, "edge_values"); |
---|
121 | |
---|
122 | stage_boundary_values = get_consecutive_array(stage, "boundary_values"); |
---|
123 | xmom_boundary_values = get_consecutive_array(xmom, "boundary_values"); |
---|
124 | ymom_boundary_values = get_consecutive_array(ymom, "boundary_values"); |
---|
125 | |
---|
126 | stage_explicit_update = get_consecutive_array(stage, "explicit_update"); |
---|
127 | xmom_explicit_update = get_consecutive_array(xmom, "explicit_update"); |
---|
128 | ymom_explicit_update = get_consecutive_array(ymom, "explicit_update"); |
---|
129 | |
---|
130 | int number_of_elements = stage_edge_values -> dimensions[0]; |
---|
131 | |
---|
132 | printf("glue: dt: %f, noe: %i, eps: %f, H0: %f, g: %f\n", |
---|
133 | timestep, number_of_elements, epsilon, H0, g); fflush(stdout); |
---|
134 | |
---|
135 | // get array dimensions |
---|
136 | size_t number_of_neighbours = neighbours -> dimensions[0]; |
---|
137 | size_t number_of_neighbour_edges = neighbour_edges -> dimensions[0]; |
---|
138 | size_t number_of_normals = normals -> dimensions[0]; |
---|
139 | size_t number_of_edgelengths = edgelengths -> dimensions[0]; |
---|
140 | size_t number_of_radii = radii -> dimensions[0]; |
---|
141 | size_t number_of_areas = areas -> dimensions[0]; |
---|
142 | size_t number_of_tri_full_flag = tri_full_flag -> dimensions[0]; |
---|
143 | //size_t number_of_already_computed = already_computed_flux -> dimensions[0]; |
---|
144 | size_t number_of_max_speed_array = max_speed_array -> dimensions[0]; |
---|
145 | |
---|
146 | size_t number_of_stage_edge_values = stage_edge_values -> dimensions[0]; |
---|
147 | size_t number_of_xmom_edge_values = xmom_edge_values -> dimensions[0]; |
---|
148 | size_t number_of_ymom_edge_values = ymom_edge_values -> dimensions[0]; |
---|
149 | size_t number_of_bed_edge_values = bed_edge_values -> dimensions[0]; |
---|
150 | |
---|
151 | size_t number_of_stage_boundary_values = stage_boundary_values -> dimensions[0]; |
---|
152 | size_t number_of_xmom_boundary_values = xmom_boundary_values -> dimensions[0]; |
---|
153 | size_t number_of_ymom_boundary_values = ymom_boundary_values -> dimensions[0]; |
---|
154 | |
---|
155 | size_t number_of_stage_explicit_update = stage_explicit_update -> dimensions[0]; |
---|
156 | size_t number_of_xmom_explicit_update = xmom_explicit_update -> dimensions[0]; |
---|
157 | size_t number_of_ymom_explicit_update = ymom_explicit_update -> dimensions[0]; |
---|
158 | |
---|
159 | |
---|
160 | // extract C arrays from python wrapper |
---|
161 | long* c_neighbours = (long*) neighbours -> data; |
---|
162 | long* c_neighbour_edges = (long*) neighbour_edges -> data; |
---|
163 | double* c_normals = (double*) normals -> data; |
---|
164 | double* c_edgelengths = (double*) edgelengths -> data; |
---|
165 | double* c_radii = (double*) radii -> data; |
---|
166 | double* c_areas = (double*) areas -> data; |
---|
167 | long* c_tri_full_flag = (long*) tri_full_flag -> data; |
---|
168 | //long* c_already_computed_flux = (long*) already_computed_flux -> data; |
---|
169 | double* c_max_speed_array = (double*) max_speed_array -> data; |
---|
170 | |
---|
171 | double* c_stage_edge_values = (double*) stage_edge_values -> data; |
---|
172 | double* c_xmom_edge_values = (double*) xmom_edge_values -> data; |
---|
173 | double* c_ymom_edge_values = (double*) ymom_edge_values -> data; |
---|
174 | double* c_bed_edge_values = (double*) bed_edge_values -> data; |
---|
175 | |
---|
176 | double* c_stage_boundary_values = (double*) stage_boundary_values -> data; |
---|
177 | double* c_xmom_boundary_values = (double*) xmom_boundary_values -> data; |
---|
178 | double* c_ymom_boundary_values = (double*) ymom_boundary_values -> data; |
---|
179 | |
---|
180 | double* c_stage_explicit_update = (double*) stage_explicit_update -> data; |
---|
181 | double* c_xmom_explicit_update = (double*) xmom_explicit_update -> data; |
---|
182 | double* c_ymom_explicit_update = (double*) ymom_explicit_update -> data; |
---|
183 | |
---|
184 | setKernelDims( 32, 96 ); |
---|
185 | printKernelDims(); |
---|
186 | |
---|
187 | static long* gpu_neighbours = NULL; |
---|
188 | static long* gpu_neighbour_edges = NULL; |
---|
189 | static double* gpu_normals = NULL; |
---|
190 | static double* gpu_edgelengths = NULL; |
---|
191 | static double* gpu_radii = NULL; |
---|
192 | static double* gpu_areas = NULL; |
---|
193 | static long* gpu_tri_full_flag = NULL; |
---|
194 | //static long* gpu_already_computed_flux = NULL; |
---|
195 | static double* gpu_max_speed_array = NULL; |
---|
196 | |
---|
197 | static double* gpu_stage_edge_values = NULL; |
---|
198 | static double* gpu_xmom_edge_values = NULL; |
---|
199 | static double* gpu_ymom_edge_values = NULL; |
---|
200 | static double* gpu_bed_edge_values = NULL; |
---|
201 | |
---|
202 | static double* gpu_stage_boundary_values = NULL; |
---|
203 | static double* gpu_xmom_boundary_values = NULL; |
---|
204 | static double* gpu_ymom_boundary_values = NULL; |
---|
205 | |
---|
206 | static double* gpu_stage_explicit_update = NULL; |
---|
207 | static double* gpu_xmom_explicit_update = NULL; |
---|
208 | static double* gpu_ymom_explicit_update = NULL; |
---|
209 | |
---|
210 | |
---|
211 | if( 0 == iteration ) { |
---|
212 | selectDevice(0,1,""); |
---|
213 | // allocate GPU arrays |
---|
214 | gpu_neighbours = (long*) allocDeviceMemory( number_of_neighbours * sizeof(long) ); |
---|
215 | gpu_neighbour_edges = (long*) allocDeviceMemory( number_of_neighbour_edges * sizeof(long) ); |
---|
216 | gpu_normals = (double*) allocDeviceMemory( number_of_normals * sizeof(double) ); |
---|
217 | gpu_edgelengths = (double*) allocDeviceMemory( number_of_edgelengths * sizeof(double) ); |
---|
218 | gpu_radii = (double*) allocDeviceMemory( number_of_radii * sizeof(double) ); |
---|
219 | gpu_areas = (double*) allocDeviceMemory( number_of_areas * sizeof(double) ); |
---|
220 | gpu_tri_full_flag = (long*) allocDeviceMemory( number_of_tri_full_flag * sizeof(long) ); |
---|
221 | //gpu_already_computed_flux = (long*) allocDeviceMemory( number_of_already_computed * sizeof(long) ); |
---|
222 | gpu_max_speed_array = (double*) allocDeviceMemory( number_of_max_speed_array * sizeof(double) ); |
---|
223 | |
---|
224 | gpu_stage_edge_values = (double*) allocDeviceMemory( number_of_stage_edge_values * sizeof(double) ); |
---|
225 | gpu_xmom_edge_values = (double*) allocDeviceMemory( number_of_xmom_edge_values * sizeof(double) ); |
---|
226 | gpu_ymom_edge_values = (double*) allocDeviceMemory( number_of_ymom_edge_values * sizeof(double) ); |
---|
227 | gpu_bed_edge_values = (double*) allocDeviceMemory( number_of_bed_edge_values * sizeof(double) ); |
---|
228 | |
---|
229 | gpu_stage_boundary_values = (double*) allocDeviceMemory( number_of_stage_boundary_values * sizeof(double) ); |
---|
230 | gpu_xmom_boundary_values = (double*) allocDeviceMemory( number_of_xmom_boundary_values * sizeof(double) ); |
---|
231 | gpu_ymom_boundary_values = (double*) allocDeviceMemory( number_of_ymom_boundary_values * sizeof(double) ); |
---|
232 | |
---|
233 | gpu_stage_explicit_update = (double*) allocDeviceMemory( number_of_stage_explicit_update* sizeof(double) ); |
---|
234 | gpu_xmom_explicit_update = (double*) allocDeviceMemory( number_of_xmom_explicit_update * sizeof(double) ); |
---|
235 | gpu_ymom_explicit_update = (double*) allocDeviceMemory( number_of_ymom_explicit_update * sizeof(double) ); |
---|
236 | |
---|
237 | // Constant quantities copied only in first iteration |
---|
238 | copyHostToDevice( gpu_neighbours , c_neighbours , number_of_neighbours * sizeof(long) ); |
---|
239 | copyHostToDevice( gpu_neighbour_edges , c_neighbour_edges , number_of_neighbour_edges * sizeof(long) ); |
---|
240 | copyHostToDevice( gpu_normals , c_normals , number_of_normals * sizeof(double) ); |
---|
241 | copyHostToDevice( gpu_edgelengths , c_edgelengths , number_of_edgelengths * sizeof(double) ); |
---|
242 | copyHostToDevice( gpu_radii , c_radii , number_of_radii * sizeof(double) ); |
---|
243 | copyHostToDevice( gpu_areas , c_areas , number_of_areas * sizeof(double) ); |
---|
244 | } |
---|
245 | |
---|
246 | copyHostToDevice( gpu_tri_full_flag , c_tri_full_flag , number_of_tri_full_flag * sizeof(long) ); |
---|
247 | //copyHostToDevice( gpu_already_computed_flux, c_already_computed_flux, number_of_already_computed * sizeof(long) ); |
---|
248 | //copyHostToDevice( gpu_max_speed_array , c_max_speed_array , number_of_max_speed_array * sizeof(double) ); |
---|
249 | |
---|
250 | copyHostToDevice( gpu_stage_edge_values , c_stage_edge_values , number_of_stage_edge_values * sizeof(double) ); |
---|
251 | copyHostToDevice( gpu_xmom_edge_values , c_xmom_edge_values , number_of_xmom_edge_values * sizeof(double) ); |
---|
252 | copyHostToDevice( gpu_ymom_edge_values , c_ymom_edge_values , number_of_ymom_edge_values * sizeof(double) ); |
---|
253 | copyHostToDevice( gpu_bed_edge_values , c_bed_edge_values , number_of_bed_edge_values * sizeof(double) ); |
---|
254 | |
---|
255 | copyHostToDevice( gpu_stage_boundary_values ,c_stage_boundary_values ,number_of_stage_boundary_values * sizeof(double) ); |
---|
256 | copyHostToDevice( gpu_xmom_boundary_values ,c_xmom_boundary_values ,number_of_xmom_boundary_values * sizeof(double) ); |
---|
257 | copyHostToDevice( gpu_ymom_boundary_values ,c_ymom_boundary_values ,number_of_ymom_boundary_values * sizeof(double) ); |
---|
258 | |
---|
259 | /*copyHostToDevice( gpu_stage_explicit_update ,c_stage_explicit_update ,number_of_stage_explicit_update* sizeof(double) ); |
---|
260 | copyHostToDevice( gpu_xmom_explicit_update ,c_xmom_explicit_update ,number_of_xmom_explicit_update * sizeof(double) ); |
---|
261 | copyHostToDevice( gpu_ymom_explicit_update ,c_ymom_explicit_update ,number_of_ymom_explicit_update * sizeof(double) );*/ |
---|
262 | |
---|
263 | // initialize explicit updates to zero (possibly superfluous) |
---|
264 | _set_to_default( gpu_stage_explicit_update, gpu_xmom_explicit_update, gpu_ymom_explicit_update, number_of_stage_explicit_update, 0.0 ); |
---|
265 | |
---|
266 | |
---|
267 | // Call underlying flux computation routine and update |
---|
268 | // the explicit update arrays |
---|
269 | timestep = _compute_fluxes_central(number_of_elements, |
---|
270 | timestep, |
---|
271 | epsilon, |
---|
272 | H0, |
---|
273 | g, |
---|
274 | gpu_neighbours , |
---|
275 | gpu_neighbour_edges , |
---|
276 | gpu_normals , |
---|
277 | gpu_edgelengths , |
---|
278 | gpu_radii , |
---|
279 | gpu_areas , |
---|
280 | gpu_tri_full_flag , |
---|
281 | gpu_stage_edge_values , |
---|
282 | gpu_xmom_edge_values , |
---|
283 | gpu_ymom_edge_values , |
---|
284 | gpu_bed_edge_values , |
---|
285 | gpu_stage_boundary_values , |
---|
286 | gpu_xmom_boundary_values , |
---|
287 | gpu_ymom_boundary_values , |
---|
288 | gpu_stage_explicit_update , |
---|
289 | gpu_xmom_explicit_update , |
---|
290 | gpu_ymom_explicit_update , |
---|
291 | //gpu_already_computed_flux , |
---|
292 | gpu_max_speed_array , |
---|
293 | optimise_dry_cells); |
---|
294 | |
---|
295 | /* (long*) neighbours -> data, |
---|
296 | (long*) neighbour_edges -> data, |
---|
297 | (double*) normals -> data, |
---|
298 | (double*) edgelengths -> data, |
---|
299 | (double*) radii -> data, |
---|
300 | (double*) areas -> data, |
---|
301 | (long*) tri_full_flag -> data, |
---|
302 | (double*) stage_edge_values -> data, |
---|
303 | (double*) xmom_edge_values -> data, |
---|
304 | (double*) ymom_edge_values -> data, |
---|
305 | (double*) bed_edge_values -> data, |
---|
306 | (double*) stage_boundary_values -> data, |
---|
307 | (double*) xmom_boundary_values -> data, |
---|
308 | (double*) ymom_boundary_values -> data, |
---|
309 | (double*) stage_explicit_update -> data, |
---|
310 | (double*) xmom_explicit_update -> data, |
---|
311 | (double*) ymom_explicit_update -> data, |
---|
312 | (long*) already_computed_flux -> data, |
---|
313 | (double*) max_speed_array -> data, |
---|
314 | optimise_dry_cells); |
---|
315 | */ |
---|
316 | |
---|
317 | // copy GPU to Host memory |
---|
318 | /*copyDeviceToHost( c_neighbours , gpu_neighbours , number_of_neighbours * sizeof(long) ); |
---|
319 | copyDeviceToHost( c_neighbour_edges , gpu_neighbour_edges , number_of_neighbour_edges * sizeof(long) ); |
---|
320 | copyDeviceToHost( c_normals , gpu_normals , number_of_normals * sizeof(double) ); |
---|
321 | copyDeviceToHost( c_edgelengths , gpu_edgelengths , number_of_edgelengths * sizeof(double) ); |
---|
322 | copyDeviceToHost( c_radii , gpu_radii , number_of_radii * sizeof(double) ); |
---|
323 | copyDeviceToHost( c_areas , gpu_areas , number_of_areas * sizeof(double) ); |
---|
324 | copyDeviceToHost( c_tri_full_flag , gpu_tri_full_flag , number_of_tri_full_flag * sizeof(long) ); |
---|
325 | //copyDeviceToHost( c_already_computed_flux, gpu_already_computed_flux, number_of_already_computed * sizeof(long) ); |
---|
326 | copyDeviceToHost( c_max_speed_array , gpu_max_speed_array , number_of_max_speed_array * sizeof(double) ); |
---|
327 | |
---|
328 | copyDeviceToHost( c_stage_edge_values , gpu_stage_edge_values , number_of_stage_edge_values * sizeof(double) ); |
---|
329 | copyDeviceToHost( c_xmom_edge_values , gpu_xmom_edge_values , number_of_xmom_edge_values * sizeof(double) ); |
---|
330 | copyDeviceToHost( c_ymom_edge_values , gpu_ymom_edge_values , number_of_ymom_edge_values * sizeof(double) ); |
---|
331 | copyDeviceToHost( c_bed_edge_values , gpu_bed_edge_values , number_of_bed_edge_values * sizeof(double) ); |
---|
332 | |
---|
333 | copyDeviceToHost( c_stage_boundary_values ,gpu_stage_boundary_values , number_of_stage_boundary_values * sizeof(double) ); |
---|
334 | copyDeviceToHost( c_xmom_boundary_values ,gpu_xmom_boundary_values , number_of_xmom_boundary_values * sizeof(double) ); |
---|
335 | copyDeviceToHost( c_ymom_boundary_values ,gpu_ymom_boundary_values , number_of_ymom_boundary_values * sizeof(double) ); |
---|
336 | */ |
---|
337 | copyDeviceToHost( c_stage_explicit_update ,gpu_stage_explicit_update , number_of_stage_explicit_update* sizeof(double) ); |
---|
338 | copyDeviceToHost( c_xmom_explicit_update ,gpu_xmom_explicit_update , number_of_xmom_explicit_update * sizeof(double) ); |
---|
339 | copyDeviceToHost( c_ymom_explicit_update ,gpu_ymom_explicit_update , number_of_ymom_explicit_update * sizeof(double) ); |
---|
340 | |
---|
341 | if( iteration == final_iter ) { |
---|
342 | // Free gpu memory |
---|
343 | freeDeviceMemory( gpu_neighbours ); |
---|
344 | freeDeviceMemory( gpu_neighbour_edges ); |
---|
345 | freeDeviceMemory( gpu_normals ); |
---|
346 | freeDeviceMemory( gpu_edgelengths); |
---|
347 | freeDeviceMemory( gpu_radii ); |
---|
348 | freeDeviceMemory( gpu_areas ); |
---|
349 | freeDeviceMemory( gpu_tri_full_flag ); |
---|
350 | //freeDeviceMemory( gpu_already_computed_flux); |
---|
351 | freeDeviceMemory( gpu_max_speed_array ); |
---|
352 | |
---|
353 | freeDeviceMemory( gpu_stage_edge_values ); |
---|
354 | freeDeviceMemory( gpu_xmom_edge_values ); |
---|
355 | freeDeviceMemory( gpu_ymom_edge_values ); |
---|
356 | freeDeviceMemory( gpu_bed_edge_values ); |
---|
357 | |
---|
358 | freeDeviceMemory( gpu_stage_boundary_values ); |
---|
359 | freeDeviceMemory( gpu_xmom_boundary_values ); |
---|
360 | freeDeviceMemory( gpu_ymom_boundary_values ); |
---|
361 | |
---|
362 | freeDeviceMemory( gpu_stage_explicit_update); |
---|
363 | freeDeviceMemory( gpu_xmom_explicit_update ); |
---|
364 | freeDeviceMemory( gpu_ymom_explicit_update ); |
---|
365 | } |
---|
366 | |
---|
367 | iteration++; |
---|
368 | |
---|
369 | Py_DECREF(neighbours); |
---|
370 | Py_DECREF(neighbour_edges); |
---|
371 | Py_DECREF(normals); |
---|
372 | Py_DECREF(edgelengths); |
---|
373 | Py_DECREF(radii); |
---|
374 | Py_DECREF(areas); |
---|
375 | Py_DECREF(tri_full_flag); |
---|
376 | Py_DECREF(already_computed_flux); |
---|
377 | Py_DECREF(max_speed_array); |
---|
378 | Py_DECREF(stage_edge_values); |
---|
379 | Py_DECREF(xmom_edge_values); |
---|
380 | Py_DECREF(ymom_edge_values); |
---|
381 | Py_DECREF(bed_edge_values); |
---|
382 | Py_DECREF(stage_boundary_values); |
---|
383 | Py_DECREF(xmom_boundary_values); |
---|
384 | Py_DECREF(ymom_boundary_values); |
---|
385 | Py_DECREF(stage_explicit_update); |
---|
386 | Py_DECREF(xmom_explicit_update); |
---|
387 | Py_DECREF(ymom_explicit_update); |
---|
388 | |
---|
389 | |
---|
390 | // Return updated flux timestep |
---|
391 | return Py_BuildValue("d", timestep); |
---|
392 | } |
---|
393 | |
---|
394 | |
---|
395 | //------------------------------- |
---|
396 | // Method table for python module |
---|
397 | //------------------------------- |
---|
398 | static struct PyMethodDef MethodTable[] = { |
---|
399 | /* The cast of the function is necessary since PyCFunction values |
---|
400 | * only take two PyObject* parameters, and rotate() takes |
---|
401 | * three. |
---|
402 | */ |
---|
403 | |
---|
404 | {"compute_fluxes_ext_central_new_gpu", compute_fluxes_ext_central_new_gpu, METH_VARARGS, "Print out"}, |
---|
405 | {NULL, NULL} |
---|
406 | }; |
---|
407 | |
---|
408 | // Module initialisation |
---|
409 | void initgpu_python_glue(void){ |
---|
410 | Py_InitModule("gpu_python_glue", MethodTable); |
---|
411 | |
---|
412 | import_array(); // Necessary for handling of NumPY structures |
---|
413 | } |
---|
414 | |
---|