// Python - C extension module for gpu_domain.py
//
// To compile (Python2.3):
//   gcc -c %.c -I/usr/include/python2.3 -o %.o -Wall -O
//   gcc -shared %.o -o %.so cudafun.o -L$(CUDA_INSTALL_PATH)/lib64 -lcudart -lm
//
// or use python compile.py
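//
// For example, assuming this file is saved as gpu_python_glue.c (matching the
// module name initialised below) and cudafun.o has already been compiled, the
// pattern rules above expand to:
//   gcc -c gpu_python_glue.c -I/usr/include/python2.3 -o gpu_python_glue.o -Wall -O
//   gcc -shared gpu_python_glue.o -o gpu_python_glue.so cudafun.o \
//       -L$(CUDA_INSTALL_PATH)/lib64 -lcudart -lm
// (the file name and include path are assumptions; adjust to the local installation)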
//
// See the module shallow_water_domain.py for more documentation on
// how to use this module.
//
// Includes CUDA GPU usage:
// device memory is allocated in the first iteration, data is copied every
// iteration, and the memory is freed in the final iteration;
// requires linking against cudafun.o and the CUDA runtime library.
//

#include "Python.h"
#include "numpy/arrayobject.h"
#include "math.h"
#include <stdio.h>
#include <stdlib.h>
#include "numpy_shim.h"
#include "util_ext.h"

#include "cudafun.h"

//=========================================================================
// Python Glue
//=========================================================================

PyObject *compute_fluxes_ext_central_new_gpu(PyObject *self, PyObject *args) {
  /* Compute all fluxes and the timestep suitable for all volumes
     in the domain. GPU version.

     The total flux for each conserved quantity is computed using
     "flux_function_central".

     Fluxes across each edge are scaled by the edge lengths and summed up.
     The resulting flux is then scaled by the area and stored in
     explicit_update for each of the three conserved quantities:
     stage, xmomentum and ymomentum.

     The maximal allowable speed computed by the flux function for each volume
     is converted to a timestep that must not be exceeded. The minimum of
     those is taken as the next overall timestep.

     The data structures are first set up on the GPU, then the data is copied
     over, the computational kernel is called, and the results are copied back.

     Python call:
       timestep = compute_fluxes(timestep, domain, stage, xmom, ymom, bed)

     Post conditions:
       domain.explicit_update is reset to the computed flux values
       returns the timestep, which is the largest step satisfying all volumes
  */
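
  /* A minimal sketch of the corresponding call from Python, assuming the
     module has been imported as gpu_python_glue and that domain, stage, xmom,
     ymom and bed are the usual ANUGA domain/quantity objects (the variable
     names here are illustrative only):

       from gpu_python_glue import compute_fluxes_ext_central_new_gpu
       timestep = compute_fluxes_ext_central_new_gpu(
           timestep, domain, stage, xmom, ymom, bed)
  */
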
  PyObject
    *domain,
    *stage,
    *xmom,
    *ymom,
    *bed;

  PyArrayObject
    *neighbours,
    *neighbour_edges,
    *normals,
    *edgelengths,
    *radii,
    *areas,
    *tri_full_flag,
    *stage_edge_values,
    *xmom_edge_values,
    *ymom_edge_values,
    *bed_edge_values,
    *stage_boundary_values,
    *xmom_boundary_values,
    *ymom_boundary_values,
    *stage_explicit_update,
    *xmom_explicit_update,
    *ymom_explicit_update,
    *already_computed_flux, // Tracks whether the flux across an edge has already been computed
    *max_speed_array;       // Keeps track of max speeds for each triangle

  static int iteration = 0;
  // TODO: get final_iteration
  int final_iter = 1;
  double timestep, epsilon, H0, g;
  int i, optimise_dry_cells = 0;

  // Convert Python arguments to C
  if (!PyArg_ParseTuple(args, "dOOOOO", &timestep, &domain, &stage, &xmom, &ymom, &bed)) {
    report_python_error(AT, "could not parse input arguments");
    return NULL;
  }

  epsilon = get_python_double(domain, "epsilon");
  H0      = get_python_double(domain, "H0");
  g       = get_python_double(domain, "g");
  //optimise_dry_cells = get_python_integer(domain, "optimise_dry_cells");

  neighbours            = get_consecutive_array(domain, "neighbours");
  neighbour_edges       = get_consecutive_array(domain, "neighbour_edges");
  normals               = get_consecutive_array(domain, "normals");
  edgelengths           = get_consecutive_array(domain, "edgelengths");
  radii                 = get_consecutive_array(domain, "radii");
  areas                 = get_consecutive_array(domain, "areas");
  tri_full_flag         = get_consecutive_array(domain, "tri_full_flag");
  already_computed_flux = get_consecutive_array(domain, "already_computed_flux");
  max_speed_array       = get_consecutive_array(domain, "max_speed");

  stage_edge_values = get_consecutive_array(stage, "edge_values");
  xmom_edge_values  = get_consecutive_array(xmom, "edge_values");
  ymom_edge_values  = get_consecutive_array(ymom, "edge_values");
  bed_edge_values   = get_consecutive_array(bed, "edge_values");

  stage_boundary_values = get_consecutive_array(stage, "boundary_values");
  xmom_boundary_values  = get_consecutive_array(xmom, "boundary_values");
  ymom_boundary_values  = get_consecutive_array(ymom, "boundary_values");

  stage_explicit_update = get_consecutive_array(stage, "explicit_update");
  xmom_explicit_update  = get_consecutive_array(xmom, "explicit_update");
  ymom_explicit_update  = get_consecutive_array(ymom, "explicit_update");
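
  // get_consecutive_array() (from util_ext.h) hands back each quantity as a
  // contiguous NumPy array and returns a new reference; the matching
  // Py_DECREF calls at the end of this function release them.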

  int number_of_elements = stage_edge_values->dimensions[0];

  printf("glue: dt: %f, noe: %i, eps: %f, H0: %f, g: %f\n",
         timestep, number_of_elements, epsilon, H0, g);
  fflush(stdout);

  // get array dimensions
  size_t number_of_neighbours      = neighbours->dimensions[0];
  size_t number_of_neighbour_edges = neighbour_edges->dimensions[0];
  size_t number_of_normals         = normals->dimensions[0];
  size_t number_of_edgelengths     = edgelengths->dimensions[0];
  size_t number_of_radii           = radii->dimensions[0];
  size_t number_of_areas           = areas->dimensions[0];
  size_t number_of_tri_full_flag   = tri_full_flag->dimensions[0];
  //size_t number_of_already_computed = already_computed_flux->dimensions[0];
  size_t number_of_max_speed_array = max_speed_array->dimensions[0];

  size_t number_of_stage_edge_values = stage_edge_values->dimensions[0];
  size_t number_of_xmom_edge_values  = xmom_edge_values->dimensions[0];
  size_t number_of_ymom_edge_values  = ymom_edge_values->dimensions[0];
  size_t number_of_bed_edge_values   = bed_edge_values->dimensions[0];

  size_t number_of_stage_boundary_values = stage_boundary_values->dimensions[0];
  size_t number_of_xmom_boundary_values  = xmom_boundary_values->dimensions[0];
  size_t number_of_ymom_boundary_values  = ymom_boundary_values->dimensions[0];

  size_t number_of_stage_explicit_update = stage_explicit_update->dimensions[0];
  size_t number_of_xmom_explicit_update  = xmom_explicit_update->dimensions[0];
  size_t number_of_ymom_explicit_update  = ymom_explicit_update->dimensions[0];
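
  // NOTE: only dimensions[0] (the number of rows) is taken above. For the 2-D
  // quantities (normals, the *_edge_values arrays, ...) whether the per-row
  // width also has to be folded into the byte counts below depends on how
  // allocDeviceMemory()/copyHostToDevice() in cudafun.h interpret their size
  // argument; this is an assumption worth checking.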

  // extract C arrays from python wrapper
  long*   c_neighbours      = (long*)   neighbours->data;
  long*   c_neighbour_edges = (long*)   neighbour_edges->data;
  double* c_normals         = (double*) normals->data;
  double* c_edgelengths     = (double*) edgelengths->data;
  double* c_radii           = (double*) radii->data;
  double* c_areas           = (double*) areas->data;
  long*   c_tri_full_flag   = (long*)   tri_full_flag->data;
  //long* c_already_computed_flux = (long*) already_computed_flux->data;
  double* c_max_speed_array = (double*) max_speed_array->data;

  double* c_stage_edge_values = (double*) stage_edge_values->data;
  double* c_xmom_edge_values  = (double*) xmom_edge_values->data;
  double* c_ymom_edge_values  = (double*) ymom_edge_values->data;
  double* c_bed_edge_values   = (double*) bed_edge_values->data;

  double* c_stage_boundary_values = (double*) stage_boundary_values->data;
  double* c_xmom_boundary_values  = (double*) xmom_boundary_values->data;
  double* c_ymom_boundary_values  = (double*) ymom_boundary_values->data;

  double* c_stage_explicit_update = (double*) stage_explicit_update->data;
  double* c_xmom_explicit_update  = (double*) xmom_explicit_update->data;
  double* c_ymom_explicit_update  = (double*) ymom_explicit_update->data;

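  // setKernelDims() and printKernelDims() come from cudafun.h; presumably they
  // set and report the CUDA launch configuration (grid and block sizes) used
  // by the kernels in cudafun.o. The meaning of (32, 96) is defined there.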
  setKernelDims( 32, 96 );
  printKernelDims();

  static long*   gpu_neighbours            = NULL;
  static long*   gpu_neighbour_edges       = NULL;
  static double* gpu_normals               = NULL;
  static double* gpu_edgelengths           = NULL;
  static double* gpu_radii                 = NULL;
  static double* gpu_areas                 = NULL;
  static long*   gpu_tri_full_flag         = NULL;
  //static long* gpu_already_computed_flux = NULL;
  static double* gpu_max_speed_array       = NULL;

  static double* gpu_stage_edge_values     = NULL;
  static double* gpu_xmom_edge_values      = NULL;
  static double* gpu_ymom_edge_values      = NULL;
  static double* gpu_bed_edge_values       = NULL;

  static double* gpu_stage_boundary_values = NULL;
  static double* gpu_xmom_boundary_values  = NULL;
  static double* gpu_ymom_boundary_values  = NULL;

  static double* gpu_stage_explicit_update = NULL;
  static double* gpu_xmom_explicit_update  = NULL;
  static double* gpu_ymom_explicit_update  = NULL;
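
  // The gpu_* pointers above are static so that the device allocations made in
  // the first iteration persist across subsequent calls; they are released
  // once iteration reaches final_iter below.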

  if( 0 == iteration ) {
    selectDevice(0, 1, "");

    // allocate GPU arrays
    gpu_neighbours      = (long*)   allocDeviceMemory( number_of_neighbours      * sizeof(long) );
    gpu_neighbour_edges = (long*)   allocDeviceMemory( number_of_neighbour_edges * sizeof(long) );
    gpu_normals         = (double*) allocDeviceMemory( number_of_normals         * sizeof(double) );
    gpu_edgelengths     = (double*) allocDeviceMemory( number_of_edgelengths     * sizeof(double) );
    gpu_radii           = (double*) allocDeviceMemory( number_of_radii           * sizeof(double) );
    gpu_areas           = (double*) allocDeviceMemory( number_of_areas           * sizeof(double) );
    gpu_tri_full_flag   = (long*)   allocDeviceMemory( number_of_tri_full_flag   * sizeof(long) );
    //gpu_already_computed_flux = (long*) allocDeviceMemory( number_of_already_computed * sizeof(long) );
    gpu_max_speed_array = (double*) allocDeviceMemory( number_of_max_speed_array * sizeof(double) );

    gpu_stage_edge_values = (double*) allocDeviceMemory( number_of_stage_edge_values * sizeof(double) );
    gpu_xmom_edge_values  = (double*) allocDeviceMemory( number_of_xmom_edge_values  * sizeof(double) );
    gpu_ymom_edge_values  = (double*) allocDeviceMemory( number_of_ymom_edge_values  * sizeof(double) );
    gpu_bed_edge_values   = (double*) allocDeviceMemory( number_of_bed_edge_values   * sizeof(double) );

    gpu_stage_boundary_values = (double*) allocDeviceMemory( number_of_stage_boundary_values * sizeof(double) );
    gpu_xmom_boundary_values  = (double*) allocDeviceMemory( number_of_xmom_boundary_values  * sizeof(double) );
    gpu_ymom_boundary_values  = (double*) allocDeviceMemory( number_of_ymom_boundary_values  * sizeof(double) );

    gpu_stage_explicit_update = (double*) allocDeviceMemory( number_of_stage_explicit_update * sizeof(double) );
    gpu_xmom_explicit_update  = (double*) allocDeviceMemory( number_of_xmom_explicit_update  * sizeof(double) );
    gpu_ymom_explicit_update  = (double*) allocDeviceMemory( number_of_ymom_explicit_update  * sizeof(double) );

    // Constant quantities are copied only in the first iteration
    copyHostToDevice( gpu_neighbours,      c_neighbours,      number_of_neighbours      * sizeof(long) );
    copyHostToDevice( gpu_neighbour_edges, c_neighbour_edges, number_of_neighbour_edges * sizeof(long) );
    copyHostToDevice( gpu_normals,         c_normals,         number_of_normals         * sizeof(double) );
    copyHostToDevice( gpu_edgelengths,     c_edgelengths,     number_of_edgelengths     * sizeof(double) );
    copyHostToDevice( gpu_radii,           c_radii,           number_of_radii           * sizeof(double) );
    copyHostToDevice( gpu_areas,           c_areas,           number_of_areas           * sizeof(double) );
  }
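
  // NOTE: no CUDA status is checked here; allocDeviceMemory() and
  // copyHostToDevice() (declared in cudafun.h) are assumed to report or
  // handle CUDA errors internally.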

  copyHostToDevice( gpu_tri_full_flag, c_tri_full_flag, number_of_tri_full_flag * sizeof(long) );
  //copyHostToDevice( gpu_already_computed_flux, c_already_computed_flux, number_of_already_computed * sizeof(long) );
  //copyHostToDevice( gpu_max_speed_array,       c_max_speed_array,       number_of_max_speed_array  * sizeof(double) );

  copyHostToDevice( gpu_stage_edge_values, c_stage_edge_values, number_of_stage_edge_values * sizeof(double) );
  copyHostToDevice( gpu_xmom_edge_values,  c_xmom_edge_values,  number_of_xmom_edge_values  * sizeof(double) );
  copyHostToDevice( gpu_ymom_edge_values,  c_ymom_edge_values,  number_of_ymom_edge_values  * sizeof(double) );
  copyHostToDevice( gpu_bed_edge_values,   c_bed_edge_values,   number_of_bed_edge_values   * sizeof(double) );

  copyHostToDevice( gpu_stage_boundary_values, c_stage_boundary_values, number_of_stage_boundary_values * sizeof(double) );
  copyHostToDevice( gpu_xmom_boundary_values,  c_xmom_boundary_values,  number_of_xmom_boundary_values  * sizeof(double) );
  copyHostToDevice( gpu_ymom_boundary_values,  c_ymom_boundary_values,  number_of_ymom_boundary_values  * sizeof(double) );

  /*copyHostToDevice( gpu_stage_explicit_update, c_stage_explicit_update, number_of_stage_explicit_update * sizeof(double) );
  copyHostToDevice( gpu_xmom_explicit_update,  c_xmom_explicit_update,  number_of_xmom_explicit_update  * sizeof(double) );
  copyHostToDevice( gpu_ymom_explicit_update,  c_ymom_explicit_update,  number_of_ymom_explicit_update  * sizeof(double) );*/

  // initialize explicit updates to zero (possibly superfluous)
  _set_to_default( gpu_stage_explicit_update, gpu_xmom_explicit_update, gpu_ymom_explicit_update,
                   number_of_stage_explicit_update, 0.0 );
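
  // _set_to_default() is presumably a cudafun.o helper that fills the three
  // device arrays with the given value (0.0 here) before the fluxes are
  // accumulated.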

  // Call underlying flux computation routine and update
  // the explicit update arrays
  timestep = _compute_fluxes_central(number_of_elements,
                                     timestep,
                                     epsilon,
                                     H0,
                                     g,
                                     gpu_neighbours,
                                     gpu_neighbour_edges,
                                     gpu_normals,
                                     gpu_edgelengths,
                                     gpu_radii,
                                     gpu_areas,
                                     gpu_tri_full_flag,
                                     gpu_stage_edge_values,
                                     gpu_xmom_edge_values,
                                     gpu_ymom_edge_values,
                                     gpu_bed_edge_values,
                                     gpu_stage_boundary_values,
                                     gpu_xmom_boundary_values,
                                     gpu_ymom_boundary_values,
                                     gpu_stage_explicit_update,
                                     gpu_xmom_explicit_update,
                                     gpu_ymom_explicit_update,
                                     //gpu_already_computed_flux,
                                     gpu_max_speed_array,
                                     optimise_dry_cells);

  /* (long*)   neighbours -> data,
     (long*)   neighbour_edges -> data,
     (double*) normals -> data,
     (double*) edgelengths -> data,
     (double*) radii -> data,
     (double*) areas -> data,
     (long*)   tri_full_flag -> data,
     (double*) stage_edge_values -> data,
     (double*) xmom_edge_values -> data,
     (double*) ymom_edge_values -> data,
     (double*) bed_edge_values -> data,
     (double*) stage_boundary_values -> data,
     (double*) xmom_boundary_values -> data,
     (double*) ymom_boundary_values -> data,
     (double*) stage_explicit_update -> data,
     (double*) xmom_explicit_update -> data,
     (double*) ymom_explicit_update -> data,
     (long*)   already_computed_flux -> data,
     (double*) max_speed_array -> data,
     optimise_dry_cells);
  */
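
  // The commented-out block above is the equivalent host-pointer argument list
  // (as used by the CPU version of this routine); the call above passes the
  // device pointers instead.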

  // copy GPU to Host memory
  /*copyDeviceToHost( c_neighbours,      gpu_neighbours,      number_of_neighbours      * sizeof(long) );
  copyDeviceToHost( c_neighbour_edges, gpu_neighbour_edges, number_of_neighbour_edges * sizeof(long) );
  copyDeviceToHost( c_normals,         gpu_normals,         number_of_normals         * sizeof(double) );
  copyDeviceToHost( c_edgelengths,     gpu_edgelengths,     number_of_edgelengths     * sizeof(double) );
  copyDeviceToHost( c_radii,           gpu_radii,           number_of_radii           * sizeof(double) );
  copyDeviceToHost( c_areas,           gpu_areas,           number_of_areas           * sizeof(double) );
  copyDeviceToHost( c_tri_full_flag,   gpu_tri_full_flag,   number_of_tri_full_flag   * sizeof(long) );
  //copyDeviceToHost( c_already_computed_flux, gpu_already_computed_flux, number_of_already_computed * sizeof(long) );
  copyDeviceToHost( c_max_speed_array, gpu_max_speed_array, number_of_max_speed_array * sizeof(double) );

  copyDeviceToHost( c_stage_edge_values, gpu_stage_edge_values, number_of_stage_edge_values * sizeof(double) );
  copyDeviceToHost( c_xmom_edge_values,  gpu_xmom_edge_values,  number_of_xmom_edge_values  * sizeof(double) );
  copyDeviceToHost( c_ymom_edge_values,  gpu_ymom_edge_values,  number_of_ymom_edge_values  * sizeof(double) );
  copyDeviceToHost( c_bed_edge_values,   gpu_bed_edge_values,   number_of_bed_edge_values   * sizeof(double) );

  copyDeviceToHost( c_stage_boundary_values, gpu_stage_boundary_values, number_of_stage_boundary_values * sizeof(double) );
  copyDeviceToHost( c_xmom_boundary_values,  gpu_xmom_boundary_values,  number_of_xmom_boundary_values  * sizeof(double) );
  copyDeviceToHost( c_ymom_boundary_values,  gpu_ymom_boundary_values,  number_of_ymom_boundary_values  * sizeof(double) );
  */
  copyDeviceToHost( c_stage_explicit_update, gpu_stage_explicit_update, number_of_stage_explicit_update * sizeof(double) );
  copyDeviceToHost( c_xmom_explicit_update,  gpu_xmom_explicit_update,  number_of_xmom_explicit_update  * sizeof(double) );
  copyDeviceToHost( c_ymom_explicit_update,  gpu_ymom_explicit_update,  number_of_ymom_explicit_update  * sizeof(double) );
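
  // Only the three explicit_update arrays are copied back each iteration; the
  // other quantities (including max_speed_array) are left on the device. The
  // commented-out block above shows what a full copy-back would look like.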

  if( iteration == final_iter ) {
    // Free GPU memory
    freeDeviceMemory( gpu_neighbours );
    freeDeviceMemory( gpu_neighbour_edges );
    freeDeviceMemory( gpu_normals );
    freeDeviceMemory( gpu_edgelengths );
    freeDeviceMemory( gpu_radii );
    freeDeviceMemory( gpu_areas );
    freeDeviceMemory( gpu_tri_full_flag );
    //freeDeviceMemory( gpu_already_computed_flux );
    freeDeviceMemory( gpu_max_speed_array );

    freeDeviceMemory( gpu_stage_edge_values );
    freeDeviceMemory( gpu_xmom_edge_values );
    freeDeviceMemory( gpu_ymom_edge_values );
    freeDeviceMemory( gpu_bed_edge_values );

    freeDeviceMemory( gpu_stage_boundary_values );
    freeDeviceMemory( gpu_xmom_boundary_values );
    freeDeviceMemory( gpu_ymom_boundary_values );

    freeDeviceMemory( gpu_stage_explicit_update );
    freeDeviceMemory( gpu_xmom_explicit_update );
    freeDeviceMemory( gpu_ymom_explicit_update );
  }
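
  // NOTE: final_iter is hard-coded to 1 above (see the TODO), so the device
  // buffers are freed at the end of the second call; if the simulation runs
  // for more iterations before that TODO is resolved, subsequent calls would
  // copy to already-freed device pointers.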

  iteration++;

  Py_DECREF(neighbours);
  Py_DECREF(neighbour_edges);
  Py_DECREF(normals);
  Py_DECREF(edgelengths);
  Py_DECREF(radii);
  Py_DECREF(areas);
  Py_DECREF(tri_full_flag);
  Py_DECREF(already_computed_flux);
  Py_DECREF(max_speed_array);
  Py_DECREF(stage_edge_values);
  Py_DECREF(xmom_edge_values);
  Py_DECREF(ymom_edge_values);
  Py_DECREF(bed_edge_values);
  Py_DECREF(stage_boundary_values);
  Py_DECREF(xmom_boundary_values);
  Py_DECREF(ymom_boundary_values);
  Py_DECREF(stage_explicit_update);
  Py_DECREF(xmom_explicit_update);
  Py_DECREF(ymom_explicit_update);

  // Return updated flux timestep
  return Py_BuildValue("d", timestep);
}


//-------------------------------
// Method table for python module
//-------------------------------
static struct PyMethodDef MethodTable[] = {
  /* The cast of the function is necessary since PyCFunction values
   * only take two PyObject* parameters.
   */
  {"compute_fluxes_ext_central_new_gpu", compute_fluxes_ext_central_new_gpu, METH_VARARGS,
   "Compute fluxes on the GPU and return the updated flux timestep"},
  {NULL, NULL}
};

// Module initialisation
void initgpu_python_glue(void){
  Py_InitModule("gpu_python_glue", MethodTable);

  import_array(); // Necessary for handling of NumPy structures
}
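
// Py_InitModule is the Python 2 initialisation API, matching the Python 2.3
// build notes at the top of this file. For the import to succeed, the init
// function name (initgpu_python_glue) and the shared-library name
// (gpu_python_glue.so) must both match the module name "gpu_python_glue".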