source: trunk/anuga_work/anuga_cuda/src/anuga_HMPP/hmpp_fun.h @ 9329

Last change on this file since 9329 was 9017, checked in by steve, 12 years ago

Adding in Zhe (John) Weng's anuga_cuda code as obtained from googlecode https://code.google.com/p/anuga-cuda

File size: 31.0 KB
Line 
// ---- Build-time switches for this header ----

// Use double-precision floating-point variables (selects DATA_TYPE below).
#define USING_DOUBLE

// Enable when porting to Xe.
//#define ON_XE

// Keep the directives next to the implementations instead of in this header.
//#define USING_LOCAL_DIRECTIVES
#define USING_GLOBAL_DIRECTIVES

// Do not go back and forth between Python and C.
#define EVOLVE_ALL_IN_C

// Select the `.mirror` / `transfer=manual` variants of the codelet directives.
#define USING_MIRROR_DATA
12
13#ifdef USING_CPP
14#include <iostream>
15#include <cstdio>
16#include <cstdlib>
17#include <cmath>
18#include <cassert>
19using namespace std;
20#else
21#include <stdio.h>
22#include <math.h>
23#include <stdlib.h>
24#include <assert.h>
25#include <string.h>
26#endif
27
// DATA_TYPE is the floating-point type used by the DATA_TYPE-based prototypes
// in this header; TOLERANCE is the matching constant for that precision.
#ifdef USING_DOUBLE
#   define TOLERANCE 0.000000000000001
#   define DATA_TYPE double
#else
#   define TOLERANCE 0.0000001
#   define DATA_TYPE float
#endif
36
37
38// Shallow_water domain structure
39#include "sw_domain.h"
40
41
42
43
44int check_tolerance(DATA_TYPE a,DATA_TYPE b);
45
// Evolve entry point operating on the shallow-water domain structure `D`
// (declared in sw_domain.h). Returns a double.
// NOTE(review): the meaning of the return value and of `step` is not visible
// from this header — confirm against the implementation.
double evolve(
        struct domain *D,
        double yieldstep,
        double finaltime,
        double duration,
        double epsilon,
        int skip_initial_step,
        int step);
50
// ---- Host-side routines that operate on the whole domain structure ----

// Data movement helpers for the domain.
void allocate_upload_data(struct domain *D);
void download_data(struct domain *D);

// Per-step stages driven from the domain structure.
int _distribute_to_vertices_and_edges(struct domain *D);
int _extrapolate_second_order_sw(struct domain *D);

// Test drivers for the second-order extrapolation / vertex limiting routines.
void test_extrapolate_second_order_and_limit_by_vertex(struct domain *D);
void test_extrapolate_second_order_and_limit_by_vertex_normal(struct domain *D);

double compute_fluxes(struct domain *D);

int update_boundary(struct domain *D);
int update_ghosts(struct domain *D);
int update_extrema(struct domain *D);
70
71
72
73
74#ifdef USING_GLOBAL_DIRECTIVES
75#ifdef USING_MIRROR_DATA
76#pragma hmpp gravity codelet, target=CUDA, transfer=atcall, &
77#pragma hmpp & args[xmom_explicit_update, ymom_explicit_update, &
78#pragma hmpp & stage_vertex_values, stage_edge_values, stage_centroid_values, &
79#pragma hmpp & bed_edge_values, bed_centroid_values, vertex_coordinates, &
80#pragma hmpp & normals, areas, edgelengths].mirror, &
81#pragma hmpp & args[xmom_explicit_update, ymom_explicit_update, &
82#pragma hmpp & stage_vertex_values, stage_edge_values, stage_centroid_values, &
83#pragma hmpp & bed_edge_values, bed_centroid_values, vertex_coordinates, &
84#pragma hmpp & normals, areas, edgelengths].transfer=manual
85#else
86#pragma hmpp gravity codelet, target=CUDA args[*].transfer=atcall
87#endif
88#endif
89void gravity_wb( 
90        int n, int n3, int n6, 
91        DATA_TYPE xmom_explicit_update[n], 
92        DATA_TYPE ymom_explicit_update[n], 
93
94        DATA_TYPE stage_vertex_values[n3],
95        DATA_TYPE stage_edge_values[n3],
96        DATA_TYPE stage_centroid_values[n],
97
98        DATA_TYPE bed_edge_values[n3],
99        DATA_TYPE bed_centroid_values[n],
100
101        DATA_TYPE vertex_coordinates[n6],
102
103        DATA_TYPE normals[n6],
104        DATA_TYPE areas[n],
105        DATA_TYPE edgelengths[n3],
106
107        DATA_TYPE g );
108
109
110
// compute_fluxes_central_structure_CUDA: prototype registered as HMPP codelet
// `cf_central` (target=CUDA) when USING_GLOBAL_DIRECTIVES is defined.
// With USING_MIRROR_DATA every array argument is declared `.mirror` with
// `transfer=manual`; otherwise all arguments use `transfer=atcall`.
// Index arrays (neighbours, neighbour_edges, tri_full_flag) are `long` here.
// Array extents are given by N, N3, N6 and N2.
#ifdef USING_GLOBAL_DIRECTIVES
#ifdef USING_MIRROR_DATA
#pragma hmpp cf_central codelet, target=CUDA, transfer=atcall, &
#pragma hmpp & args[timestep, neighbours, neighbour_edges, normals, &
#pragma hmpp & edgelengths, radii, areas, tri_full_flag, stage_edge_values, &
#pragma hmpp & xmom_edge_values, ymom_edge_values, bed_edge_values, &
#pragma hmpp & stage_boundary_values, xmom_boundary_values, &
#pragma hmpp & ymom_boundary_values, stage_explicit_update, &
#pragma hmpp & xmom_explicit_update, ymom_explicit_update, &
#pragma hmpp & max_speed_array].mirror, &
#pragma hmpp & args[timestep, neighbours, neighbour_edges, normals, &
#pragma hmpp & edgelengths, radii, areas, tri_full_flag, stage_edge_values, &
#pragma hmpp & xmom_edge_values, ymom_edge_values, bed_edge_values, &
#pragma hmpp & stage_boundary_values, xmom_boundary_values, &
#pragma hmpp & ymom_boundary_values, stage_explicit_update, &
#pragma hmpp & xmom_explicit_update, ymom_explicit_update, &
#pragma hmpp & max_speed_array].transfer=manual
#else
#pragma hmpp cf_central codelet, target=CUDA, args[*].transfer=atcall
#endif
#endif
void compute_fluxes_central_structure_CUDA(
        int N,
        int N3,
        int N6,
        int N2,

        double timestep[N],
        long neighbours[N3],
        long neighbour_edges[N3],
        double normals[N6],
        double edgelengths[N3],
        double radii[N],
        double areas[N],
        long tri_full_flag[N],
        double stage_edge_values[N3],
        double xmom_edge_values[N3],
        double ymom_edge_values[N3],
        double bed_edge_values[N3],

        double stage_boundary_values[N2],
        double xmom_boundary_values[N2],
        double ymom_boundary_values[N2],
        double stage_explicit_update[N],
        double xmom_explicit_update[N],
        double ymom_explicit_update[N],
        double max_speed_array[N],

        double evolve_max_timestep,
        double g,
        double epsilon,
        double h0,
        double limiting_threshold,
        int optimise_dry_cells);
165
166
167
// compute_fluxes_central_structure_cuda_single: prototype registered as HMPP
// codelet `cf_central_single` (target=CUDA) when USING_GLOBAL_DIRECTIVES is
// defined. With USING_MIRROR_DATA every array argument is declared `.mirror`
// with `transfer=manual`; otherwise all arguments use `transfer=atcall`.
// Same parameter list as compute_fluxes_central_structure_CUDA, except that
// neighbours, neighbour_edges and tri_full_flag are `int` arrays here.
#ifdef USING_GLOBAL_DIRECTIVES
#ifdef USING_MIRROR_DATA
#pragma hmpp cf_central_single codelet, target=CUDA, transfer=atcall, &
#pragma hmpp & args[timestep, neighbours, neighbour_edges, normals, &
#pragma hmpp & edgelengths, radii, areas, tri_full_flag, stage_edge_values, &
#pragma hmpp & xmom_edge_values, ymom_edge_values, bed_edge_values, &
#pragma hmpp & stage_boundary_values, xmom_boundary_values, &
#pragma hmpp & ymom_boundary_values, stage_explicit_update, &
#pragma hmpp & xmom_explicit_update, ymom_explicit_update, &
#pragma hmpp & max_speed_array].mirror, &
#pragma hmpp & args[timestep, neighbours, neighbour_edges, normals, &
#pragma hmpp & edgelengths, radii, areas, tri_full_flag, stage_edge_values, &
#pragma hmpp & xmom_edge_values, ymom_edge_values, bed_edge_values, &
#pragma hmpp & stage_boundary_values, xmom_boundary_values, &
#pragma hmpp & ymom_boundary_values, stage_explicit_update, &
#pragma hmpp & xmom_explicit_update, ymom_explicit_update, &
#pragma hmpp & max_speed_array].transfer=manual
#else
#pragma hmpp cf_central_single codelet, target=CUDA, args[*].transfer=atcall
#endif
#endif
void compute_fluxes_central_structure_cuda_single(
        int N,
        int N3,
        int N6,
        int N2,

        double timestep[N],
        int neighbours[N3],
        int neighbour_edges[N3],
        double normals[N6],
        double edgelengths[N3],
        double radii[N],
        double areas[N],
        int tri_full_flag[N],
        double stage_edge_values[N3],
        double xmom_edge_values[N3],
        double ymom_edge_values[N3],
        double bed_edge_values[N3],
        double stage_boundary_values[N2],
        double xmom_boundary_values[N2],
        double ymom_boundary_values[N2],
        double stage_explicit_update[N],
        double xmom_explicit_update[N],
        double ymom_explicit_update[N],
        double max_speed_array[N],

        double evolve_max_timestep,
        double g,
        double epsilon,
        double h0,
        double limiting_threshold,
        int optimise_dry_cells);
221
222
223
224void gravity_wb_orig(
225        DATA_TYPE * xmom_explicit_update, 
226        DATA_TYPE * ymom_explicit_update, 
227        DATA_TYPE * stage_vertex_values, 
228        DATA_TYPE * stage_edge_values, 
229        DATA_TYPE * stage_centroid_values, 
230        DATA_TYPE * bed_edge_values, 
231        DATA_TYPE * bed_centroid_values, 
232        DATA_TYPE * vertex_coordinates, 
233        DATA_TYPE * normals, 
234        DATA_TYPE * areas, 
235        DATA_TYPE * edgelengths,
236        DATA_TYPE * test_xe,
237        DATA_TYPE * test_ye,
238        int N,
239        DATA_TYPE g);
240
241
242
243void gravity_call(
244        int n, int n3, int n6, 
245        DATA_TYPE xmom_explicit_update[n], 
246        DATA_TYPE ymom_explicit_update[n], 
247
248        DATA_TYPE stage_vertex_values[n3],
249        DATA_TYPE stage_edge_values[n3],
250        DATA_TYPE stage_centroid_values[n],
251
252        DATA_TYPE bed_edge_values[n3],
253        DATA_TYPE bed_centroid_values[n],
254
255        DATA_TYPE vertex_coordinates[n6],
256
257        DATA_TYPE normals[n6],
258        DATA_TYPE areas[n],
259        DATA_TYPE edgelengths[n3],
260
261        DATA_TYPE g );
262
263
264
// extrapolate_first_order: prototype registered as HMPP codelet
// `extraFstOrder` (target=CUDA) when USING_GLOBAL_DIRECTIVES is defined.
// With USING_MIRROR_DATA the three arrays are declared `.mirror` with
// `transfer=manual`; otherwise all arguments use `transfer=atcall`.
// centroid_values has N entries; edge_values and vertex_values have N3.
#ifdef USING_GLOBAL_DIRECTIVES
#ifdef USING_MIRROR_DATA
#pragma hmpp extraFstOrder codelet, target=CUDA, transfer=atcall, &
#pragma hmpp & args[centroid_values, edge_values, vertex_values].mirror, &
#pragma hmpp & args[centroid_values, edge_values, vertex_values].transfer=manual
#else
#pragma hmpp extraFstOrder codelet, target=CUDA, args[*].transfer=atcall
#endif
#endif
void extrapolate_first_order(
        int N,
        int N3,
        double centroid_values[N],
        double edge_values[N3],
        double vertex_values[N3]
        );
281
282
283
// swb2_domain.c
// Not used so far
//
// extrapolate_second_order_edge_sw: prototype registered as HMPP codelet
// `extraSndOrderEdge` (target=CUDA, all arguments `transfer=atcall`) when
// USING_GLOBAL_DIRECTIVES is defined. All arrays are passed as raw pointers,
// so no extents are visible to the directive from this declaration.
#ifdef USING_GLOBAL_DIRECTIVES
#pragma hmpp extraSndOrderEdge codelet, target=CUDA, args[*].transfer=atcall
#endif
void extrapolate_second_order_edge_sw(
        int number_of_elements,
        int optimise_dry_cells,
        int extrapolate_velocity_second_order,

        double epsilon,
        double minimum_allowed_height,
        double beta_w,
        double beta_w_dry,
        double beta_uh,
        double beta_uh_dry,
        double beta_vh,
        double beta_vh_dry,

        long* surrogate_neighbours,
        long* number_of_boundaries,

        double* centroid_coordinates,

        double* stage_centroid_values,
        double* elevation_centroid_values,
        double* xmom_centroid_values,
        double* ymom_centroid_values,

        double* edge_coordinates,

        double* stage_edge_values,
        double* elevation_edge_values,
        double* xmom_edge_values,
        double* ymom_edge_values,

        double* stage_vertex_values,
        double* xmom_vertex_values,
        double* ymom_vertex_values,
        double* elevation_vertex_values,

        double* stage_centroid_store,
        double* xmom_centroid_store,
        double* ymom_centroid_store,
        double* min_elevation_edgevalue,
        double* max_elevation_edgevalue,
        int* count_wet_neighbours
        );
332
333
334
// extrapolate_second_order_and_limit_by_vertex: pointer-based prototype.
// Its codelet directive is currently disabled (kept below for reference).
//#pragma hmpp extraSndOrderLmtV codelet, target=CUDA args[*].transfer=atcall
void extrapolate_second_order_and_limit_by_vertex(
        int N,
        int N2,
        int N3,
        int N6,
        double beta,

        double *domain_centroid_coordinates,
        double *domain_vertex_coordinates,
        long *domain_number_of_boundaries,
        long *domain_surrogate_neighbours,
        long *domain_neighbours,

        double *quantity_centroid_values,
        double *quantity_vertex_values,
        double *quantity_edge_values,
        double *quantity_x_gradient,
        double *quantity_y_gradient
        );
354
355
356
// extrapolate_second_order_and_limit_by_vertex_normal: same parameter list as
// extrapolate_second_order_and_limit_by_vertex, but with explicit array
// extents (N, N2, N3, N6) instead of raw pointers. No HMPP directive attached.
void extrapolate_second_order_and_limit_by_vertex_normal(
        int N,
        int N2,
        int N3,
        int N6,
        double beta,

        double domain_centroid_coordinates[N2],
        double domain_vertex_coordinates[N6],
        long   domain_number_of_boundaries[N],
        long   domain_surrogate_neighbours[N3],
        long   domain_neighbours[N3],

        double quantity_centroid_values[N],
        double quantity_vertex_values[N3],
        double quantity_edge_values[N3],
        double quantity_x_gradient[N],
        double quantity_y_gradient[N]
        );
376
377
// Optional HMPP codelet group <extra2LV> (target=CUDA), compiled only when
// TESTING_EXTRA_2_LV_GROUP is defined. Each `map` directive associates the
// listed arguments of the cptGradients, extraFromGradient and lmtVByNeigh
// codelets with one another.
#ifdef TESTING_EXTRA_2_LV_GROUP
#pragma hmpp <extra2LV> group, target=CUDA

// Size arguments
#pragma hmpp <extra2LV> map, &
#pragma hmpp & args[cptGradients::N; extraFromGradient::N;lmtVByNeigh::N]
#pragma hmpp <extra2LV> map, &
#pragma hmpp & args[cptGradients::N2; extraFromGradient::N2]
#pragma hmpp <extra2LV> map, &
#pragma hmpp & args[cptGradients::N3; extraFromGradient::N3; lmtVByNeigh::N3]

// Centroid data
#pragma hmpp <extra2LV> map, &
#pragma hmpp & args[cptGradients::centroids; extraFromGradient::centroids]
#pragma hmpp <extra2LV> map, &
#pragma hmpp & args[cptGradients::centroid_values; extraFromGradient::centroid_values; lmtVByNeigh::centroid_values]

// Gradients: a/b in the first two codelets, x_gradient/y_gradient in the third
#pragma hmpp <extra2LV> map, &
#pragma hmpp & args[cptGradients::a; extraFromGradient::a; lmtVByNeigh::x_gradient]
#pragma hmpp <extra2LV> map, &
#pragma hmpp & args[cptGradients::b; extraFromGradient::b; lmtVByNeigh::y_gradient]

// Vertex and edge values
#pragma hmpp <extra2LV> map, &
#pragma hmpp & args[extraFromGradient::vertex_values; lmtVByNeigh::vertex_values]
#pragma hmpp <extra2LV> map, &
#pragma hmpp & args[extraFromGradient::edge_values; lmtVByNeigh::edge_values]
#endif
400
401
// _compute_gradients: prototype registered as HMPP codelet `cptGradients`
// (target=CUDA) unless NON_DIRECTIVES_EXTRA2_VERTEX_CPTGRA is defined.
// All array arguments are declared `.mirror` with `transfer=manual`.
// centroids has N2 entries, surrogate_neighbours N3, the rest N.
#ifndef NON_DIRECTIVES_EXTRA2_VERTEX_CPTGRA
#pragma hmpp cptGradients codelet, target=CUDA, transfer=atcall, &
#pragma hmpp & args[ centroids, centroid_values, number_of_boundaries, &
#pragma hmpp & surrogate_neighbours, a, b].mirror, &
#pragma hmpp & args[ centroids, centroid_values, number_of_boundaries, &
#pragma hmpp & surrogate_neighbours, a, b].transfer=manual
#endif
void _compute_gradients(
        int N,
        int N2,
        int N3,

        double centroids[N2],
        double centroid_values[N],
        long   number_of_boundaries[N],
        long   surrogate_neighbours[N3],

        double a[N],
        double b[N]);
419
420
421
// _extrapolate_from_gradient: prototype registered as HMPP codelet
// `extraFromGradient` (target=CUDA) unless
// NON_DIRECTIVES_EXTRA2_VERTEX_EXTRA_FROM_GRA is defined.
// centroids, centroid_values, vertex_coordinates, vertex_values, a and b are
// declared `.mirror` with `transfer=manual`.
// NOTE(review): edge_values is a parameter but does not appear in the
// mirror / manual-transfer list, so it falls under the directive-level
// `transfer=atcall` — confirm this is intentional.
#ifndef NON_DIRECTIVES_EXTRA2_VERTEX_EXTRA_FROM_GRA
#pragma hmpp extraFromGradient codelet, target=CUDA, transfer=atcall, &
#pragma hmpp & args[ centroids, centroid_values, vertex_coordinates, vertex_values, a, &
#pragma hmpp & b].mirror, &
#pragma hmpp & args[ centroids, centroid_values, vertex_coordinates, vertex_values, a, &
#pragma hmpp & b].transfer=manual
#endif
void _extrapolate_from_gradient(
        int N,
        int N2,
        int N3,
        int N6,

        double centroids[N2],
        double centroid_values[N],
        double vertex_coordinates[N6],
        double vertex_values[N3],
        double edge_values[N3],

        double a[N],
        double b[N]);
441
442
443
// _limit_vertices_by_all_neighbours: prototype registered as HMPP codelet
// `lmtVByNeigh` (target=CUDA) unless NON_DIRECTIVES_EXTRA2_VERTEX is defined.
// All array arguments are declared `.mirror` with `transfer=manual`.
// centroid_values and the two gradients have N entries; vertex_values,
// edge_values and neighbours have N3.
#ifndef NON_DIRECTIVES_EXTRA2_VERTEX
#pragma hmpp lmtVByNeigh codelet, target=CUDA, transfer=atcall, &
#pragma hmpp & args[centroid_values, vertex_values, edge_values, &
#pragma hmpp & neighbours, x_gradient, y_gradient].mirror, &
#pragma hmpp & args[centroid_values, vertex_values, edge_values, &
#pragma hmpp & neighbours, x_gradient, y_gradient].transfer=manual
#endif
void _limit_vertices_by_all_neighbours(
        int N,
        int N3,
        double beta,
        double centroid_values[N],
        double vertex_values[N3],
        double edge_values[N3],
        long   neighbours[N3],
        double x_gradient[N],
        double y_gradient[N]);
461
462
463
// extrapolate_second_order_and_limit_by_edge: pointer-based prototype with the
// same parameter list as extrapolate_second_order_and_limit_by_vertex.
// Its codelet directive is currently disabled (kept below for reference).
//#pragma hmpp extraSndOrderLmtE codelet, target=CUDA args[*].transfer=atcall
void extrapolate_second_order_and_limit_by_edge(
        int N,
        int N2,
        int N3,
        int N6,
        double beta,

        double *domain_centroid_coordinates,
        double *domain_vertex_coordinates,
        long *domain_number_of_boundaries,
        long *domain_surrogate_neighbours,
        long *domain_neighbours,

        double *quantity_centroid_values,
        double *quantity_vertex_values,
        double *quantity_edge_values,
        double *quantity_x_gradient,
        double *quantity_y_gradient
        );
483
484
485
// balance_deep_and_shallow: prototype registered as HMPP codelet `balance`
// (target=CUDA) when USING_GLOBAL_DIRECTIVES is defined.
// With USING_MIRROR_DATA every array argument is declared `.mirror` with
// `transfer=manual`; otherwise all arguments use `transfer=atcall`.
// Centroid arrays have N entries, vertex arrays N3.
#ifdef USING_GLOBAL_DIRECTIVES
#ifdef USING_MIRROR_DATA
#pragma hmpp balance codelet, target=CUDA, transfer=atcall, &
#pragma hmpp & args[wc, zc, wv, zv, xmomc, ymomc, xmomv, ymomv].mirror, &
#pragma hmpp & args[wc, zc, wv, zv, xmomc, ymomc, xmomv, ymomv].transfer=manual
#else
#pragma hmpp balance codelet, target=CUDA, args[*].transfer=atcall
#endif
#endif
void balance_deep_and_shallow(
        int N,
        int N3,
        double H0,
        double alpha_balance,
        int tight_slope_limiters,
        int use_centroid_velocities,

        double wc[N],   // stage_centroid_values
        double zc[N],   // elevation_centroid_values
        double wv[N3],  // stage_vertex_values
        double zv[N3],  // elevation_vertex_values
        //double* hvbar,// Retire this
        double xmomc[N],  // xmom_centroid_values
        double ymomc[N],  // ymom_centroid_values
        double xmomv[N3],  // xmom_vertex_values
        double ymomv[N3]   // ymom_vertex_values
        );
513
514
515
// set_boundary_values_from_edges: prototype registered as HMPP codelet
// `setBoundaryE` (target=CUDA) when USING_GLOBAL_DIRECTIVES is defined.
// With USING_MIRROR_DATA every array argument is declared `.mirror` with
// `transfer=manual`; otherwise all arguments use `transfer=atcall`.
// vol_id, edge_id and boundary_values have Nb entries; edge_values has N3.
#ifdef USING_GLOBAL_DIRECTIVES
#ifdef USING_MIRROR_DATA
#pragma hmpp setBoundaryE codelet, target=CUDA, transfer=atcall, &
#pragma hmpp & args[vol_id, edge_id, boundary_values, edge_values].mirror, &
#pragma hmpp & args[vol_id, edge_id, boundary_values, edge_values].transfer=manual
#else
#pragma hmpp setBoundaryE codelet, target=CUDA, args[*].transfer=atcall
#endif
#endif
void set_boundary_values_from_edges(
        int Nb,
        int N3,
        long vol_id[Nb],
        long edge_id[Nb],
        double boundary_values[Nb],
        double edge_values[N3]
        );
533
534
// swb2_domain.c
// Not used so far
//
// protect_swb2: prototype registered as HMPP codelet `protectSWB2`
// (target=CUDA) when USING_GLOBAL_DIRECTIVES is defined.
// With USING_MIRROR_DATA every array argument is declared `.mirror` with
// `transfer=manual`; otherwise all arguments use `transfer=atcall`.
// Sizes are `long` and arrays are raw pointers in this variant.
#ifdef USING_GLOBAL_DIRECTIVES
#ifdef USING_MIRROR_DATA
#pragma hmpp protectSWB2 codelet, target=CUDA, transfer=atcall, &
#pragma hmpp & args[wc, wv, zc, zv, xmomc, ymomc, areas].mirror, &
#pragma hmpp & args[wc, wv, zc, zv, xmomc, ymomc, areas].transfer=manual
#else
#pragma hmpp protectSWB2 codelet, target=CUDA, args[*].transfer=atcall
#endif
#endif
void protect_swb2(
        long N,
        long N3,

        double minimum_allowed_height,
        double maximum_allowed_speed,
        double epsilon,

        double* wc,
        double* wv,
        double* zc,
        double* zv,
        double* xmomc,
        double* ymomc,
        double* areas);
561
562
563
// protect_sw: prototype registered as HMPP codelet `protectSW` (target=CUDA)
// when USING_GLOBAL_DIRECTIVES is defined.
// With USING_MIRROR_DATA every array argument is declared `.mirror` with
// `transfer=manual`; otherwise all arguments use `transfer=atcall`.
// All four arrays have N entries; N3 is passed but sizes no parameter here.
#ifdef USING_GLOBAL_DIRECTIVES
#ifdef USING_MIRROR_DATA
#pragma hmpp protectSW codelet, target=CUDA, transfer=atcall, &
#pragma hmpp & args[wc, zc, xmomc, ymomc].mirror, &
#pragma hmpp & args[wc, zc, xmomc, ymomc].transfer=manual
#else
#pragma hmpp protectSW codelet, target=CUDA, args[*].transfer=atcall
#endif
#endif
void protect_sw(
        int N,
        int N3,
        double minimum_allowed_height,
        double maximum_allowed_speed,
        double epsilon,

        double wc[N],   // stage_centroid_values
        double zc[N],   // bed_centroid_values
        double xmomc[N],// xmom_centroid_values
        double ymomc[N] //ymom_centroid_values
        );
585
586
// interpolate_from_vertices_to_edges: prototype registered as HMPP codelet
// `interpolateVtoE` (target=CUDA) when USING_GLOBAL_DIRECTIVES is defined.
// With USING_MIRROR_DATA both arrays are declared `.mirror` with
// `transfer=manual`; otherwise all arguments use `transfer=atcall`.
// Both arrays have N3 entries.
#ifdef USING_GLOBAL_DIRECTIVES
#ifdef USING_MIRROR_DATA
#pragma hmpp interpolateVtoE codelet, target=CUDA, transfer=atcall, &
#pragma hmpp & args[vertex_values, edge_values].mirror, &
#pragma hmpp & args[vertex_values, edge_values].transfer=manual
#else
#pragma hmpp interpolateVtoE codelet, target=CUDA, args[*].transfer=atcall
#endif
#endif
void interpolate_from_vertices_to_edges(
        int N,
        int N3,
        double vertex_values[N3],
        double edge_values[N3]
        );
602
603
604       
// _update_centroids_of_velocities_and_height: prototype registered as HMPP
// codelet `updateCentroidVH` (target=CUDA) when USING_GLOBAL_DIRECTIVES is
// defined. With USING_MIRROR_DATA every array argument is declared `.mirror`
// with `transfer=manual`; otherwise all arguments use `transfer=atcall`.
// The *_C arrays have N_c entries, the *_B arrays N_b.
#ifdef USING_GLOBAL_DIRECTIVES
#ifdef USING_MIRROR_DATA
#pragma hmpp updateCentroidVH codelet, target=CUDA, transfer=atcall, &
#pragma hmpp & args[w_C, uh_C, vh_C, h_C, z_C, u_C, v_C, &
#pragma hmpp &      w_B, uh_B, vh_B, h_B, z_B, u_B, v_B].mirror, &
#pragma hmpp & args[w_C, uh_C, vh_C, h_C, z_C, u_C, v_C, &
#pragma hmpp &      w_B, uh_B, vh_B, h_B, z_B, u_B, v_B].transfer=manual
#else
#pragma hmpp updateCentroidVH codelet, target=CUDA, args[*].transfer=atcall
#endif
#endif
void _update_centroids_of_velocities_and_height(
        int N_c,
        int N_b,
        double w_C[N_c], // stage_centroid_values
        double uh_C[N_c],// xmomentum_centroid_values
        double vh_C[N_c],// ymomentum_centroid_values
        double h_C[N_c], // height_centroid_values
        double z_C[N_c], // elevation_centroid_values
        double u_C[N_c], // xvelocity_centroid_values
        double v_C[N_c], // yvelocity_centroid_values

        double w_B[N_b], // stage_boundary_values
        double uh_B[N_b],// xmomentum_boundary_values
        double vh_B[N_b],// ymomentum_boundary_values
        double h_B[N_b], // height_boundary_values
        double z_B[N_b], // elevation_boundary_values
        double u_B[N_b], // xvelocity_boundary_values
        double v_B[N_b] // yvelocity_boundary_values
        );
635
636
637
// manning_friction_flat: prototype registered as HMPP codelet
// `manFrictionFlat` (target=CUDA) when USING_GLOBAL_DIRECTIVES is defined.
// With USING_MIRROR_DATA every array argument is declared `.mirror` with
// `transfer=manual`; otherwise all arguments use `transfer=atcall`.
// zv has N3 entries; every other array has N.
#ifdef USING_GLOBAL_DIRECTIVES
#ifdef USING_MIRROR_DATA
#pragma hmpp manFrictionFlat codelet, target=CUDA, transfer=atcall, &
#pragma hmpp & args[w, zv, uh, vh, eta, xmom, ymom].mirror, &
#pragma hmpp & args[w, zv, uh, vh, eta, xmom, ymom].transfer=manual
#else
#pragma hmpp manFrictionFlat codelet, target=CUDA, args[*].transfer=atcall
#endif
#endif
void manning_friction_flat(
        int N,
        int N3,
        double g,
        double eps, // minimum_allowed_height

        double w[N],  // stage_centroid_values
        double zv[N3], // elevation_vertex_values
        double uh[N], // xmom_centroid_values
        double vh[N], // ymom_centroid_values
        double eta[N],// friction_centroid_values
        double xmom[N],//xmom_semi_implicit_update
        double ymom[N]//ymom_semi_implicit_update
        );
661
662
663
// manning_friction_sloped: prototype registered as HMPP codelet
// `manFrictionSloped` (target=CUDA) when USING_GLOBAL_DIRECTIVES is defined.
// With USING_MIRROR_DATA every array argument is declared `.mirror` with
// `transfer=manual`; otherwise all arguments use `transfer=atcall`.
// x has N6 entries, zv has N3; every other array has N.
#ifdef USING_GLOBAL_DIRECTIVES
#ifdef USING_MIRROR_DATA
#pragma hmpp manFrictionSloped codelet, target=CUDA, transfer=atcall, &
#pragma hmpp & args[x, w, zv, uh, vh, eta, xmom_update, ymom_update].mirror, &
#pragma hmpp & args[x, w, zv, uh, vh, eta, xmom_update, &
#pragma hmpp & ymom_update].transfer=manual
#else
#pragma hmpp manFrictionSloped codelet, target=CUDA, args[*].transfer=atcall
#endif
#endif
void manning_friction_sloped(
        int N,
        int N3,
        int N6,
        double g,
        double eps, // minimum_allowed_height

        double x[N6],  // vertex_coordinates
        double w[N],  // stage_centroid_values
        double zv[N3], // elevation_vertex_values
        double uh[N], // xmom_centroid_values
        double vh[N], // ymom_centroid_values
        double eta[N],// friction_centroid_values
        double xmom_update[N],    // xmom_semi_implicit_update
        double ymom_update[N]    // ymom_semi_implicit_update
        );
690
691
692
// extrapolate_second_order_velocity_true: prototype registered as HMPP
// codelet `extraSndVelocity` (target=CUDA) when USING_GLOBAL_DIRECTIVES is
// defined. With USING_MIRROR_DATA every array argument is declared `.mirror`
// with `transfer=manual`; otherwise all arguments use `transfer=atcall`.
// All arrays have N entries.
#ifdef USING_GLOBAL_DIRECTIVES
#ifdef USING_MIRROR_DATA
#pragma hmpp extraSndVelocity codelet, target=CUDA, transfer=atcall, &
#pragma hmpp & args[stage_centroid_values, bed_centroid_values, &
#pragma hmpp & xmom_centroid_values, xmom_centroid_store, &
#pragma hmpp & ymom_centroid_values, ymom_centroid_store].mirror, &
#pragma hmpp & args[stage_centroid_values, bed_centroid_values, &
#pragma hmpp & xmom_centroid_values, xmom_centroid_store, &
#pragma hmpp & ymom_centroid_values, ymom_centroid_store].transfer=manual
#else
#pragma hmpp extraSndVelocity codelet, target=CUDA, args[*].transfer=atcall
#endif
#endif
void extrapolate_second_order_velocity_true(
            int N,
            double minimum_allowed_height,
            double stage_centroid_values[N],
            double bed_centroid_values[N],
            double xmom_centroid_values[N],
            double xmom_centroid_store[N],
            double ymom_centroid_values[N],
            double ymom_centroid_store[N]
            );
716
717
718           
// extrapolate_second_order_sw_true: prototype registered as HMPP codelet
// `extraSndOrderSWT` (target=CUDA) when USING_GLOBAL_DIRECTIVES is defined.
// With USING_MIRROR_DATA every array argument is declared `.mirror` with
// `transfer=manual`; otherwise all arguments use `transfer=atcall`.
// Array extents are given by N, N2, N3 and N6.
#ifdef USING_GLOBAL_DIRECTIVES
#ifdef USING_MIRROR_DATA
#pragma hmpp extraSndOrderSWT codelet, target=CUDA, transfer=atcall, &
#pragma hmpp & args[surrogate_neighbours, number_of_boundaries, &
#pragma hmpp & centroid_coordinates, stage_centroid_values, &
#pragma hmpp & bed_centroid_values, xmom_centroid_values, &
#pragma hmpp & ymom_centroid_values, vertex_coordinates, &
#pragma hmpp & stage_vertex_values, bed_vertex_values, &
#pragma hmpp & xmom_vertex_values, ymom_vertex_values].mirror, &
#pragma hmpp & args[surrogate_neighbours, number_of_boundaries, &
#pragma hmpp & centroid_coordinates, stage_centroid_values, &
#pragma hmpp & bed_centroid_values, xmom_centroid_values, &
#pragma hmpp & ymom_centroid_values, vertex_coordinates, &
#pragma hmpp & stage_vertex_values, bed_vertex_values, &
#pragma hmpp & xmom_vertex_values, ymom_vertex_values].transfer=manual
#else
#pragma hmpp extraSndOrderSWT codelet, target=CUDA, args[*].transfer=atcall
#endif
#endif
void extrapolate_second_order_sw_true (
        int N,
        int N2,
        int N3,
        int N6,
        double epsilon,
        double minimum_allowed_height,
        double beta_w,
        double beta_w_dry,
        double beta_uh,
        double beta_uh_dry,
        double beta_vh,
        double beta_vh_dry,
        int optimise_dry_cells,

        long surrogate_neighbours[N3],
        long number_of_boundaries[N],
        double centroid_coordinates[N2],

        double stage_centroid_values[N],
        double bed_centroid_values[N],
        double xmom_centroid_values[N],
        double ymom_centroid_values[N],

        double vertex_coordinates[N6],

        double stage_vertex_values[N3],
        double bed_vertex_values[N3],
        double xmom_vertex_values[N3],
        double ymom_vertex_values[N3]
        );
769
770
771
// extrapolate_second_order_sw: prototype registered as HMPP codelet
// `extraSndOrderSW` (target=CUDA) when USING_GLOBAL_DIRECTIVES is defined.
// With USING_MIRROR_DATA every array argument is declared `.mirror` with
// `transfer=manual`; otherwise all arguments use `transfer=atcall`.
// Compared with extrapolate_second_order_sw_true it additionally takes
// extrapolate_velocity_second_order and three *_centroid_store arrays.
#ifdef USING_GLOBAL_DIRECTIVES
#ifdef USING_MIRROR_DATA
#pragma hmpp extraSndOrderSW codelet, target=CUDA, transfer=atcall, &
#pragma hmpp & args[surrogate_neighbours, number_of_boundaries, &
#pragma hmpp & centroid_coordinates, stage_centroid_values, &
#pragma hmpp & bed_centroid_values, xmom_centroid_values, &
#pragma hmpp & ymom_centroid_values, vertex_coordinates, &
#pragma hmpp & stage_vertex_values, bed_vertex_values, &
#pragma hmpp & xmom_vertex_values, ymom_vertex_values, &
#pragma hmpp & stage_centroid_store, xmom_centroid_store, &
#pragma hmpp & ymom_centroid_store].mirror, &
#pragma hmpp & args[surrogate_neighbours, number_of_boundaries, &
#pragma hmpp & centroid_coordinates, stage_centroid_values, &
#pragma hmpp & bed_centroid_values, xmom_centroid_values, &
#pragma hmpp & ymom_centroid_values, vertex_coordinates, &
#pragma hmpp & stage_vertex_values, bed_vertex_values, &
#pragma hmpp & xmom_vertex_values, ymom_vertex_values, &
#pragma hmpp & stage_centroid_store, xmom_centroid_store, &
#pragma hmpp & ymom_centroid_store].transfer=manual
#else
#pragma hmpp extraSndOrderSW codelet, target=CUDA, args[*].transfer=atcall
#endif
#endif
void extrapolate_second_order_sw(
        int N,
        int N2,
        int N3,
        int N6,
        double epsilon,
        double minimum_allowed_height,
        double beta_w,
        double beta_w_dry,
        double beta_uh,
        double beta_uh_dry,
        double beta_vh,
        double beta_vh_dry,
        int optimise_dry_cells,
        int extrapolate_velocity_second_order,

        long surrogate_neighbours[N3],
        long number_of_boundaries[N],
        double centroid_coordinates[N2],

        double stage_centroid_values[N],
        double bed_centroid_values[N],
        double xmom_centroid_values[N],
        double ymom_centroid_values[N],

        double vertex_coordinates[N6],

        double stage_vertex_values[N3],
        double bed_vertex_values[N3],
        double xmom_vertex_values[N3],
        double ymom_vertex_values[N3],
        double stage_centroid_store[N],
        double xmom_centroid_store[N],
        double ymom_centroid_store[N]
        );
830
831
832
// extrapolate_second_order_sw_false: prototype registered as HMPP codelet
// `extraSndOrderSWF` (target=CUDA) when USING_GLOBAL_DIRECTIVES is defined.
// With USING_MIRROR_DATA every array argument is declared `.mirror` with
// `transfer=manual`; otherwise all arguments use `transfer=atcall`.
// NOTE(review): unlike extrapolate_second_order_sw_true, this prototype has
// no N2 parameter and declares centroid_coordinates with extent [N3] rather
// than [N2] — confirm the intended extent against the implementation.
#ifdef USING_GLOBAL_DIRECTIVES
#ifdef USING_MIRROR_DATA
#pragma hmpp extraSndOrderSWF codelet, target=CUDA, transfer=atcall, &
#pragma hmpp & args[surrogate_neighbours, number_of_boundaries, &
#pragma hmpp & centroid_coordinates, stage_centroid_values, &
#pragma hmpp & bed_centroid_values, xmom_centroid_values, &
#pragma hmpp & ymom_centroid_values, vertex_coordinates, &
#pragma hmpp & stage_vertex_values, bed_vertex_values, &
#pragma hmpp & xmom_vertex_values, ymom_vertex_values].mirror, &
#pragma hmpp & args[surrogate_neighbours, number_of_boundaries, &
#pragma hmpp & centroid_coordinates, stage_centroid_values, &
#pragma hmpp & bed_centroid_values, xmom_centroid_values, &
#pragma hmpp & ymom_centroid_values, vertex_coordinates, &
#pragma hmpp & stage_vertex_values, bed_vertex_values, &
#pragma hmpp & xmom_vertex_values, ymom_vertex_values].transfer=manual
#else
#pragma hmpp extraSndOrderSWF codelet, target=CUDA, args[*].transfer=atcall
#endif
#endif
void extrapolate_second_order_sw_false (
        int N,
        int N3,
        int N6,
        double epsilon,
        double minimum_allowed_height,
        double beta_w,
        double beta_w_dry,
        double beta_uh,
        double beta_uh_dry,
        double beta_vh,
        double beta_vh_dry,
        int optimise_dry_cells,

        long surrogate_neighbours[N3],
        long number_of_boundaries[N],
        double centroid_coordinates[N3],

        double stage_centroid_values[N],
        double bed_centroid_values[N],
        double xmom_centroid_values[N],
        double ymom_centroid_values[N],

        double vertex_coordinates[N6],

        double stage_vertex_values[N3],
        double bed_vertex_values[N3],
        double xmom_vertex_values[N3],
        double ymom_vertex_values[N3]
        );
882
883
884
// update: prototype registered as HMPP codelet `update` (target=CUDA) when
// USING_GLOBAL_DIRECTIVES is defined.
// With USING_MIRROR_DATA the three arrays are declared `.mirror` with
// `transfer=manual`; otherwise all arguments use `transfer=atcall`.
// All arrays have N entries.
#ifdef USING_GLOBAL_DIRECTIVES
#ifdef USING_MIRROR_DATA
#pragma hmpp update codelet, target=CUDA, transfer=atcall, &
#pragma hmpp & args[centroid_values, explicit_update, &
#pragma hmpp & semi_implicit_update].mirror, &
#pragma hmpp & args[centroid_values, explicit_update, &
#pragma hmpp & semi_implicit_update].transfer=manual
#else
#pragma hmpp update codelet, target=CUDA, args[*].transfer=atcall
#endif
#endif
void update(
        int N,
        double timestep,
        double centroid_values[N],
        double explicit_update[N],
        double semi_implicit_update[N]
        );
903   
904
905
// saxpy_centroid_values: prototype registered as HMPP codelet `saxpyCen`
// (target=CUDA) when USING_GLOBAL_DIRECTIVES is defined.
// With USING_MIRROR_DATA both arrays are declared `.mirror` with
// `transfer=manual`; otherwise all arguments use `transfer=atcall`.
// Both arrays have N entries; a and b are scalar coefficients.
// NOTE(review): the exact combination (presumably a*values + b*backup) is not
// visible from this header — confirm against the implementation.
#ifdef USING_GLOBAL_DIRECTIVES
#ifdef USING_MIRROR_DATA
#pragma hmpp saxpyCen codelet, target=CUDA, transfer=atcall, &
#pragma hmpp & args[centroid_values, centroid_backup_values].mirror, &
#pragma hmpp & args[centroid_values, centroid_backup_values].transfer=manual
#else
#pragma hmpp saxpyCen codelet, target=CUDA, args[*].transfer=atcall
#endif
#endif
void saxpy_centroid_values(
        int N,
        double a,
        double b,
        double centroid_values[N],
        double centroid_backup_values[N]
        );
922
923
924
925
// evaluate_segment_reflective: prototype registered as HMPP codelet `evaRef`
// (target=CUDA) when USING_GLOBAL_DIRECTIVES is defined.
// With USING_MIRROR_DATA the arrays named in the directive are declared
// `.mirror` with `transfer=manual`; otherwise all arguments use
// `transfer=atcall`.
// NOTE(review): ids, vol_ids, edge_ids, normals and ymom_edge_values are
// parameters but are not in the mirror / manual-transfer list, so they fall
// under the directive-level `transfer=atcall` — confirm this is intentional
// (ymom_edge_values in particular, since xmom_edge_values is listed).
#ifdef USING_GLOBAL_DIRECTIVES
#ifdef USING_MIRROR_DATA
#pragma hmpp evaRef codelet, target=CUDA, transfer=atcall, &
#pragma hmpp & args[stage_edge_values, bed_edge_values, height_edge_values, &
#pragma hmpp & xmom_edge_values, xvel_edge_values, yvel_edge_values, &
#pragma hmpp & stage_boundary_values, bed_boundary_values, &
#pragma hmpp & height_boundary_values, xmom_boundary_values, &
#pragma hmpp & ymom_boundary_values, xvel_boundary_values, &
#pragma hmpp & yvel_boundary_values].mirror, &
#pragma hmpp & args[stage_edge_values, bed_edge_values, height_edge_values, &
#pragma hmpp & xmom_edge_values, xvel_edge_values, yvel_edge_values, &
#pragma hmpp & stage_boundary_values, bed_boundary_values, &
#pragma hmpp & height_boundary_values, xmom_boundary_values, &
#pragma hmpp & ymom_boundary_values, xvel_boundary_values, &
#pragma hmpp & yvel_boundary_values].transfer=manual
#else
#pragma hmpp evaRef codelet, target=CUDA, args[*].transfer=atcall
#endif
#endif
void evaluate_segment_reflective(
    int N1,   // Nids
    int N2,     // Nb
    int N3,
    int N6,

    long ids[N1],
    long vol_ids[N2],   // domain.boundary_cells
    long edge_ids[N2],  // domain.boundary_edges
    double normals[N6],

    double stage_edge_values[N3],
    double bed_edge_values[N3],
    double height_edge_values[N3],
    double xmom_edge_values[N3],
    double ymom_edge_values[N3],
    double xvel_edge_values[N3],
    double yvel_edge_values[N3],

    double stage_boundary_values[N2],
    double bed_boundary_values[N2],
    double height_boundary_values[N2],
    double xmom_boundary_values[N2],
    double ymom_boundary_values[N2],
    double xvel_boundary_values[N2],
    double yvel_boundary_values[N2]
    );
972
973
974
// swb2
// Helper prototypes: _find_qmin_and_qmax takes three differences and writes
// its results through qmin / qmax; _limit_gradient takes a single dqv pointer.
// Both return an int.
// NOTE(review): the return-value convention is not visible here — confirm.
int _find_qmin_and_qmax(
        double dq0,
        double dq1,
        double dq2,
        double *qmin,
        double *qmax);

int _limit_gradient(
        double *dqv,
        double qmin,
        double qmax,
        double beta_w);
981
982
983
// extrapolate_second_order_sw
// Limiter helpers. limit_gradient takes the three vertex differences as
// separate pointers; limit_gradient_old takes them through a single pointer.
// find_qmin_and_qmax writes its results through qmin / qmax.
int limit_gradient(double *dqv0, double *dqv1, double *dqv2,
                   double qmin, double qmax, double beta_w);

int limit_gradient_old(double *dqv,
                       double qmin, double qmax, double beta_w);

int find_qmin_and_qmax(double dq0, double dq1, double dq2,
                       double *qmin, double *qmax);
1006
1007
1008
// Test entry points. The two parameterless ones are declared with `(void)`
// so that they are real prototypes in C and calls with arguments are
// rejected at compile time.
void test_call(void);
// protect_sw.c
void test_protect_sw(void);



void test_single(struct domain *D);
Note: See TracBrowser for help on using the repository browser.