Ignore:
Timestamp:
Aug 28, 2007, 5:09:57 PM (16 years ago)
Author:
ole
Message:

Optimised gravity term making update_conserved_quantities faster.
Instead of computing the average depth from vertices we now use the centroid values directly.

On the Linux machine nautilus the okushiri profile increased the speed of update_conserved_quantities from 4.52s to 2.8s or a 38% increase. The total time was about 38s making this optimisation a 5% overall improvement.

An experimental optimisation for flat beds did not yield any measurable
improvements, so it was commented out.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • anuga_core/source/anuga/shallow_water/shallow_water_ext.c

    r4685 r4687  
    203203  double s_min, s_max, soundspeed_left, soundspeed_right;
    204204  double denom, z;
    205   double q_left_copy[3], q_right_copy[3];
     205  double q_left_rotated[3], q_right_rotated[3];
    206206  double flux_right[3], flux_left[3];
    207207
     
    210210                     //epsilon!
    211211 
    212   //Copy conserved quantities to protect from modification
     212  // Copy conserved quantities to protect from modification
    213213  for (i=0; i<3; i++) {
    214     q_left_copy[i] = q_left[i];
    215     q_right_copy[i] = q_right[i];
    216   }
    217 
    218   //Align x- and y-momentum with x-axis
    219   _rotate(q_left_copy, n1, n2);
    220   _rotate(q_right_copy, n1, n2);
    221 
    222   z = (z_left+z_right)/2; //Take average of field values
    223 
    224   //Compute speeds in x-direction
    225   w_left = q_left_copy[0];         
     214    q_left_rotated[i] = q_left[i];
     215    q_right_rotated[i] = q_right[i];
     216  }
     217
     218  // Align x- and y-momentum with x-axis
     219  _rotate(q_left_rotated, n1, n2);
     220  _rotate(q_right_rotated, n1, n2);
     221
     222  z = (z_left+z_right)/2; // Average elevation values
     223
     224  // Compute speeds in x-direction
     225  w_left = q_left_rotated[0];         
    226226  h_left = w_left-z;
    227   uh_left = q_left_copy[1];
     227  uh_left = q_left_rotated[1];
    228228  u_left = _compute_speed(&uh_left, &h_left, epsilon, h0);
    229229
    230   w_right = q_right_copy[0];
     230  w_right = q_right_rotated[0];
    231231  h_right = w_right-z;
    232   uh_right = q_right_copy[1];
     232  uh_right = q_right_rotated[1];
    233233  u_right = _compute_speed(&uh_right, &h_right, epsilon, h0); 
    234234
    235   //Momentum in y-direction
    236   vh_left  = q_left_copy[2];
    237   vh_right = q_right_copy[2];
     235  // Momentum in y-direction
     236  vh_left  = q_left_rotated[2];
     237  vh_right = q_right_rotated[2];
    238238
    239239  // Limit y-momentum if necessary 
     
    241241  v_right = _compute_speed(&vh_right, &h_right, epsilon, h0);
    242242
    243   //Maximal and minimal wave speeds
     243  // Maximal and minimal wave speeds
    244244  soundspeed_left  = sqrt(g*h_left);
    245245  soundspeed_right = sqrt(g*h_right);
     
    252252
    253253 
    254   //Flux formulas
     254  // Flux formulas
    255255  flux_left[0] = u_left*h_left;
    256256  flux_left[1] = u_left*uh_left + 0.5*g*h_left*h_left;
     
    263263 
    264264 
    265   //Flux computation
     265  // Flux computation
    266266  denom = s_max-s_min;
    267267  if (denom == 0.0) {
     
    271271    for (i=0; i<3; i++) {
    272272      edgeflux[i] = s_max*flux_left[i] - s_min*flux_right[i];
    273       edgeflux[i] += s_max*s_min*(q_right_copy[i]-q_left_copy[i]);
     273      edgeflux[i] += s_max*s_min*(q_right_rotated[i]-q_left_rotated[i]);
    274274      edgeflux[i] /= denom;
    275275    }
    276276
    277     //Maximal wavespeed
     277    // Maximal wavespeed
    278278    *max_speed = max(fabs(s_max), fabs(s_min));
    279279
    280     //Rotate back
     280    // Rotate back
    281281    _rotate(edgeflux, n1, -n2);
    282282  }
     
    321321  double s_min, s_max, soundspeed_left, soundspeed_right;
    322322  double z;
    323   double q_left_copy[3], q_right_copy[3];
     323  double q_left_rotated[3], q_right_rotated[3];
    324324
    325325  double h0 = H0*H0; //This ensures a good balance when h approaches H0.
     
    327327  //Copy conserved quantities to protect from modification
    328328  for (i=0; i<3; i++) {
    329     q_left_copy[i] = q_left[i];
    330     q_right_copy[i] = q_right[i];
     329    q_left_rotated[i] = q_left[i];
     330    q_right_rotated[i] = q_right[i];
    331331  }
    332332
    333333  //Align x- and y-momentum with x-axis
    334   _rotate(q_left_copy, n1, n2);
    335   _rotate(q_right_copy, n1, n2);
     334  _rotate(q_left_rotated, n1, n2);
     335  _rotate(q_right_rotated, n1, n2);
    336336
    337337  z = (z_left+z_right)/2; //Take average of field values
    338338
    339339  //Compute speeds in x-direction
    340   w_left = q_left_copy[0];         
     340  w_left = q_left_rotated[0];         
    341341  h_left = w_left-z;
    342   uh_left = q_left_copy[1];
     342  uh_left = q_left_rotated[1];
    343343  u_left =_compute_speed(&uh_left, &h_left, epsilon, h0);
    344344
    345   w_right = q_right_copy[0];
     345  w_right = q_right_rotated[0];
    346346  h_right = w_right-z;
    347   uh_right = q_right_copy[1];
     347  uh_right = q_right_rotated[1];
    348348  u_right =_compute_speed(&uh_right, &h_right, epsilon, h0); 
    349349
    350350
    351351  //Momentum in y-direction
    352   vh_left  = q_left_copy[2];
    353   vh_right = q_right_copy[2];
     352  vh_left  = q_left_rotated[2];
     353  vh_right = q_right_rotated[2];
    354354
    355355
     
    702702
    703703  PyArrayObject *h, *v, *x, *xmom, *ymom;
    704   int k, i, N, k3, k6;
     704  int k, N, k3, k6;
    705705  double g, avg_h, zx, zy;
    706706  double x0, y0, x1, y1, x2, y2, z0, z1, z2;
     707  //double epsilon;
    707708
    708709  if (!PyArg_ParseTuple(args, "dOOOOO",
    709710                        &g, &h, &v, &x,
    710711                        &xmom, &ymom)) {
     712    //&epsilon)) {
    711713    PyErr_SetString(PyExc_RuntimeError, "shallow_water_ext.c: gravity could not parse input arguments");
    712714    return NULL;
     
    716718  for (k=0; k<N; k++) {
    717719    k3 = 3*k;  // base index
     720
     721    // Get bathymetry
     722    z0 = ((double*) v -> data)[k3 + 0];
     723    z1 = ((double*) v -> data)[k3 + 1];
     724    z2 = ((double*) v -> data)[k3 + 2];
     725
     726    // Optimise for flat bed
     727    // Note (Ole): This didn't produce measurable speed up.
     728    // Revisit later
     729    //if (fabs(z0-z1)<epsilon && fabs(z1-z2)<epsilon) {
     730    //  continue;
     731    //}
     732
     733    // Get average depth from centroid values
     734    avg_h = ((double *) h -> data)[k];
     735
     736    // Compute bed slope
    718737    k6 = 6*k;  // base index
    719 
    720     avg_h = 0.0;
    721     for (i=0; i<3; i++) {
    722       avg_h += ((double *) h -> data)[k3+i];
    723     }
    724     avg_h /= 3;
    725 
    726 
    727     //Compute bed slope
     738 
    728739    x0 = ((double*) x -> data)[k6 + 0];
    729740    y0 = ((double*) x -> data)[k6 + 1];
     
    734745
    735746
    736     z0 = ((double*) v -> data)[k3 + 0];
    737     z1 = ((double*) v -> data)[k3 + 1];
    738     z2 = ((double*) v -> data)[k3 + 2];
    739 
    740747    _gradient(x0, y0, x1, y1, x2, y2, z0, z1, z2, &zx, &zy);
    741748
    742     //Update momentum
     749    // Update momentum
    743750    ((double*) xmom -> data)[k] += -g*zx*avg_h;
    744751    ((double*) ymom -> data)[k] += -g*zy*avg_h;
     
    13171324  double edgeflux[3]; // Work array for summing up fluxes
    13181325
    1319   int number_of_elements, k, i, m, n; //, j, computation_needed;
     1326  int number_of_elements, k, i, m, n;
    13201327
    13211328  int ki, nm=0, ki2; // Index shorthands
     
    14201427      }
    14211428     
    1422 
    1423      
    14241429           
    14251430      // Outward pointing normal vector (domain.normals[k, 2*i:2*i+2])
     
    14281433      normal[1] = ((double *) normals -> data)[ki2+1];
    14291434     
     1435
    14301436      // Edge flux computation (triangle k, edge i)
    14311437      flux_function_central(ql, qr, zl, zr,
Note: See TracChangeset for help on using the changeset viewer.