Ignore:
Timestamp:
Jun 2, 2009, 8:52:47 AM (15 years ago)
Author:
ole
Message:

Thought a bit more about the flux optimisation implemented in changeset:7105

The y-momentum limiter has been reinstated. Instead, I implemented a cut-off for limiting the momentum as described in the manual (see Section about flux limiter). For the simple profile compute_fluxes now takes 2.964s (as opposed to 4.284s prior to changeset:7105) and the overall runtime of evolve takes 9.797s (as opposed to 10.909s). For the okushiri example the timings are now 109.551s for compute_fluxes and 293.314s overall.

Overall this amounts to a 10% overall performance improvement.

See also changeset:6703 for past timings

These timings were conducted on a 3.0GHz Intel Core-2 running Ubuntu Linux.

I had to updated a few tests that used to compare to exact values that are now slightly different.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • anuga_core/source/anuga/shallow_water/shallow_water_ext.c

    r7105 r7143  
    174174                      double *h,
    175175                      double epsilon,
    176                       double h0) {
     176                      double h0,
     177                      double limiting_threshold) {
    177178 
    178179  double u;
    179180
    180  
    181   if (*h < epsilon) {
    182     *h = 0.0;  // Could have been negative
    183     u = 0.0;
     181  if (*h < limiting_threshold) {   
     182    // Apply limiting of speeds according to the ANUGA manual
     183    if (*h < epsilon) {
     184      *h = 0.0;  // Could have been negative
     185      u = 0.0;
     186    } else {
     187      u = *uh/(*h + h0/ *h);   
     188    }
     189 
     190
     191    // Adjust momentum to be consistent with speed
     192    *uh = u * *h;
    184193  } else {
    185     u = *uh/(*h + h0/ *h);   
     194    // We are in deep water - no need for limiting
     195    u = *uh/ *h;
    186196  }
    187  
    188 
    189   // Adjust momentum to be consistent with speed
    190   *uh = u * *h;
    191197 
    192198  return u;
     
    253259                           double z_left, double z_right,
    254260                           double n1, double n2,
    255                            double epsilon, double H0, double g,
     261                           double epsilon,
     262                           double h0,
     263                           double limiting_threshold,
     264                           double g,
    256265                           double *edgeflux, double *max_speed)
    257266{
     
    272281  double w_left, h_left, uh_left, vh_left, u_left;
    273282  double w_right, h_right, uh_right, vh_right, u_right;
    274   //double v_left, v_right; 
    275283  double s_min, s_max, soundspeed_left, soundspeed_right;
    276284  double denom, inverse_denominator, z;
     
    279287  static double q_left_rotated[3], q_right_rotated[3], flux_right[3], flux_left[3];
    280288
    281   double h0 = H0*H0; // This ensures a good balance when h approaches H0.
    282                      // But evidence suggests that h0 can be as little as
    283                      // epsilon!
    284  
    285289  // Copy conserved quantities to protect from modification
    286290  q_left_rotated[0] = q_left[0];
     
    306310  h_left = w_left - z;
    307311  uh_left = q_left_rotated[1];
    308   u_left = _compute_speed(&uh_left, &h_left, epsilon, h0);
     312  u_left = _compute_speed(&uh_left, &h_left,
     313                          epsilon, h0, limiting_threshold);
    309314
    310315  w_right = q_right_rotated[0];
    311316  h_right = w_right - z;
    312317  uh_right = q_right_rotated[1];
    313   u_right = _compute_speed(&uh_right, &h_right, epsilon, h0); 
     318  u_right = _compute_speed(&uh_right, &h_right,
     319                           epsilon, h0, limiting_threshold);
    314320
    315321  // Momentum in y-direction
     
    320326  // Leaving this out, improves speed significantly (Ole 27/5/2009)
    321327  // All validation tests pass, so do we really need it anymore?
    322   //v_left = _compute_speed(&vh_left, &h_left, epsilon, h0);
    323   //v_right = _compute_speed(&vh_right, &h_right, epsilon, h0);
     328  _compute_speed(&vh_left, &h_left,
     329                 epsilon, h0, limiting_threshold);
     330  _compute_speed(&vh_right, &h_right,
     331                 epsilon, h0, limiting_threshold);
    324332
    325333  // Maximal and minimal wave speeds
     
    367375  denom = s_max - s_min;
    368376  if (denom < epsilon)
    369   { // FIXME (Ole): Try using H0 here
     377  { // FIXME (Ole): Try using h0 here
    370378    memset(edgeflux, 0, 3*sizeof(double));
    371379    *max_speed = 0.0;
     
    429437
    430438  double h0 = H0*H0; //This ensures a good balance when h approaches H0.
    431 
     439  double limiting_threshold = 10*H0; // Avoid applying limiter below this 
     440 
    432441  //Copy conserved quantities to protect from modification
    433442  for (i=0; i<3; i++) {
     
    446455  h_left = w_left-z;
    447456  uh_left = q_left_rotated[1];
    448   u_left =_compute_speed(&uh_left, &h_left, epsilon, h0);
     457  u_left =_compute_speed(&uh_left, &h_left,
     458                         epsilon, h0, limiting_threshold);
    449459
    450460  w_right = q_right_rotated[0];
    451461  h_right = w_right-z;
    452462  uh_right = q_right_rotated[1];
    453   u_right =_compute_speed(&uh_right, &h_right, epsilon, h0); 
     463  u_right =_compute_speed(&uh_right, &h_right,
     464                          epsilon, h0, limiting_threshold);
    454465
    455466
     
    930941  PyArrayObject *normal, *ql, *qr,  *edgeflux;
    931942  double g, epsilon, max_speed, H0, zl, zr;
     943  double h0, limiting_threshold;
    932944
    933945  if (!PyArg_ParseTuple(args, "OOOddOddd",
     
    939951
    940952 
     953  h0 = H0*H0; // This ensures a good balance when h approaches H0.
     954              // But evidence suggests that h0 can be as little as
     955              // epsilon!
     956             
     957  limiting_threshold = 10*H0; // Avoid applying limiter below this
     958                              // threshold for performance reasons.
     959                              // See ANUGA manual under flux limiting 
     960 
    941961  _flux_function_central((double*) ql -> data,
    942              (double*) qr -> data,
    943              zl,
    944              zr,                         
    945              ((double*) normal -> data)[0],
    946              ((double*) normal -> data)[1],         
    947              epsilon, H0, g,
    948              (double*) edgeflux -> data,
    949              &max_speed);
     962                         (double*) qr -> data,
     963                         zl,
     964                         zr,                         
     965                         ((double*) normal -> data)[0],
     966                         ((double*) normal -> data)[1],         
     967                         epsilon, h0, limiting_threshold,
     968                         g,
     969                         (double*) edgeflux -> data,
     970                         &max_speed);
    950971 
    951972  return Py_BuildValue("d", max_speed); 
     
    17861807  // Local variables
    17871808  double max_speed, length, inv_area, zl, zr;
     1809  double h0 = H0*H0; // This ensures a good balance when h approaches H0.
     1810
     1811  double limiting_threshold = 10*H0; // Avoid applying limiter below this
     1812                                     // threshold for performance reasons.
     1813                                     // See ANUGA manual under flux limiting
    17881814  int k, i, m, n;
    17891815  int ki, nm=0, ki2; // Index shorthands
     1816 
    17901817 
    17911818  // Workspace (making them static actually made function slightly slower (Ole)) 
     
    17931820
    17941821  static long call = 1; // Static local variable flagging already computed flux
    1795                   
     1822 
    17961823  // Start computation
    17971824  call++; // Flag 'id' of flux calculation for this timestep
     
    18791906      _flux_function_central(ql, qr, zl, zr,
    18801907                             normals[ki2], normals[ki2+1],
    1881                              epsilon, H0, g,
     1908                             epsilon, h0, limiting_threshold, g,
    18821909                             edgeflux, &max_speed);
    18831910     
Note: See TracChangeset for help on using the changeset viewer.