Changeset 7164
- Timestamp:
- Jun 9, 2009, 9:30:09 PM (15 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/numpy/anuga/shallow_water/shallow_water_ext.c
r6902 r7164 18 18 #include "math.h" 19 19 #include <stdio.h> 20 21 20 #include "numpy_shim.h" 22 21 … … 28 27 29 28 // Computational function for rotation 30 // FIXME: Perhaps inline this and profile 29 // Tried to inline, but no speedup was achieved 27th May 2009 (Ole) 30 // static inline int _rotate(double *q, double n1, double n2) { 31 31 int _rotate(double *q, double n1, double n2) { 32 32 /*Rotate the momentum component q (q[1], q[2]) … … 68 68 if (dq1>=dq2){ 69 69 if (dq1>=0.0) 70 70 *qmax=dq0+dq1; 71 71 else 72 73 72 *qmax=dq0; 73 74 74 *qmin=dq0+dq2; 75 75 if (*qmin>=0.0) *qmin = 0.0; … … 77 77 else{// dq1<dq2 78 78 if (dq2>0) 79 79 *qmax=dq0+dq2; 80 80 else 81 82 81 *qmax=dq0; 82 83 83 *qmin=dq0+dq1; 84 84 if (*qmin>=0.0) *qmin=0.0; … … 88 88 if (dq1<=dq2){ 89 89 if (dq1<0.0) 90 90 *qmin=dq0+dq1; 91 91 else 92 93 92 *qmin=dq0; 93 94 94 *qmax=dq0+dq2; 95 95 if (*qmax<=0.0) *qmax=0.0; … … 97 97 else{// dq1>dq2 98 98 if (dq2<0.0) 99 99 *qmin=dq0+dq2; 100 100 else 101 102 101 *qmin=dq0; 102 103 103 *qmax=dq0+dq1; 104 104 if (*qmax<=0.0) *qmax=0.0; … … 169 169 // This is used by flux functions 170 170 // Input parameters uh and h may be modified by this function. 171 172 // FIXME: Perhaps inline this and profile171 // Tried to inline, but no speedup was achieved 27th May 2009 (Ole) 172 //static inline double _compute_speed(double *uh, 173 173 double _compute_speed(double *uh, 174 double *h, 175 double epsilon, 176 double h0) { 174 double *h, 175 double epsilon, 176 double h0, 177 double limiting_threshold) { 177 178 178 179 double u; 179 180 180 181 if (*h < epsilon) { 182 *h = 0.0; //Could have been negative 183 u = 0.0; 181 if (*h < limiting_threshold) { 182 // Apply limiting of speeds according to the ANUGA manual 183 if (*h < epsilon) { 184 *h = 0.0; // Could have been negative 185 u = 0.0; 186 } else { 187 u = *uh/(*h + h0/ *h); 188 } 189 190 191 // Adjust momentum to be consistent with speed 192 *uh = u * *h; 184 193 } else { 185 u = *uh/(*h + h0/ *h); 194 // We are in deep water - no need for limiting 195 u = *uh/ *h; 186 196 } 187 188 189 // Adjust momentum to be consistent with speed190 *uh = u * *h;191 197 192 198 return u; … … 224 230 225 231 226 // Optimised squareroot computation (double version , slower)232 // Optimised squareroot computation (double version) 227 233 double Xfast_squareroot_approximation(double number) { 228 234 double x; … … 253 259 double z_left, double z_right, 254 260 double n1, double n2, 255 double epsilon, double H0, double g, 261 double epsilon, 262 double h0, 263 double limiting_threshold, 264 double g, 256 265 double *edgeflux, double *max_speed) 257 266 { … … 272 281 double w_left, h_left, uh_left, vh_left, u_left; 273 282 double w_right, h_right, uh_right, vh_right, u_right; 274 double v_left, v_right;275 283 double s_min, s_max, soundspeed_left, soundspeed_right; 276 284 double denom, inverse_denominator, z; … … 279 287 static double q_left_rotated[3], q_right_rotated[3], flux_right[3], flux_left[3]; 280 288 281 double h0 = H0*H0; // This ensures a good balance when h approaches H0.282 // But evidence suggests that h0 can be as little as283 // epsilon!284 285 289 // Copy conserved quantities to protect from modification 286 290 q_left_rotated[0] = q_left[0]; … … 296 300 297 301 z = 0.5*(z_left + z_right); // Average elevation values. 298 // Even though this will nominally allow for discontinuities 299 // in the elevation data, there is currently no numerical 300 // support for this so results may be strange near jumps in the bed. 302 // Even though this will nominally allow 303 // for discontinuities in the elevation data, 304 // there is currently no numerical support for 305 // this so results may be strange near 306 // jumps in the bed. 301 307 302 308 // Compute speeds in x-direction … … 304 310 h_left = w_left - z; 305 311 uh_left = q_left_rotated[1]; 306 u_left = _compute_speed(&uh_left, &h_left, epsilon, h0); 312 u_left = _compute_speed(&uh_left, &h_left, 313 epsilon, h0, limiting_threshold); 307 314 308 315 w_right = q_right_rotated[0]; 309 316 h_right = w_right - z; 310 317 uh_right = q_right_rotated[1]; 311 u_right = _compute_speed(&uh_right, &h_right, epsilon, h0); 318 u_right = _compute_speed(&uh_right, &h_right, 319 epsilon, h0, limiting_threshold); 312 320 313 321 // Momentum in y-direction … … 315 323 vh_right = q_right_rotated[2]; 316 324 317 // Limit y-momentum if necessary 318 v_left = _compute_speed(&vh_left, &h_left, epsilon, h0); 319 v_right = _compute_speed(&vh_right, &h_right, epsilon, h0); 325 // Limit y-momentum if necessary 326 // Leaving this out, improves speed significantly (Ole 27/5/2009) 327 // All validation tests pass, so do we really need it anymore? 328 _compute_speed(&vh_left, &h_left, 329 epsilon, h0, limiting_threshold); 330 _compute_speed(&vh_right, &h_right, 331 epsilon, h0, limiting_threshold); 320 332 321 333 // Maximal and minimal wave speeds … … 324 336 325 337 // Code to use fast square root optimisation if desired. 338 // Timings on AMD 64 for the Okushiri profile gave the following timings 339 // 340 // SQRT Total Flux 341 //============================= 342 // 343 // Ref 405s 152s 344 // Fast (dbl) 453s 173s 345 // Fast (sng) 437s 171s 346 // 347 // Consequently, there is currently (14/5/2009) no reason to use this 348 // approximation. 349 326 350 //soundspeed_left = fast_squareroot_approximation(g*h_left); 327 351 //soundspeed_right = fast_squareroot_approximation(g*h_right); … … 351 375 denom = s_max - s_min; 352 376 if (denom < epsilon) 353 { // FIXME (Ole): Try using H0 here377 { // FIXME (Ole): Try using h0 here 354 378 memset(edgeflux, 0, 3*sizeof(double)); 355 379 *max_speed = 0.0; … … 413 437 414 438 double h0 = H0*H0; //This ensures a good balance when h approaches H0. 415 439 double limiting_threshold = 10*H0; // Avoid applying limiter below this 440 416 441 //Copy conserved quantities to protect from modification 417 442 for (i=0; i<3; i++) { … … 430 455 h_left = w_left-z; 431 456 uh_left = q_left_rotated[1]; 432 u_left =_compute_speed(&uh_left, &h_left, epsilon, h0); 457 u_left =_compute_speed(&uh_left, &h_left, 458 epsilon, h0, limiting_threshold); 433 459 434 460 w_right = q_right_rotated[0]; 435 461 h_right = w_right-z; 436 462 uh_right = q_right_rotated[1]; 437 u_right =_compute_speed(&uh_right, &h_right, epsilon, h0); 463 u_right =_compute_speed(&uh_right, &h_right, 464 epsilon, h0, limiting_threshold); 438 465 439 466 … … 656 683 // FIXME: Try with this one precomputed 657 684 for (i=0; i<3; i++) { 658 685 dz = max(dz, fabs(zv[k3+i]-zc[k])); 659 686 } 660 687 } … … 684 711 685 712 if (dz > 0.0) { 686 713 alpha = max( min( alpha_balance*hmin/dz, 1.0), 0.0 ); 687 714 } else { 688 715 alpha = 1.0; // Flat bed 689 716 } 690 717 //printf("Using old style limiter\n"); … … 699 726 700 727 if (hmin < H0) { 701 alpha = 1.0; 702 for (i=0; i<3; i++) { 703 704 h_diff = hc_k - hv[i]; 705 if (h_diff <= 0) { 706 // Deep water triangle is further away from bed than 707 // shallow water (hbar < h). Any alpha will do 708 709 } else { 710 // Denominator is positive which means that we need some of the 711 // h-limited stage. 712 713 alpha = min(alpha, (hc_k - H0)/h_diff); 728 alpha = 1.0; 729 for (i=0; i<3; i++) { 730 731 h_diff = hc_k - hv[i]; 732 if (h_diff <= 0) { 733 // Deep water triangle is further away from bed than 734 // shallow water (hbar < h). Any alpha will do 735 736 } else { 737 // Denominator is positive which means that we need some of the 738 // h-limited stage. 739 740 alpha = min(alpha, (hc_k - H0)/h_diff); 741 } 742 } 743 744 // Ensure alpha in [0,1] 745 if (alpha>1.0) alpha=1.0; 746 if (alpha<0.0) alpha=0.0; 747 748 } else { 749 // Use w-limited stage exclusively in deeper water. 750 alpha = 1.0; 714 751 } 715 752 } 716 717 // Ensure alpha in [0,1] 718 if (alpha>1.0) alpha=1.0; 719 if (alpha<0.0) alpha=0.0; 720 721 } else { 722 // Use w-limited stage exclusively in deeper water. 723 alpha = 1.0; 724 } 725 } 726 727 753 754 728 755 // Let 729 756 // … … 743 770 // Momentum is balanced between constant and limited 744 771 745 772 746 773 if (alpha < 1) { 747 774 for (i=0; i<3; i++) { 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 775 776 wv[k3+i] = zv[k3+i] + (1-alpha)*hc_k + alpha*hv[i]; 777 778 // Update momentum at vertices 779 if (use_centroid_velocities == 1) { 780 // This is a simple, efficient and robust option 781 // It uses first order approximation of velocities, but retains 782 // the order used by stage. 783 784 // Speeds at centroids 785 if (hc_k > epsilon) { 786 uc = xmomc[k]/hc_k; 787 vc = ymomc[k]/hc_k; 788 } else { 789 uc = 0.0; 790 vc = 0.0; 791 } 792 793 // Vertex momenta guaranteed to be consistent with depth guaranteeing 794 // controlled speed 795 hv[i] = wv[k3+i] - zv[k3+i]; // Recompute (balanced) vertex depth 796 xmomv[k3+i] = uc*hv[i]; 797 ymomv[k3+i] = vc*hv[i]; 798 799 } else { 800 // Update momentum as a linear combination of 801 // xmomc and ymomc (shallow) and momentum 802 // from extrapolator xmomv and ymomv (deep). 803 // This assumes that values from xmomv and ymomv have 804 // been established e.g. by the gradient limiter. 805 806 // FIXME (Ole): I think this should be used with vertex momenta 807 // computed above using centroid_velocities instead of xmomc 808 // and ymomc as they'll be more representative first order 809 // values. 810 811 xmomv[k3+i] = (1-alpha)*xmomc[k] + alpha*xmomv[k3+i]; 812 ymomv[k3+i] = (1-alpha)*ymomc[k] + alpha*ymomv[k3+i]; 813 814 } 788 815 } 789 816 } … … 816 843 if (hc < minimum_allowed_height) { 817 844 818 819 820 821 845 // Set momentum to zero and ensure h is non negative 846 xmomc[k] = 0.0; 847 ymomc[k] = 0.0; 848 if (hc <= 0.0) wc[k] = zc[k]; 822 849 } 823 850 } … … 827 854 for (k=0; k<N; k++) { 828 855 hc = wc[k] - zc[k]; 829 856 830 857 if (hc < minimum_allowed_height) { 831 858 … … 839 866 } else { 840 867 //Reduce excessive speeds derived from division by small hc 841 842 868 //FIXME (Ole): This may be unnecessary with new slope limiters 869 //in effect. 843 870 844 871 u = xmomc[k]/hc; 845 846 847 848 849 850 851 872 if (fabs(u) > maximum_allowed_speed) { 873 reduced_speed = maximum_allowed_speed * u/fabs(u); 874 //printf("Speed (u) has been reduced from %.3f to %.3f\n", 875 // u, reduced_speed); 876 xmomc[k] = reduced_speed * hc; 877 } 878 852 879 v = ymomc[k]/hc; 853 854 855 856 857 858 880 if (fabs(v) > maximum_allowed_speed) { 881 reduced_speed = maximum_allowed_speed * v/fabs(v); 882 //printf("Speed (v) has been reduced from %.3f to %.3f\n", 883 // v, reduced_speed); 884 ymomc[k] = reduced_speed * hc; 885 } 859 886 } 860 887 } … … 914 941 PyArrayObject *normal, *ql, *qr, *edgeflux; 915 942 double g, epsilon, max_speed, H0, zl, zr; 943 double h0, limiting_threshold; 916 944 917 945 if (!PyArg_ParseTuple(args, "OOOddOddd", … … 923 951 924 952 953 h0 = H0*H0; // This ensures a good balance when h approaches H0. 954 // But evidence suggests that h0 can be as little as 955 // epsilon! 956 957 limiting_threshold = 10*H0; // Avoid applying limiter below this 958 // threshold for performance reasons. 959 // See ANUGA manual under flux limiting 960 925 961 _flux_function_central((double*) ql -> data, 926 (double*) qr -> data, 927 zl, 928 zr, 929 ((double*) normal -> data)[0], 930 ((double*) normal -> data)[1], 931 epsilon, H0, g, 932 (double*) edgeflux -> data, 933 &max_speed); 962 (double*) qr -> data, 963 zl, 964 zr, 965 ((double*) normal -> data)[0], 966 ((double*) normal -> data)[1], 967 epsilon, h0, limiting_threshold, 968 g, 969 (double*) edgeflux -> data, 970 &max_speed); 934 971 935 972 return Py_BuildValue("d", max_speed); … … 1808 1845 // Local variables 1809 1846 double max_speed, length, inv_area, zl, zr; 1847 double h0 = H0*H0; // This ensures a good balance when h approaches H0. 1848 1849 double limiting_threshold = 10*H0; // Avoid applying limiter below this 1850 // threshold for performance reasons. 1851 // See ANUGA manual under flux limiting 1810 1852 int k, i, m, n; 1811 1853 int ki, nm=0, ki2; // Index shorthands 1854 1812 1855 1813 1856 // Workspace (making them static actually made function slightly slower (Ole)) … … 1815 1858 1816 1859 static long call = 1; // Static local variable flagging already computed flux 1817 1860 1818 1861 // Start computation 1819 1862 call++; // Flag 'id' of flux calculation for this timestep … … 1901 1944 _flux_function_central(ql, qr, zl, zr, 1902 1945 normals[ki2], normals[ki2+1], 1903 epsilon, H0, g,1946 epsilon, h0, limiting_threshold, g, 1904 1947 edgeflux, &max_speed); 1905 1948
Note: See TracChangeset
for help on using the changeset viewer.