[85] | 1 | /* |
---|
| 2 | Estimate bandwidth and latency of a parallel computer using MPI. |
---|
| 3 | Ole Moller Nielsen - 1998 |
---|
| 4 | */ |
---|
| 5 | |
---|
| 6 | #include <stdio.h> |
---|
| 7 | #include <stdlib.h> |
---|
| 8 | #include <mpi.h> |
---|
| 9 | |
---|
| 10 | |
---|
| 11 | #define MAXI 10 /* Number of blocks */ |
---|
| 12 | #define MAXM 500000 /* Largest block */ |
---|
| 13 | #define BLOCK MAXM/MAXI /* Block size */ |
---|
| 14 | |
---|
| 15 | |
---|
| 16 | double linfit(double* x, double* y, int N, double* a, double* b) |
---|
| 17 | { |
---|
| 18 | /* Given vectors y and x fit a and b to the model y = ax + b */ |
---|
| 19 | |
---|
| 20 | double Sx=0, Sy=0, SxoN=0, SSoN=0, t=0; |
---|
| 21 | double res, varest=0, norm=0; |
---|
| 22 | int i; |
---|
| 23 | |
---|
| 24 | for (i=0; i<N; i++) |
---|
| 25 | { |
---|
| 26 | /*printf("x,y = %f, %f\n",x[i],y[i]);*/ |
---|
| 27 | Sx = Sx + x[i]; |
---|
| 28 | Sy = Sy + y[i]; |
---|
| 29 | } |
---|
| 30 | |
---|
| 31 | SxoN = Sx/N; |
---|
| 32 | |
---|
| 33 | *a = 0.0; |
---|
| 34 | for (i=0; i<N; i++) |
---|
| 35 | { |
---|
| 36 | t = x[i] - SxoN; |
---|
| 37 | SSoN = SSoN + t*t; |
---|
| 38 | *a = *a + t*y[i]; |
---|
| 39 | } |
---|
| 40 | |
---|
| 41 | *a = (*a)/SSoN; /* a = (N Sxy - SxSy)/(NSxx - Sx^2) */ |
---|
| 42 | *b = (Sy - Sx*(*a))/N; |
---|
| 43 | |
---|
| 44 | /* Quality - variance estimate \sum_i r_i^2 /(m-n) */ |
---|
| 45 | for (i=0; i<N; i++) |
---|
| 46 | { |
---|
| 47 | norm = norm + x[i]*x[i]; |
---|
| 48 | res = y[i] - (*a)*x[i] - (*b); |
---|
| 49 | varest = varest + res*res; |
---|
| 50 | } |
---|
| 51 | varest = varest/norm/(N-2); |
---|
| 52 | return(varest); |
---|
| 53 | } |
---|
| 54 | |
---|
| 55 | main(int argc, char **argv) |
---|
| 56 | { |
---|
| 57 | int repeats = 10, msgid = 0; |
---|
| 58 | int myid, procs; |
---|
| 59 | int i,j,k,m; |
---|
| 60 | |
---|
| 61 | double t1, t2, cpuOH; |
---|
| 62 | double Tbw, Tlat; |
---|
| 63 | double varest; |
---|
| 64 | |
---|
| 65 | int noelem[MAXI]; |
---|
| 66 | double bytes[MAXI]; |
---|
| 67 | double mintime[MAXI]; |
---|
| 68 | double maxtime[MAXI]; |
---|
| 69 | double avgtime[MAXI]; |
---|
| 70 | double A[MAXM]; |
---|
| 71 | |
---|
| 72 | int namelen; |
---|
| 73 | char processor_name[MPI_MAX_PROCESSOR_NAME]; |
---|
| 74 | |
---|
| 75 | MPI_Status stat; |
---|
| 76 | |
---|
| 77 | |
---|
| 78 | /* Initialize */ |
---|
| 79 | |
---|
| 80 | MPI_Init(&argc,&argv); |
---|
| 81 | MPI_Comm_size(MPI_COMM_WORLD,&procs); |
---|
| 82 | MPI_Comm_rank(MPI_COMM_WORLD,&myid); |
---|
| 83 | MPI_Get_processor_name(processor_name,&namelen); |
---|
| 84 | |
---|
| 85 | if (myid==0) |
---|
| 86 | { |
---|
| 87 | printf("MAXM = %d, number of processors = %d\n",MAXM,procs); |
---|
| 88 | printf("Measurements are repeated %d times for reliability\n",repeats); |
---|
| 89 | } |
---|
| 90 | |
---|
| 91 | if (procs < 2) { |
---|
| 92 | printf("Program needs at least two processors - aborting\n"); |
---|
| 93 | MPI_Abort(MPI_COMM_WORLD,999); |
---|
| 94 | } |
---|
| 95 | |
---|
| 96 | MPI_Barrier(MPI_COMM_WORLD); /* Synchronize */ |
---|
| 97 | printf("I am process %d on %s\n",myid,processor_name); |
---|
| 98 | |
---|
| 99 | for (j=0; j<MAXM; j++) |
---|
| 100 | { |
---|
| 101 | A[j]=rand(); |
---|
| 102 | } |
---|
| 103 | for (i=0; i<MAXI; i++) |
---|
| 104 | { |
---|
| 105 | avgtime[i] = 0; |
---|
| 106 | mintime[i] = 1000000; |
---|
| 107 | maxtime[i] = -1000000; |
---|
| 108 | } |
---|
| 109 | |
---|
| 110 | /* Determine timer overhead */ |
---|
| 111 | if (myid == 0) { |
---|
| 112 | cpuOH = 1.0; |
---|
| 113 | for (k=0; k<repeats; k++) /* Repeat to get reliable timings */ |
---|
| 114 | { |
---|
| 115 | t1 = MPI_Wtime(); |
---|
| 116 | t2 = MPI_Wtime(); |
---|
| 117 | if (t2-t1 < cpuOH) cpuOH = t2-t1; |
---|
| 118 | } |
---|
| 119 | printf("Timing overhead is %f seconds\n\n", cpuOH); |
---|
| 120 | } |
---|
| 121 | |
---|
| 122 | |
---|
| 123 | |
---|
| 124 | /* Pass msg circularly */ |
---|
| 125 | |
---|
| 126 | for (k=0; k<repeats; k++) { |
---|
| 127 | if (myid == 0) { |
---|
| 128 | printf("Run %d of %d\n", k+1, repeats); |
---|
| 129 | } |
---|
| 130 | |
---|
| 131 | for (i=0; i<MAXI; i++) { |
---|
| 132 | /*m=BLOCK*(i+1);*/ |
---|
| 133 | m=BLOCK*i+1; |
---|
| 134 | |
---|
| 135 | noelem[i] = m; |
---|
| 136 | |
---|
| 137 | MPI_Barrier(MPI_COMM_WORLD); /* Synchronize */ |
---|
| 138 | |
---|
| 139 | if (myid == 0) { |
---|
| 140 | t1=MPI_Wtime(); |
---|
| 141 | MPI_Send(&A[0],m,MPI_DOUBLE,1,msgid,MPI_COMM_WORLD); |
---|
| 142 | MPI_Recv(&A[0],m,MPI_DOUBLE,procs-1,msgid,MPI_COMM_WORLD,&stat); |
---|
| 143 | t2=MPI_Wtime() - t1 - cpuOH; |
---|
| 144 | t2 = t2/procs; |
---|
| 145 | avgtime[i] = avgtime[i] + t2; |
---|
| 146 | if (t2 < mintime[i]) mintime[i] = t2; |
---|
| 147 | if (t2 > maxtime[i]) maxtime[i] = t2; |
---|
| 148 | } else { |
---|
| 149 | MPI_Recv(&A[0],m,MPI_DOUBLE,myid-1,msgid,MPI_COMM_WORLD,&stat); |
---|
| 150 | MPI_Send(&A[0],m,MPI_DOUBLE,(myid+1)%procs,msgid,MPI_COMM_WORLD); |
---|
| 151 | } |
---|
| 152 | } |
---|
| 153 | } |
---|
| 154 | |
---|
| 155 | if (myid == 0) { |
---|
| 156 | printf("Bytes transferred time (micro seconds)\n"); |
---|
| 157 | printf(" min avg max \n"); |
---|
| 158 | printf("----------------------------------------------\n"); |
---|
| 159 | |
---|
| 160 | for (i=0; i<MAXI; i++) { |
---|
| 161 | avgtime[i] = avgtime[i]/repeats*1.0e6; /*Average micro seconds*/ |
---|
| 162 | mintime[i] = mintime[i]*1.0e6; /*Min micro seconds*/ |
---|
| 163 | maxtime[i] = maxtime[i]*1.0e6; /*Min micro seconds*/ |
---|
| 164 | |
---|
| 165 | m = noelem[i]; |
---|
| 166 | bytes[i] = (double) 8*noelem[i]; |
---|
| 167 | |
---|
| 168 | /* printf("m=%d, time(min)=%lf, time(avg)=%lf, time(max)=%lf\n", |
---|
| 169 | m,mintime[i],avgtime[i],maxtime[i]); */ |
---|
| 170 | printf("%10d %10d %10d %10d\n", |
---|
| 171 | (int) bytes[i], (int) mintime[i], (int) avgtime[i], (int)maxtime[i]); |
---|
| 172 | } |
---|
| 173 | |
---|
| 174 | varest=linfit(bytes, mintime, MAXI, &Tbw, &Tlat); |
---|
| 175 | printf("\nLinear regression on best timings (t = t_l + t_b * bytes):\n"); |
---|
| 176 | |
---|
| 177 | printf(" t_b = %f\n t_l = %f\n", Tbw, Tlat); |
---|
| 178 | printf(" Estimated relative variance = %.9f\n\n",varest); |
---|
| 179 | |
---|
| 180 | printf("Estimated bandwith (1/t_b): %.3f Mb/s\n", (1.0/Tbw)); |
---|
| 181 | printf("Estimated latency: %d micro s\n", |
---|
| 182 | (int) (mintime[0] - (float) bytes[0]* (float)Tbw)); |
---|
| 183 | |
---|
| 184 | } |
---|
| 185 | |
---|
| 186 | MPI_Finalize(); |
---|
| 187 | } |
---|
| 188 | |
---|