1 | /* |
---|
2 | Estimate bandwidth and latency of a parallel computer using MPI. |
---|
3 | Ole Moller Nielsen - 1998 |
---|
4 | */ |
---|
5 | |
---|
6 | #include <stdio.h> |
---|
7 | #include <stdlib.h> |
---|
8 | #include <mpi.h> |
---|
9 | |
---|
10 | |
---|
/* Benchmark size parameters.
   MAXI  - number of different message sizes that are timed.
   MAXM  - capacity of the message buffer, in doubles.
   BLOCK - step between consecutive message sizes, in doubles.
           The expansion is parenthesized so that BLOCK behaves as a single
           value inside any larger expression (e.g. x/BLOCK). */
#define MAXI  10             /* Number of blocks */
#define MAXM  500000         /* Buffer capacity (doubles) */
#define BLOCK (MAXM/MAXI)    /* Block size */
---|
14 | |
---|
15 | |
---|
/*
  Least-squares fit of the straight line y = a*x + b to N data points.

  x, y : input vectors of length N (only read)
  N    : number of data points
  a, b : outputs - fitted slope (*a) and intercept (*b)

  Returns the scaled residual variance

      sum_i r_i^2 / sum_i x_i^2 / (N-2),    r_i = y_i - a*x_i - b.

  Degenerate input is reported with the sentinel return value -1.0
  (a variance can never be negative) instead of dividing by zero:
    - N < 1           : *a = 0, *b = 0
    - all x[i] equal  : the slope is undetermined; *a = 0, *b = mean of y
    - N < 3           : *a and *b are the (exact) fit, but there are no
                        residual degrees of freedom for a variance estimate
*/
double linfit(double* x, double* y, int N, double* a, double* b)
{
  double Sx=0, Sy=0, SxoN=0, SSoN=0, t=0;
  double res, varest=0, norm=0;
  int i;

  if (N < 1)
  {
    *a = 0.0;
    *b = 0.0;
    return(-1.0);
  }

  for (i=0; i<N; i++)
  {
    Sx = Sx + x[i];
    Sy = Sy + y[i];
  }

  SxoN = Sx/N;   /* Mean of x */

  /* Accumulate the centred sums: SSoN = sum (x-mean)^2, *a = sum (x-mean)*y */
  *a = 0.0;
  for (i=0; i<N; i++)
  {
    t = x[i] - SxoN;
    SSoN = SSoN + t*t;
    *a = *a + t*y[i];
  }

  if (SSoN == 0.0)
  {
    /* No spread in x: a slope cannot be determined */
    *a = 0.0;
    *b = Sy/N;
    return(-1.0);
  }

  *a = (*a)/SSoN;          /* a = (N Sxy - SxSy)/(NSxx - Sx^2) */
  *b = (Sy - Sx*(*a))/N;

  if (N < 3)
  {
    /* N-2 would be zero or negative */
    return(-1.0);
  }

  /* Quality - variance estimate \sum_i r_i^2 /(m-n), scaled by \sum_i x_i^2 */
  for (i=0; i<N; i++)
  {
    norm = norm + x[i]*x[i];
    res = y[i] - (*a)*x[i] - (*b);
    varest = varest + res*res;
  }
  varest = varest/norm/(N-2);
  return(varest);
}
---|
54 | |
---|
55 | main(int argc, char **argv) |
---|
56 | { |
---|
57 | int repeats = 10, msgid = 0; |
---|
58 | int myid, procs; |
---|
59 | int i,j,k,m; |
---|
60 | |
---|
61 | double t1, t2, cpuOH; |
---|
62 | double Tbw, Tlat; |
---|
63 | double varest; |
---|
64 | |
---|
65 | int noelem[MAXI]; |
---|
66 | double bytes[MAXI]; |
---|
67 | double mintime[MAXI]; |
---|
68 | double maxtime[MAXI]; |
---|
69 | double avgtime[MAXI]; |
---|
70 | double A[MAXM]; |
---|
71 | |
---|
72 | int namelen; |
---|
73 | char processor_name[MPI_MAX_PROCESSOR_NAME]; |
---|
74 | |
---|
75 | MPI_Status stat; |
---|
76 | |
---|
77 | |
---|
78 | /* Initialize */ |
---|
79 | |
---|
80 | MPI_Init(&argc,&argv); |
---|
81 | MPI_Comm_size(MPI_COMM_WORLD,&procs); |
---|
82 | MPI_Comm_rank(MPI_COMM_WORLD,&myid); |
---|
83 | MPI_Get_processor_name(processor_name,&namelen); |
---|
84 | |
---|
85 | if (myid==0) |
---|
86 | { |
---|
87 | printf("MAXM = %d, number of processors = %d\n",MAXM,procs); |
---|
88 | printf("Measurements are repeated %d times for reliability\n",repeats); |
---|
89 | } |
---|
90 | |
---|
91 | if (procs < 2) { |
---|
92 | printf("Program needs at least two processors - aborting\n"); |
---|
93 | MPI_Abort(MPI_COMM_WORLD,999); |
---|
94 | } |
---|
95 | |
---|
96 | MPI_Barrier(MPI_COMM_WORLD); /* Synchronize */ |
---|
97 | printf("I am process %d on %s\n",myid,processor_name); |
---|
98 | |
---|
99 | for (j=0; j<MAXM; j++) |
---|
100 | { |
---|
101 | A[j]=rand(); |
---|
102 | } |
---|
103 | for (i=0; i<MAXI; i++) |
---|
104 | { |
---|
105 | avgtime[i] = 0; |
---|
106 | mintime[i] = 1000000; |
---|
107 | maxtime[i] = -1000000; |
---|
108 | } |
---|
109 | |
---|
110 | /* Determine timer overhead */ |
---|
111 | if (myid == 0) { |
---|
112 | cpuOH = 1.0; |
---|
113 | for (k=0; k<repeats; k++) /* Repeat to get reliable timings */ |
---|
114 | { |
---|
115 | t1 = MPI_Wtime(); |
---|
116 | t2 = MPI_Wtime(); |
---|
117 | if (t2-t1 < cpuOH) cpuOH = t2-t1; |
---|
118 | } |
---|
119 | printf("Timing overhead is %f seconds\n\n", cpuOH); |
---|
120 | } |
---|
121 | |
---|
122 | |
---|
123 | |
---|
124 | /* Pass msg circularly */ |
---|
125 | |
---|
126 | for (k=0; k<repeats; k++) { |
---|
127 | if (myid == 0) { |
---|
128 | printf("Run %d of %d\n", k+1, repeats); |
---|
129 | } |
---|
130 | |
---|
131 | for (i=0; i<MAXI; i++) { |
---|
132 | /*m=BLOCK*(i+1);*/ |
---|
133 | m=BLOCK*i+1; |
---|
134 | |
---|
135 | noelem[i] = m; |
---|
136 | |
---|
137 | MPI_Barrier(MPI_COMM_WORLD); /* Synchronize */ |
---|
138 | |
---|
139 | if (myid == 0) { |
---|
140 | t1=MPI_Wtime(); |
---|
141 | MPI_Send(&A[0],m,MPI_DOUBLE,1,msgid,MPI_COMM_WORLD); |
---|
142 | MPI_Recv(&A[0],m,MPI_DOUBLE,procs-1,msgid,MPI_COMM_WORLD,&stat); |
---|
143 | t2=MPI_Wtime() - t1 - cpuOH; |
---|
144 | t2 = t2/procs; |
---|
145 | avgtime[i] = avgtime[i] + t2; |
---|
146 | if (t2 < mintime[i]) mintime[i] = t2; |
---|
147 | if (t2 > maxtime[i]) maxtime[i] = t2; |
---|
148 | } else { |
---|
149 | MPI_Recv(&A[0],m,MPI_DOUBLE,myid-1,msgid,MPI_COMM_WORLD,&stat); |
---|
150 | MPI_Send(&A[0],m,MPI_DOUBLE,(myid+1)%procs,msgid,MPI_COMM_WORLD); |
---|
151 | } |
---|
152 | } |
---|
153 | } |
---|
154 | |
---|
155 | if (myid == 0) { |
---|
156 | printf("Bytes transferred time (micro seconds)\n"); |
---|
157 | printf(" min avg max \n"); |
---|
158 | printf("----------------------------------------------\n"); |
---|
159 | |
---|
160 | for (i=0; i<MAXI; i++) { |
---|
161 | avgtime[i] = avgtime[i]/repeats*1.0e6; /*Average micro seconds*/ |
---|
162 | mintime[i] = mintime[i]*1.0e6; /*Min micro seconds*/ |
---|
163 | maxtime[i] = maxtime[i]*1.0e6; /*Min micro seconds*/ |
---|
164 | |
---|
165 | m = noelem[i]; |
---|
166 | bytes[i] = (double) 8*noelem[i]; |
---|
167 | |
---|
168 | /* printf("m=%d, time(min)=%lf, time(avg)=%lf, time(max)=%lf\n", |
---|
169 | m,mintime[i],avgtime[i],maxtime[i]); */ |
---|
170 | printf("%10d %10d %10d %10d\n", |
---|
171 | (int) bytes[i], (int) mintime[i], (int) avgtime[i], (int)maxtime[i]); |
---|
172 | } |
---|
173 | |
---|
174 | varest=linfit(bytes, mintime, MAXI, &Tbw, &Tlat); |
---|
175 | printf("\nLinear regression on best timings (t = t_l + t_b * bytes):\n"); |
---|
176 | |
---|
177 | printf(" t_b = %f\n t_l = %f\n", Tbw, Tlat); |
---|
178 | printf(" Estimated relative variance = %.9f\n\n",varest); |
---|
179 | |
---|
180 | printf("Estimated bandwith (1/t_b): %.3f Mb/s\n", (1.0/Tbw)); |
---|
181 | printf("Estimated latency: %d micro s\n", |
---|
182 | (int) (mintime[0] - (float) bytes[0]* (float)Tbw)); |
---|
183 | |
---|
184 | } |
---|
185 | |
---|
186 | MPI_Finalize(); |
---|
187 | } |
---|
188 | |
---|