1 | #!/usr/bin/env python |
---|
2 | |
---|
3 | ''' |
---|
4 | A program to compare two SWW files for "equality". |
---|
5 | |
---|
6 | This program makes lots of assumptions about the structure of the SWW files, |
---|
7 | so if that structure changes, this program must change. |
---|
8 | ''' |
---|
9 | |
---|
10 | import sys |
---|
11 | import os |
---|
12 | import os.path |
---|
13 | import getopt |
---|
14 | from Scientific.IO.NetCDF import NetCDFFile |
---|
15 | import numpy as num |
---|
16 | from anuga.config import netcdf_mode_r |
---|
17 | |
---|
18 | |
---|
19 | ##### |
---|
20 | # Various constants. |
---|
21 | ##### |
---|
22 | |
---|
23 | # Global for the '-q' quiet flag |
---|
24 | quiet = None |
---|
25 | |
---|
26 | # default tolerances - same as numpy defaults |
---|
27 | default_abs_tolerance = 1.0e-08 |
---|
28 | default_rel_tolerance = 1.0000000000000001e-05 |
---|
29 | |
---|
30 | # Global attributes that should exist and be same in both files |
---|
31 | # Don't have to have all of these, and we don't care about others. |
---|
32 | expect_global_attributes = ['smoothing', 'vertices_are_stored_uniquely', |
---|
33 | 'order', 'starttime', |
---|
34 | 'xllcorner', 'yllcorner', |
---|
35 | 'zone', 'false_easting', 'false_northing', |
---|
36 | 'datum', 'projection', 'units'] |
---|
37 | |
---|
38 | # dimensions expected, with expected values (None means unknown) |
---|
39 | expected_dimensions = {'number_of_volumes': None, |
---|
40 | 'number_of_vertices': 3, |
---|
41 | 'numbers_in_range': 2, |
---|
42 | 'number_of_points': None, |
---|
43 | 'number_of_timesteps': None} |
---|
44 | |
---|
45 | # Variables expected, with expected dimensions. |
---|
46 | # Don't have to have all of these, and we don't care about others. |
---|
47 | expected_variables = {'x': ('number_of_points',), |
---|
48 | 'y': ('number_of_points',), |
---|
49 | 'elevation': ('number_of_points',), |
---|
50 | 'elevation_range': ('numbers_in_range',), |
---|
51 | 'z': ('number_of_points',), |
---|
52 | 'volumes': ('number_of_volumes', 'number of vertices'), |
---|
53 | 'time': ('number_of_timesteps',), |
---|
54 | 'stage': ('number_of_timesteps', 'numbers_of_points',), |
---|
55 | 'stage_range': ('numbers_in_range',), |
---|
56 | 'xmomentum': ('number_of_timesteps', 'number_of_points'), |
---|
57 | 'xmomentum_range': ('numbers_in_range'), |
---|
58 | 'ymomentum': ('number_of_timesteps', 'number_of_points'), |
---|
59 | 'ymomentum_range': ('numbers_in_range')} |
---|
60 | |
---|
61 | ## |
---|
62 | # @brief An exception to inform user of usage problems. |
---|
63 | class Usage(Exception): |
---|
64 | def __init__(self, msg): |
---|
65 | self.msg = msg |
---|
66 | |
---|
67 | |
---|
68 | ## |
---|
69 | # @brief Compare two SWW files. |
---|
70 | # @param files A tuple of two filenames. |
---|
71 | # @param globals A list of global attribute names to compare. |
---|
72 | # @param timesteps A list of timesteps to compare at. |
---|
73 | # @param variables A list of variable names to compare. |
---|
74 | # @return Returns if files 'equal', else raises RuntimeError. |
---|
75 | def files_are_the_same(files, globals=None, timesteps=None, variables=None, |
---|
76 | rel_tolerance=default_rel_tolerance, |
---|
77 | abs_tolerance=default_abs_tolerance): |
---|
78 | # split out the filenames and check they exist |
---|
79 | (file1, file2) = files |
---|
80 | filename1 = os.path.basename(file1) |
---|
81 | filename2 = os.path.basename(file2) |
---|
82 | # width = max(len(filename1), len(filename2)) |
---|
83 | # filename1 = filename1.rjust(width) |
---|
84 | # filename2 = filename2.rjust(width) |
---|
85 | |
---|
86 | error = False |
---|
87 | error_msg = '' |
---|
88 | |
---|
89 | try: |
---|
90 | fid1 = NetCDFFile(file1, netcdf_mode_r) |
---|
91 | except: |
---|
92 | error_msg += "\nFile '%s' can't be opened?\n" % file1 |
---|
93 | error = True |
---|
94 | |
---|
95 | try: |
---|
96 | fid2 = NetCDFFile(file2, netcdf_mode_r) |
---|
97 | except: |
---|
98 | error_msg += "\nFile '%s' can't be opened?\n" % file2 |
---|
99 | error = True |
---|
100 | fid1.close() |
---|
101 | |
---|
102 | if error: |
---|
103 | raise RuntimeError, error_msg |
---|
104 | |
---|
105 | if globals is None: |
---|
106 | globals = expect_global_attributes |
---|
107 | |
---|
108 | ##### |
---|
109 | # First, check that files have the required structure |
---|
110 | ##### |
---|
111 | |
---|
112 | # dimensions - only check expected dimensions |
---|
113 | for key in expected_dimensions: |
---|
114 | if key not in fid1.dimensions.keys(): |
---|
115 | error_msg += ("\nFile %s doesn't contain dimension '%s'\n" |
---|
116 | % (filename1, key)) |
---|
117 | error = True |
---|
118 | if key not in fid2.dimensions.keys(): |
---|
119 | error_msg += ("\nFile %s doesn't contain dimension '%s'\n" |
---|
120 | % (filename2, key)) |
---|
121 | error = True |
---|
122 | |
---|
123 | # now check that dimensions are the same length |
---|
124 | # NOTE: DOESN'T CHECK 'UNLIMITED' DIMENSIONS YET! (get None at the moment) |
---|
125 | for dim in expected_dimensions: |
---|
126 | dim1_shape = fid1.dimensions.get(dim, None) |
---|
127 | dim2_shape = fid2.dimensions.get(dim, None) |
---|
128 | if dim1_shape != dim2_shape: |
---|
129 | error_msg += ("\nFile %s has '%s' dimension of size %s,\n" |
---|
130 | "file %s has that dimension of size %s\n" |
---|
131 | % (filename1, dim, str(dim1_shape), |
---|
132 | filename2, str(dim2_shape))) |
---|
133 | error = True |
---|
134 | |
---|
135 | # check that we have the required globals |
---|
136 | if globals: |
---|
137 | for glob in globals: |
---|
138 | if glob not in dir(fid1): |
---|
139 | error_msg += ("\nGlobal attribute '%s' isn't in file %s\n" |
---|
140 | % (glob, filename1)) |
---|
141 | error = True |
---|
142 | if glob not in dir(fid2): |
---|
143 | error_msg += ("\nGlobal attribute '%s' isn't in file %s\n" |
---|
144 | % (glob, filename2)) |
---|
145 | error = True |
---|
146 | else: |
---|
147 | # get list of global attributes |
---|
148 | glob_vars1 = [] |
---|
149 | glob_vars2 = [] |
---|
150 | for glob in expect_global_attributes: |
---|
151 | if glob in dir(fid1): |
---|
152 | glob_vars1.append(glob) |
---|
153 | if glob in dir(fid2): |
---|
154 | glob_vars2.append(glob) |
---|
155 | |
---|
156 | # now check attribute lists are same |
---|
157 | if glob_vars1 != glob_vars2: |
---|
158 | error_msg = ('\nFiles differ in global attributes:\n' |
---|
159 | '%s: %s,\n' |
---|
160 | '%s: %s\n' |
---|
161 | % (filename1, str(glob_vars1), |
---|
162 | filename2, str(glob_vars2))) |
---|
163 | error = True |
---|
164 | |
---|
165 | # get variables to test |
---|
166 | if variables: |
---|
167 | for var in variables: |
---|
168 | if var not in fid1.variables.keys(): |
---|
169 | error_msg += ("\nVariable '%s' isn't in file %s\n" |
---|
170 | % (var, filename1)) |
---|
171 | error = True |
---|
172 | if var not in fid2.variables.keys(): |
---|
173 | error_msg += ("\nVariable '%s' isn't in file %s\n" |
---|
174 | % (var, filename2)) |
---|
175 | error = True |
---|
176 | else: |
---|
177 | # check that variables are as expected in both files |
---|
178 | var_names1 = [] |
---|
179 | var_names2 = [] |
---|
180 | for var_name in expected_variables: |
---|
181 | if fid1.variables.has_key(var_name): |
---|
182 | var_names1.append(var_name) |
---|
183 | if fid2.variables.has_key(var_name): |
---|
184 | var_names2.append(var_name) |
---|
185 | |
---|
186 | if var_names1 != var_names2: |
---|
187 | error_msg += ('\nVariables are not the same between files:\n' |
---|
188 | '%s variables=%s,\n' |
---|
189 | '%s variables=%s\n' |
---|
190 | % (filename1, str(var_names1), filename2, str(var_names2))) |
---|
191 | error = True |
---|
192 | variables = var_names1 |
---|
193 | |
---|
194 | # get size of time dimension |
---|
195 | num_timesteps1 = fid1.variables['time'].shape |
---|
196 | num_timesteps2 = fid2.variables['time'].shape |
---|
197 | if num_timesteps1 != num_timesteps2: |
---|
198 | error_msg += ('Files have different number of timesteps:\n' |
---|
199 | '%s=%s,\n' |
---|
200 | '%s=%s\n' |
---|
201 | % (filename1, str(num_timesteps1), |
---|
202 | filename2, str(num_timesteps2))) |
---|
203 | error = True |
---|
204 | |
---|
205 | num_timesteps = num_timesteps1[0] |
---|
206 | |
---|
207 | # variable shapes same? |
---|
208 | for var_name in variables: |
---|
209 | var1 = fid1.variables[var_name] |
---|
210 | var2 = fid2.variables[var_name] |
---|
211 | var1_shape = var1.shape |
---|
212 | var2_shape = var2.shape |
---|
213 | if var1_shape != var2_shape: |
---|
214 | error_msg += ('Files differ in variable %s shape:\n' |
---|
215 | '%s: %s,\n' |
---|
216 | '%s: %s\n' |
---|
217 | % (var_name, filename1, str(var1_shape), |
---|
218 | filename2, str(var2_shape))) |
---|
219 | error = True |
---|
220 | |
---|
221 | if error: |
---|
222 | fid1.close() |
---|
223 | fid2.close() |
---|
224 | raise RuntimeError, error_msg |
---|
225 | |
---|
226 | ##### |
---|
227 | # Now check that actual data values are the same |
---|
228 | ##### |
---|
229 | |
---|
230 | error_msg = '' |
---|
231 | glob_vars_bad = {} |
---|
232 | data_vars_bad = {} |
---|
233 | |
---|
234 | # check values of global attributes |
---|
235 | for glob_name in globals: |
---|
236 | if getattr(fid1, glob_name) != getattr(fid2, glob_name): |
---|
237 | print("\nFiles differ in global '%s':\n" |
---|
238 | "%s: '%s',\n" |
---|
239 | "%s: '%s'" |
---|
240 | % (glob_name, filename1, str(g1), filename2, str(g2))) |
---|
241 | glob_vars_bad[glob_name] = glob_vars_bad.get(glob_name, 0) + 1 |
---|
242 | error = True |
---|
243 | |
---|
244 | # check data variables, be clever with time series data |
---|
245 | max_rel_difference = -1 |
---|
246 | diff_count = 0 |
---|
247 | for var_name in variables: |
---|
248 | var_dims = expected_variables[var_name] |
---|
249 | if (len(var_dims) > 1) and (var_dims[0] == 'number_of_timesteps'): |
---|
250 | # time series, check by timestep block |
---|
251 | for t in xrange(num_timesteps): |
---|
252 | var1 = num.array(fid1.variables[var_name][t,:]) |
---|
253 | var2 = num.array(fid2.variables[var_name][t,:]) |
---|
254 | if not num.allclose(var1, var2, |
---|
255 | rtol=rel_tolerance, atol=abs_tolerance): |
---|
256 | error = True |
---|
257 | for i in xrange(len(var1)): |
---|
258 | if not num.allclose(var1[i], var2[i], |
---|
259 | rtol=rel_tolerance, |
---|
260 | atol=abs_tolerance): |
---|
261 | abs_difference = num.abs(var1[i]-var2[i]) |
---|
262 | max_a_b = num.max(num.abs(var1[i]), |
---|
263 | num.abs(var2[i])) |
---|
264 | rel_difference = num.abs(abs_difference/max_a_b) |
---|
265 | |
---|
266 | if not quiet: |
---|
267 | print('\nFiles differ in variable ' |
---|
268 | '%s[%d,%d]:\n' |
---|
269 | '%s: %f\n' |
---|
270 | '%s: %f\n' |
---|
271 | 'abs. difference=%e, rel. difference=%e\n' |
---|
272 | % (var_name, t, i, |
---|
273 | filename1, var1[i], |
---|
274 | filename2, var2[i], |
---|
275 | abs_difference, |
---|
276 | rel_difference)) |
---|
277 | |
---|
278 | if rel_difference > max_rel_difference: |
---|
279 | max_rel_difference = rel_difference |
---|
280 | max_rel_difference_abs = abs_difference |
---|
281 | max_rel_difference_a = var1[i] |
---|
282 | max_rel_difference_b = var2[i] |
---|
283 | |
---|
284 | data_vars_bad[var_name] = data_vars_bad.get(var_name, 0) + 1 |
---|
285 | diff_count += 1 |
---|
286 | else: |
---|
287 | # simple data, check whole thing at once |
---|
288 | var1 = num.array(fid1.variables[var_name][:]) |
---|
289 | var2 = num.array(fid2.variables[var_name][:]) |
---|
290 | if not num.allclose(var1, var2, |
---|
291 | rtol=rel_tolerance, atol=abs_tolerance): |
---|
292 | for j in xrange(len(var1)): |
---|
293 | if not num.allclose(var1[j], var2[j], |
---|
294 | rtol=rel_tolerance, atol=abs_tolerance): |
---|
295 | abs_difference = num.abs(var1[j]-var2[j]) |
---|
296 | max_a_b = num.max(num.abs(var1[j]), |
---|
297 | num.abs(var2[j])) |
---|
298 | rel_difference = num.abs(abs_difference/max_a_b) |
---|
299 | |
---|
300 | if not quiet: |
---|
301 | print('\nFiles differ in variable ' |
---|
302 | '%s[%d]:\n' |
---|
303 | '%s: %f\n' |
---|
304 | '%s: %f\n' |
---|
305 | 'abs. difference=%e, rel. difference=%e\n' |
---|
306 | % (var_name, j, |
---|
307 | filename1, var1[j], |
---|
308 | filename2, var2[j], |
---|
309 | abs_difference, |
---|
310 | rel_difference)) |
---|
311 | |
---|
312 | if rel_difference > max_rel_difference: |
---|
313 | max_rel_difference = rel_difference |
---|
314 | max_rel_difference_abs = abs_difference |
---|
315 | max_rel_difference_a = var1[j] |
---|
316 | max_rel_difference_b = var2[j] |
---|
317 | |
---|
318 | data_vars_bad[var_name] = data_vars_bad.get(var_name, 0) + 1 |
---|
319 | diff_count += 1 |
---|
320 | error = True |
---|
321 | |
---|
322 | ##### |
---|
323 | # close files and signal OK or ERROR |
---|
324 | ##### |
---|
325 | |
---|
326 | fid1.close() |
---|
327 | fid2.close() |
---|
328 | |
---|
329 | if error: |
---|
330 | error_msg += ('\nNumber of data differences=%d\n' |
---|
331 | 'Maximum relative data difference=%e\n' |
---|
332 | 'associated absolute difference=%e\n' |
---|
333 | "associated 'a' value=%e\n" |
---|
334 | "associated 'b' value=%e\n" |
---|
335 | % (diff_count, max_rel_difference, max_rel_difference_abs, |
---|
336 | max_rel_difference_a, max_rel_difference_b)) |
---|
337 | error_msg += ('\nglob_vars bad=%s\n' % str(glob_vars_bad)) |
---|
338 | error_msg += ('\ndata_vars bad=%s\n' % str(data_vars_bad)) |
---|
339 | raise RuntimeError, error_msg |
---|
340 | |
---|
341 | return |
---|
342 | |
---|
343 | |
---|
344 | ## |
---|
345 | # @brief Return a usage string. |
---|
346 | def usage(): |
---|
347 | result = [] |
---|
348 | a = result.append |
---|
349 | a('Usage: %s <options> <file1> <file2>\n' % ProgName) |
---|
350 | a('where <options> is zero or more of:\n') |
---|
351 | a(' -h print this help\n') |
---|
352 | a(' -q be quiet, print only summary of differences\n') |
---|
353 | a(" -a <val> set absolute threshold of 'equivalent'\n") |
---|
354 | a(" -r <val> set relative threshold of 'equivalent'\n") |
---|
355 | a(' -g <arg> check only global attributes specified\n') |
---|
356 | a(' <arg> has the form <globname>[,<globname>[,...]]\n') |
---|
357 | a(' -t <arg> check only timesteps specified\n') |
---|
358 | a(' <arg> has the form <starttime>[,<stoptime>[,<step>]]\n') |
---|
359 | a(' -v <arg> check only the named variables\n') |
---|
360 | a(' <arg> has the form <varname>[,<varname>[,...]]\n') |
---|
361 | a('and <file1> and <file2> are two SWW files to compare.\n') |
---|
362 | a('\n') |
---|
363 | a('The program exit status is one of:\n') |
---|
364 | a(' 0 the two files are equivalent\n') |
---|
365 | a(' else the files are not equivalent.') |
---|
366 | return ''.join(result) |
---|
367 | |
---|
368 | ## |
---|
369 | # @brief Print a message to stderr. |
---|
370 | def warn(msg): |
---|
371 | print >>sys.stderr, msg |
---|
372 | |
---|
373 | |
---|
374 | ## |
---|
375 | # @brief |
---|
376 | # @param argv |
---|
377 | # @return The status code the program will exit with. |
---|
378 | def main(argv=None): |
---|
379 | global quiet |
---|
380 | |
---|
381 | if argv is None: |
---|
382 | argv = sys.argv |
---|
383 | |
---|
384 | try: |
---|
385 | try: |
---|
386 | opts, args = getopt.getopt(argv[1:], 'hqa:g:r:t:v:', |
---|
387 | ['help', 'globals', |
---|
388 | 'variables', 'timesteps']) |
---|
389 | except getopt.error, msg: |
---|
390 | raise Usage(msg) |
---|
391 | except Usage, err: |
---|
392 | print >>sys.stderr, err.msg |
---|
393 | print >>sys.stderr, "for help use --help" |
---|
394 | return 2 |
---|
395 | |
---|
396 | # process options |
---|
397 | globals = None |
---|
398 | timesteps = None |
---|
399 | variables = None |
---|
400 | quiet = False |
---|
401 | rel_tolerance = default_rel_tolerance |
---|
402 | abs_tolerance = default_abs_tolerance |
---|
403 | for opt, arg in opts: |
---|
404 | if opt in ('-h', '--help'): |
---|
405 | print usage() |
---|
406 | sys.exit(0) |
---|
407 | elif opt in ('-q', '--quiet'): |
---|
408 | quiet = True |
---|
409 | elif opt in ('-a', '--absolute'): |
---|
410 | abs_tolerance = float(arg) |
---|
411 | elif opt in ('-r', '--relative'): |
---|
412 | rel_tolerance = float(arg) |
---|
413 | elif opt in ('-g', '--globals'): |
---|
414 | globals = arg.split(',') |
---|
415 | elif opt in ('-t', '--timesteps'): |
---|
416 | timesteps = arg.split(',') |
---|
417 | elif opt in ('-v', '--variables'): |
---|
418 | variables = arg.split(',') |
---|
419 | |
---|
420 | # process arguments |
---|
421 | if len(args) != 2: |
---|
422 | msg = usage() |
---|
423 | print 'msg=%s' % msg |
---|
424 | raise Usage(msg) |
---|
425 | |
---|
426 | try: |
---|
427 | files_are_the_same(args, globals=globals, |
---|
428 | timesteps=timesteps, variables=variables, |
---|
429 | rel_tolerance=rel_tolerance, |
---|
430 | abs_tolerance=abs_tolerance) |
---|
431 | except RuntimeError, msg: |
---|
432 | print msg |
---|
433 | return 10 |
---|
434 | |
---|
435 | |
---|
436 | if __name__ == "__main__": |
---|
437 | global ProgName |
---|
438 | |
---|
439 | ProgName = os.path.basename(sys.argv[0]) |
---|
440 | |
---|
441 | sys.exit(main()) |
---|
442 | |
---|