source: misc/tools/write_large_files/rwi_big_file.py @ 7276

Last change on this file since 7276 was 7276, checked in by ole, 15 years ago

Merged numpy branch back into the trunk.

In ~/sandpit/anuga/anuga_core/source
svn merge -r 6246:HEAD ../../branches/numpy .

In ~/sandpit/anuga/anuga_validation
svn merge -r 6417:HEAD ../branches/numpy_anuga_validation .

In ~/sandpit/anuga/misc
svn merge -r 6809:HEAD ../branches/numpy_misc .

For all merges, I used numpy version where conflicts existed

The suites test_all.py (in source/anuga) and validate_all.py passed using Python2.5 with numpy on my Ubuntu Linux box.

  • Property svn:executable set to *
File size: 9.0 KB
Line 
1#!/usr/bin/env python
2################################################################################
3# A test program to write a large NetCDF file and see if we can read it.
4#
5# The file is written with a defined number of variables, each of a required
6# size.  Each variable consists of one or more 'slices', each 1000*1000 cells.
7# Each element of each slice of each variable is written with a *unique* value
8# that is checked when the file is read.
9#
10# So, for variable V (numbering from 0), slice S (starting from 0), element
11# E will range from 0 to 10**6-1, and the element value will be:
12#     S*100 + V + E*1.0E-6
13# which will look like:
14#     SSSSVV.EEEEEE
15# This implies that the maximum number of variables written is 100 (0 to 99).
16################################################################################
17
18import sys
19import getopt
20import time
21import numpy as num
22import Scientific.IO.NetCDF as nc
23
######
# Various constants
######

# default size modifier, assumed when <varsize> carries no suffix letter
DEF_MODIFIER = 'M'

# array slice definitions
XDIM = 1000*1000        # number of cells in one slice
SLICE_SIZE = XDIM*8     # 8 bytes / float
MBYTE = 1024*1024

# Dictionary for suffix modifiers: suffix letter -> multiplier in bytes
Suffixes = {'M': MBYTE,
            'G': 1024*MBYTE
           }

######
# Globals
######

# behaviour switches; main() sets these from the command line options
TimeFileWrite = False
TimeFileRead = False
CloseAfterSlice = False
Verbose = False

# mask for variable names
VarnameMask = 'var_%02d'

# the 0.EEEEEE part of every element value: element E holds E*1.0e-6.
# Defined at module scope because read_file() and write_file() both read it;
# previously it existed only after main() had run, so calling either function
# from an importing module raised NameError.
ElementIndexArray = num.arange(XDIM, dtype='d') * 1.0e-6
53
54######
55# Mainline code
56######
57
##
# @brief Read a NetCDF file - see if it is as we wrote it.
# @param filename The name of the file to read.
# @param variable_size Size of variable, in bytes.
# @param num_variables Number of variables.
# @return True if every slice of every variable matched the expected values,
#         False as soon as one slice does not.
# @note Reads the module globals ElementIndexArray, TimeFileRead,
#       CloseAfterSlice and Verbose.
def read_file(filename, variable_size, num_variables):
    # an all-ones slice, scaled below to build each expected slice
    slice_array_mask = num.ones((XDIM,), 'd')

    # if timing file read, get start time
    if TimeFileRead:
        start_file_read = time.time()

    fid = nc.NetCDFFile(filename, 'r')

    # num file bytes read counter
    file_bytes_read = 0

    # try/finally so the file is closed on the mismatch return and on any
    # exception, not only when every slice checks out
    try:
        for var_num in range(num_variables):
            varname = VarnameMask % var_num

            # num variable bytes read counter
            var_bytes_read = 0

            slice_number = 0
            while var_bytes_read < variable_size:
                var_bytes_read += SLICE_SIZE
                file_bytes_read += SLICE_SIZE

                # create expected slice array: S*100 + V + E*1.0e-6
                slice_array = slice_array_mask*slice_number*100 + var_num
                slice_array += ElementIndexArray

                if Verbose:
                    print('File %s, variable %s, reading slice %d: '
                          'var=%.1fMiB, file=%.1fMiB' %
                          (filename, varname, slice_number,
                           float(var_bytes_read)/MBYTE,
                           float(file_bytes_read)/MBYTE))

                # look the variable up through the current handle each time,
                # because the handle is replaced when CloseAfterSlice is set
                var_array = fid.variables[varname][slice_number,:]
                if not num.allclose(var_array, slice_array):
                    print('Read variable %s, slice %d: got unexpected value'
                          % (varname, slice_number))
                    # report the first element that is not exactly equal
                    mismatches = num.nonzero(var_array != slice_array)[0]
                    if len(mismatches) > 0:
                        x = mismatches[0]
                        print('Index %d, got %f, expected %f'
                              % (x, var_array[x], slice_array[x]))
                    # a slice that failed the comparison is always a failure
                    return False

                slice_number += 1

                if CloseAfterSlice:
                    fid.close()
                    # forget the closed handle so that a failed re-open
                    # does not lead to a second close() in the finally clause
                    fid = None
                    fid = nc.NetCDFFile(filename, 'r')
    finally:
        if fid is not None:
            fid.close()

    if TimeFileRead:
        stop_file_read = time.time()
        print('Time to read file: %.2f sec' %
              (stop_file_read - start_file_read))

    return True
126
127
##
# @brief Write a NetCDF file with set number of variables of a defined size.
# @param variable_size Size of variable, in bytes.
# @param num_variables Number of required variables.
# @param filename Name of the file to create (defaults to 'test.nc').
# @return The filename of the created file.
# @note Variable slice writes are interleaved: slice S of every variable is
#       written before slice S+1 of any variable.
# @note Reads the module globals ElementIndexArray, TimeFileWrite,
#       CloseAfterSlice and Verbose.
def write_file(variable_size, num_variables=1, filename='test.nc'):
    # an all-ones slice, scaled below to build each slice's values
    slice_array_mask = num.ones((XDIM,), 'd')

    # if timing file write, remember start time
    if TimeFileWrite:
        start_file_write = time.time()

    fid = nc.NetCDFFile(filename, 'w')

    # try/finally so the file is closed even when a write fails part-way
    try:
        # 'y' is created with no fixed length; each slice is one row along it
        fid.createDimension('y', None)
        fid.createDimension('x', XDIM)

        # create all required variables
        for var_num in range(num_variables):
            fid.createVariable(VarnameMask % var_num, 'd', ('y', 'x'))

        # set counters to zero
        var_bytes_written = 0
        file_bytes_written = 0
        slice_number = 0

        while var_bytes_written < variable_size:
            var_bytes_written += SLICE_SIZE

            for var_num in range(num_variables):
                varname = VarnameMask % var_num

                # create unique slice array: S*100 + V + E*1.0e-6
                slice_array = slice_array_mask*slice_number*100 + var_num
                slice_array += ElementIndexArray

                fid.variables[varname][slice_number,:] = slice_array

                if CloseAfterSlice:
                    fid.close()
                    # forget the closed handle so that a failed re-open
                    # does not lead to a second close() in the finally clause
                    fid = None
                    fid = nc.NetCDFFile(filename, 'a')

                file_bytes_written += SLICE_SIZE

                if Verbose:
                    print('File %s, variable %s, writing slice %d: '
                          'var=%.1fMiB, file=%.1fMiB' %
                          (filename, varname, slice_number,
                           float(var_bytes_written)/MBYTE,
                           float(file_bytes_written)/MBYTE))

            slice_number += 1
    finally:
        if fid is not None:
            fid.close()

    if TimeFileWrite:
        stop_file_write = time.time()
        print('Time to write file: %.2f sec' %
              (stop_file_write - start_file_write))

    return filename
194
195
##
# @brief Provide help for the befuddled user.
# @param msg Optional error message, written to stderr after the help text.
# @return Doesn't, calls sys.exit(10).
def usage(msg=None):
    print("Usage: write_large_files <opts> <varsize> [<numvars>]")
    print("")
    print("where <varsize> is a number followed by an optional modifier:")
    print("                    1024M or 4G")
    print("                the assumed modifier if none is given is '%s'."
          % DEF_MODIFIER)
    print("  and <numvars> is the number of variables of the above size")
    print("                    to write.  If not supplied, 1 is assumed.")
    print("                    There can be at most 100 variables.")
    print("  and <opts>    is zero or more of:")
    print("                    -c s    close & open the output file after")
    print("                            each variable slice is read/written,")
    print("                    -t rf   time the complete file read,")
    print("                    -t wf   time the complete file write,")
    print("                    -v      report each slice as it is read/written.")

    if msg:
        # this used to reference an undefined name ('derr'), so every call
        # with a message died with NameError instead of showing the message
        sys.stderr.write("\n%s\n" % msg)

    sys.exit(10)
219
##
# @brief Parse the command line, write the test file, then read it back.
# @param argv Argument list including the program name; defaults to sys.argv.
# @return 0 if the file read back as written, 10 if it did not.
# @note Invalid arguments end in usage(), which calls sys.exit(10).
def main(argv=None):
    global TimeFileWrite, TimeFileRead, CloseAfterSlice, Verbose
    global ElementIndexArray

    if argv is None:
        argv = sys.argv

    # parse command line args
    try:
        opts, args = getopt.getopt(argv[1:], "c:t:v", ["help"])
    except getopt.error:
        # sys.exc_info() is used instead of 'except ..., msg' (Python 2 only)
        # or 'except ... as msg' (not available in Python 2.5)
        usage(sys.exc_info()[1])

    for (opt, optarg) in opts:
        if opt == '-c':
            # slice rather than index, so an empty suboption reaches usage()
            # instead of raising IndexError
            if optarg[:1].lower() == 's':
                CloseAfterSlice = True
            else:
                usage("Unrecognized -c suboption: %s" % optarg)
        elif opt == '-t':
            # the first two characters select what is timed; slicing keeps a
            # one-character suboption ('-t r') from raising IndexError
            suboption = optarg[:2].lower()
            if suboption == 'rf':
                TimeFileRead = True
            elif suboption == 'wf':
                TimeFileWrite = True
            else:
                usage("Unrecognized -t suboption: %s" % optarg)
        elif opt == '-v':
            Verbose = True
        elif opt == '--help':
            # accepted by getopt above, so show the help without an error
            usage()
        else:
            usage("Unrecognized option: %s" % opt)

    if len(args) not in (1, 2):
        usage()

    # split '<number><modifier>' into its two parts
    size_arg = args[0]
    if not size_arg:
        usage()

    var_size = size_arg[:-1]
    modifier = size_arg[-1]

    if modifier in '0123456789':
        # no suffix letter given: the whole argument is the number
        var_size = size_arg
        modifier = DEF_MODIFIER
    modifier = Suffixes.get(modifier, None)
    if modifier is None:
        usage()

    try:
        var_size = int(var_size) * modifier
    except ValueError:
        usage()

    num_vars = 1
    if len(args) == 2:
        try:
            num_vars = int(args[1])
        except ValueError:
            usage()

    # the SSSSVV.EEEEEE value scheme only has room for variables 0 to 99,
    # and a file with no variables would test nothing
    if num_vars < 1 or num_vars > 100:
        usage()

    # initialize the global element index array which contains
    # 0.EEEEEE in each element.
    ElementIndexArray = num.arange(XDIM, dtype='d') * 1.0e-6

    # write the required file
    filename = write_file(var_size, num_vars)

    # read the file to see if it is as expected
    if not read_file(filename, var_size, num_vars):
        print("Didn't read data that we wrote!?")
        return 10

    return 0


if __name__ == "__main__":
    sys.exit(main())
Note: See TracBrowser for help on using the repository browser.