source: misc/tools/write_large_files/rwi4_big_file.py @ 6906

Last change on this file since 6906 was 6906, checked in by rwilson, 15 years ago

Patong beach validation changes plus back-merge changes.

  • Property svn:executable set to *
File size: 9.0 KB
Line 
1#!/usr/bin/env python
2################################################################################
3# A test program to write a large NetCDF file and see if we can read it.
4#
5# The file is written with a defined number of variables, each of a required
6# size.  Each variable consists of one or more 'slices', each 1000*1000 cells.
7# Each element of each slice of each variable is written with a *unique* value
8# that is checked when the file is read.
9#
10# So, for variable V (numbering from 0), slice S (starting from 0), element
11# E will range from 0 to 10**6-1, and the element value will be:
12#     S*100 + V + E*1.0E-6
13# which will look like:
14#     SSSSVV.EEEEEE
15# This implies that the maximum number of variables written is 100 (0 to 99).
16################################################################################
17
18import sys
19import getopt
20import time
21import Numeric
22import Scientific.IO.NetCDF as nc
23
######
# Various constants
######

# size modifier assumed when <varsize> carries no suffix
DEF_MODIFIER = 'M'

# number of bytes in one MiB
MBYTE = 1024 * 1024

# array slice definitions: cells per slice and the bytes they occupy
XDIM = 1000 * 1000
SLICE_SIZE = 8 * XDIM       # 8 bytes / float

# Dictionary mapping a size suffix to its byte multiplier
Suffixes = {
    'M': MBYTE,
    'G': 1024 * MBYTE,
}

######
# Globals
######

# run-time switches, all off until main() sets them from the command line
TimeFileWrite = False
TimeFileRead = False
CloseAfterSlice = False
Verbose = False

# mask for variable names; takes the variable number
VarnameMask = 'var_%02d'
52
53
54######
55# Mainline code
56######
57
##
# @brief Read a NetCDF file and check that it holds the data we wrote.
# @param filename The name of the file to read.
# @param variable_size Size of each variable, in bytes.
# @param num_variables Number of variables.
# @return True if every slice of every variable holds the expected values,
#         False as soon as a slice with an unexpected value is found.
# @note Uses the global ElementIndexArray, which main() initialises.
def read_file(filename, variable_size, num_variables):
    # Largest acceptable |got - expected|.  Neighbouring elements differ by
    # 1.0e-6, so half of that still tells any two elements apart.  The
    # default allclose() tolerance is relative (1.0e-5 of the value) and
    # would accept elements that are off by many index positions.
    tolerance = 0.5e-6

    # create a data array slice
    slice_array_mask = Numeric.ones((XDIM,), 'd')

    # if timing file read, get start time
    if TimeFileRead:
        start_file_read = time.time()

    fid = nc.NetCDFFile(filename, 'r')
    try:
        # num file bytes read counter
        file_bytes_read = 0

        for var_num in range(num_variables):
            varname = VarnameMask % var_num

            # num variable bytes read counter
            var_bytes_read = 0

            slice_number = 0
            while var_bytes_read < variable_size:
                var_bytes_read += SLICE_SIZE
                file_bytes_read += SLICE_SIZE

                # create expected slice array: SSSSVV.EEEEEE
                slice_array = slice_array_mask*slice_number*100 + var_num
                slice_array += ElementIndexArray

                if Verbose:
                    print('File %s, variable %s, reading slice %d: '
                          'var=%.1fMiB, file=%.1fMiB' %
                          (filename, varname, slice_number,
                           float(var_bytes_read)/MBYTE,
                           float(file_bytes_read)/MBYTE))

                var_array = fid.variables[varname][slice_number,:]
                if not Numeric.allclose(var_array, slice_array,
                                        rtol=0.0, atol=tolerance):
                    print('Read variable %s, slice %d: got unexpected value'
                          % (varname, slice_number))

                    # report the first offending element, using the same
                    # test as allclose() so one is always found (the
                    # negated '<' also catches NaN)
                    for x, got in enumerate(var_array):
                        expected = slice_array[x]
                        if not (abs(got - expected) < tolerance):
                            print('Index %d, got %f, expected %f'
                                  % (x, got, expected))
                            break

                    # the slice is wrong whether or not we could name the
                    # element; the 'finally' below closes the file
                    return False

                slice_number += 1

                if CloseAfterSlice:
                    fid.close()
                    fid = None      # nothing to close if the reopen fails
                    fid = nc.NetCDFFile(filename, 'r')
    finally:
        if fid is not None:
            fid.close()

    if TimeFileRead:
        stop_file_read = time.time()
        print('Time to read file: %.2f sec' %
              (stop_file_read - start_file_read))

    return True
123
124
##
# @brief Write a NetCDF file with set number of variables of a defined size.
# @param variable_size Size of variable, in bytes.
# @param num_variables Number of required variables.
# @return The filename of the created file.
# @note Variable slice writes are interleaved: slice N of every variable is
#       written before slice N+1 of any variable.
# @note Uses the global ElementIndexArray, which main() initialises.
def write_file(variable_size, num_variables=1):
    # the output file name is fixed
    filename = 'test.nc'

    # create a data array slice
    slice_array_mask = Numeric.ones((XDIM,), 'd')

    # if timing file write, remember start time
    if TimeFileWrite:
        start_file_write = time.time()

    fid = nc.NetCDFFile(filename, 'w4')
    try:
        fid.createDimension('y', None)      # unlimited: one row per slice
        fid.createDimension('x', XDIM)

        # create all required variables
        for i in range(num_variables):
            fid.createVariable(VarnameMask % i, 'd', ('y', 'x'))

        # set counters to zero
        var_bytes_written = 0
        file_bytes_written = 0
        slice_number = 0

        while var_bytes_written < variable_size:
            var_bytes_written += SLICE_SIZE

            for var_num in range(num_variables):
                varname = VarnameMask % var_num

                # create unique slice array: SSSSVV.EEEEEE
                slice_array = slice_array_mask*slice_number*100 + var_num
                slice_array += ElementIndexArray

                fid.variables[varname][slice_number,:] = slice_array

                if CloseAfterSlice:
                    fid.close()
                    fid = None      # nothing to close if the reopen fails
                    # NOTE(review): the file is created with mode 'w4' but
                    # reopened with plain 'a'; an 'a4' reopen had been
                    # tried and disabled here - confirm which is intended.
                    fid = nc.NetCDFFile(filename, 'a')

                file_bytes_written += SLICE_SIZE

                if Verbose:
                    print('File %s, variable %s, writing slice %d: '
                          'var=%.1fMiB, file=%.1fMiB' %
                          (filename, varname, slice_number,
                           float(var_bytes_written)/MBYTE,
                           float(file_bytes_written)/MBYTE))

            slice_number += 1
    finally:
        # close the file even if a write failed part way through
        if fid is not None:
            fid.close()

    if TimeFileWrite:
        stop_file_write = time.time()
        print('Time to write file: %.2f sec' %
              (stop_file_write - start_file_write))

    return filename
192
193
##
# @brief Provide help for the befuddled user.
# @param msg Optional error message, written to stderr after the help text.
# @return Doesn't, calls sys.exit() with status 10.
def usage(msg=None):
    print("Usage: write_large_files <opts> <varsize> [<numvars>]")
    print("")
    print("where <varsize> is a number followed by an optional modifier:")
    print("                    1024M or 4G")
    print("                the assumed modifier if none is given is '%s'."
          % DEF_MODIFIER)
    print("  and <numvars> is the number of variables of the above size")
    print("                    to write.  If not supplied, 1 is assumed.")
    print("                    There can be at most 100 variables.")
    print("  and <opts>    is zero or more of:")
    print("                    -c s    close & open the output file after")
    print("                            each variable slice is read/written,")
    print("                    -t rf   time the complete file read,")
    print("                    -t wf   time the complete file write,")
    print("                    -v      print progress for every slice,")
    print("                    --help  print this help.")

    if msg:
        # errors go to stderr so they stay visible when stdout is redirected
        sys.stderr.write("\n%s\n" % msg)

    sys.exit(10)
217
##
# @brief Parse the command line, write the test file, then read it back.
# @param argv Argument list including the program name; sys.argv if None.
# @return 0 if the file read back as written, 10 if it did not.
# @note Any command line error is reported through usage(), which exits.
def main(argv=None):
    global TimeFileWrite, TimeFileRead, CloseAfterSlice, Verbose
    global ElementIndexArray

    if argv is None:
        argv = sys.argv

    # parse command line args
    try:
        opts, args = getopt.getopt(argv[1:], "c:t:v", ["help"])
    except getopt.error as msg:
        usage(msg)

    for (opt, optarg) in opts:
        if opt == '-c':
            # only the first character of the suboption is significant;
            # slicing (not indexing) copes with an empty suboption
            if optarg[:1].lower() == 's':
                CloseAfterSlice = True
            else:
                usage("Unrecognized -c suboption: %s" % optarg)
        elif opt == '-t':
            # only the first two characters are significant
            suboption = optarg[:2].lower()
            if suboption == 'rf':
                TimeFileRead = True
            elif suboption == 'wf':
                TimeFileWrite = True
            else:
                usage("Unrecognized -t suboption: %s" % optarg)
        elif opt == '-v':
            Verbose = True
        elif opt == '--help':
            usage()
        else:
            usage("Unrecognized option: %s" % opt)

    if len(args) != 1 and len(args) != 2:
        usage()

    # split <varsize> into its number and its (optional) modifier
    size_arg = args[0]
    if size_arg[-1:].isdigit():
        number = size_arg
        modifier = DEF_MODIFIER
    else:
        number = size_arg[:-1]
        modifier = size_arg[-1:].upper()

    multiplier = Suffixes.get(modifier, None)
    if multiplier is None:
        usage("Unrecognized size modifier in: %s" % size_arg)

    try:
        var_size = int(number) * multiplier
    except ValueError:
        usage("Bad variable size: %s" % size_arg)

    if var_size <= 0:
        usage("Variable size must be greater than zero: %s" % size_arg)

    num_vars = 1
    if len(args) == 2:
        try:
            num_vars = int(args[1])
        except ValueError:
            usage("Bad number of variables: %s" % args[1])

    # the SSSSVV.EEEEEE value encoding only has room for variables 0 to 99
    if num_vars < 1 or num_vars > 100:
        usage("Number of variables must be between 1 and 100: %d" % num_vars)

    # initialize the global element index array which contains
    # 0.EEEEEE in each element.
    ElementIndexArray = Numeric.arange(XDIM) * 1.0e-6

    # write the required file
    filename = write_file(var_size, num_vars)

    # read the file to see if is as expected
    if not read_file(filename, var_size, num_vars):
        print("Didn't read data that we wrote!?")
        return 10

    return 0


if __name__ == "__main__":
    sys.exit(main())
Note: See TracBrowser for help on using the repository browser.