source: misc/tools/write_large_files/rwil_big_file.py @ 6739

Last change on this file since 6739 was 6739, checked in by rwilson, 15 years ago

Added NetCDF4 large/64bit file test code. Will become testcase later.

  • Property svn:executable set to *
File size: 8.5 KB
Line 
1#!/usr/bin/env python
2################################################################################
3# A test program to write a large NetCDF file and see if we can read it.
4#
5# The file is written with a defined number of variables, each of a required
6# size.  Each variable consists of one or more 'slices', each 1024*1024 cells.
7# Each element of each slice of each variable is written with a *unique* value
8# that is checked when the file is read.
9#
10# So, element E of slice S of variable V has the float value:
11#     V*1000 + S + E*1.0e-7
12# which will look like:
13#     VVVSSS.EEEEEEE
14################################################################################
15
16import sys
17import getopt
18import time
19import Numeric
20import Scientific.IO.NetCDF as nc
21
######
# Various constants
######

# size suffix assumed when <varsize> is given without one
DEF_MODIFIER = 'M'

# bytes in one MiB, used when reporting progress
MBYTE = 2 ** 20

# array slice definitions
XDIM = 2 ** 20              # cells in one slice
SLICE_SIZE = 8 * XDIM       # 8 bytes / float

# Multiplier (in bytes) for each recognised size suffix
Suffixes = dict(M=2 ** 20, G=2 ** 30)

######
# Globals
######

# run-time switches, changed by main() from the command line options
TimeFileWrite = False       # '-t wf'
TimeFileRead = False        # '-t rf'
CloseAfterSlice = False     # '-c s'
Verbose = False             # '-v'

# format used to build a variable name from its index
VarnameMask = 'var_%04d'
50
51
52######
53# Mainline code
54######
55
##
# @brief Read a NetCDF file - see if it is as we wrote it.
# @param filename The name of the file to read.
# @param variable_size Size of each variable, in bytes.
# @param num_variables Number of variables.
# @return True if every slice of every variable holds the expected values,
#         False as soon as a slice with an unexpected element is found.
# @note The file is closed on every exit path, including the early
#       'return False' and any exception.
def read_file(filename, variable_size, num_variables):
    # The per-element part of the expected value (x*1.0e-7) is the same for
    # every slice of every variable, so build it once, outside the timed
    # region, rather than looping over XDIM elements in Python per slice.
    element_offsets = Numeric.arange(XDIM) * 1.0e-7

    # if timing file read, get start time
    if TimeFileRead:
        start_file_read = time.time()

    fid = nc.NetCDFFile(filename, 'r')

    # num file bytes read counter
    file_bytes_read = 0

    try:
        for var_num in xrange(num_variables):
            varname = VarnameMask % var_num

            # num variable bytes read counter
            var_bytes_read = 0

            slice_number = 0
            while var_bytes_read < variable_size:
                var_bytes_read += SLICE_SIZE
                file_bytes_read += SLICE_SIZE

                # expected slice: element x holds
                #     slice_number + var_num*1000 + x*1.0e-7
                slice_array = element_offsets + (slice_number + var_num*1000)

                if Verbose:
                    print ('File %s, variable %s, reading slice %d: '
                           'var=%.1fMiB, file=%.1fMiB' %
                           (filename, varname, slice_number,
                            float(var_bytes_read)/MBYTE,
                            float(file_bytes_read)/MBYTE))

                var_array = fid.variables[varname][slice_number,:]

                # explicit element-wise test instead of relying on the
                # truth value of an array comparison
                if Numeric.sometrue(Numeric.not_equal(var_array, slice_array)):
                    print ('Read variable %s, slice %d: got unexpected value' %
                           (varname, slice_number))
                    # report only the first element that differs
                    for x in xrange(XDIM):
                        if var_array[x] != slice_array[x]:
                            print ('Index %d, got %f, expected %f' %
                                   (x, var_array[x], slice_array[x]))
                            return False

                slice_number += 1

                if CloseAfterSlice:
                    fid.close()
                    # forget the closed handle so the 'finally' clause does
                    # not close it a second time if the reopen fails
                    fid = None
                    fid = nc.NetCDFFile(filename, 'r')
    finally:
        if fid is not None:
            fid.close()

    if TimeFileRead:
        stop_file_read = time.time()
        print ('Time to read file: %.2f sec' %
               (stop_file_read - start_file_read))

    return True
122
123
##
# @brief Write a NetCDF file with set number of variables of a defined size.
# @param variable_size Size of each variable, in bytes.
# @param num_variables Number of required variables.
# @return The filename of the created file.
# @note Variable slice writes are interleaved: slice N of every variable is
#       written before slice N+1 of any variable.
# @note The file is closed even if creating or writing it raises.
def write_file(variable_size, num_variables=1):
    # name of the file to create
    filename = 'test.nc'

    # The per-element part of each value (x*1.0e-7) is the same for every
    # slice of every variable, so build it once, outside the timed region,
    # rather than looping over XDIM elements in Python per slice.
    element_offsets = Numeric.arange(XDIM) * 1.0e-7

    # if timing file write, remember start time
    if TimeFileWrite:
        start_file_write = time.time()

    # NOTE(review): the 'l' suffix on the open modes presumably asks for
    # large (64-bit offset) file support - confirm against the
    # Scientific.IO.NetCDF documentation.
    fid = nc.NetCDFFile(filename, 'wl')

    try:
        # 'y' is the unlimited dimension, one entry per slice
        fid.createDimension('y', None)
        fid.createDimension('x', XDIM)

        # create all required variables
        for i in xrange(num_variables):
            fid.createVariable(VarnameMask % i, 'd', ('y', 'x'))

        # set counters to zero
        var_bytes_written = 0
        file_bytes_written = 0
        slice_number = 0

        while var_bytes_written < variable_size:
            var_bytes_written += SLICE_SIZE

            for var_num in xrange(num_variables):
                varname = VarnameMask % var_num

                # unique slice: element x holds
                #     slice_number + var_num*1000 + x*1.0e-7
                slice_array = element_offsets + (slice_number + var_num*1000)

                fid.variables[varname][slice_number,:] = slice_array

                if CloseAfterSlice:
                    fid.close()
                    # forget the closed handle so the 'finally' clause does
                    # not close it a second time if the reopen fails
                    fid = None
                    fid = nc.NetCDFFile(filename, 'al')

                file_bytes_written += SLICE_SIZE

                if Verbose:
                    print ('File %s, variable %s, writing slice %d: '
                           'var=%.1fMiB, file=%.1fMiB' %
                           (filename, varname, slice_number,
                            float(var_bytes_written)/MBYTE,
                            float(file_bytes_written)/MBYTE))

            slice_number += 1
    finally:
        if fid is not None:
            fid.close()

    if TimeFileWrite:
        stop_file_write = time.time()
        print ('Time to write file: %.2f sec' %
               (stop_file_write - start_file_write))

    return filename
192
193
##
# @brief Provide help for the befuddled user.
# @param msg Optional error message, written to stderr after the help text.
# @return Doesn't, calls sys.exit().
# @note The process always exits with status 10.
def usage(msg=None):
    print("Usage: write_large_files <opts> <varsize> [<numvars>]")
    print("")
    print("where <varsize> is a number followed by an optional modifier:")
    print("                    1024M or 4G")
    print("                the assumed modifier if none is given is '%s'."
          % DEF_MODIFIER)
    print("  and <numvars> is the number of variables of the above size")
    print("                    to write.  If not supplied, 1 is assumed.")
    print("  and <opts>    is zero or more of:")
    print("                    -c s    close & open the output file after")
    print("                            each variable slice is read/written,")
    print("                    -t rf   time the complete file read,")
    print("                    -t wf   time the complete file write,")
    print("                    -v      report each slice as it is read/written.")

    if msg:
        # the original line here was a bare expression on an undefined name
        # ('derr'), so any message raised NameError instead of being shown
        sys.stderr.write("\n%s\n" % msg)

    sys.exit(10)
216
##
# @brief Parse the command line, write the test file, then read it back.
# @param argv Argument list with the program name first; sys.argv if None.
# @return 0 if the file read back as written, 10 if it did not.
# @note Any command line error is reported through usage(), which exits.
def main(argv=None):
    global TimeFileWrite, TimeFileRead, CloseAfterSlice, Verbose

    if argv is None:
        argv = sys.argv

    # parse command line args
    try:
        opts, args = getopt.getopt(argv[1:], "c:t:v", ["help"])
    except getopt.error:
        # fetching the exception this way parses under either form of the
        # 'except' binding syntax
        usage(sys.exc_info()[1])

    for (opt, optarg) in opts:
        if opt == '-c':
            # only the first character is significant; slicing rather than
            # indexing copes with an empty suboption
            if optarg[:1].lower() == 's':
                CloseAfterSlice = True
            else:
                usage("Unrecognized -c suboption: %s" % optarg)
        elif opt == '-t':
            # only the first two characters are significant; a one-character
            # suboption is rejected instead of raising IndexError
            suboption = optarg[:2].lower()
            if suboption == 'rf':
                TimeFileRead = True
            elif suboption == 'wf':
                TimeFileWrite = True
            else:
                usage("Unrecognized -t suboption: %s" % optarg)
        elif opt == '-v':
            Verbose = True
        elif opt == '--help':
            # declared to getopt above, so honour it as a plain help request
            usage()
        else:
            usage("Unrecognized option: %s" % opt)

    if len(args) not in (1, 2):
        usage()

    # split <varsize> into its digits and an optional one-letter modifier
    size_text = args[0]
    suffix = size_text[-1:]
    if suffix.isdigit():
        suffix = DEF_MODIFIER
    else:
        size_text = size_text[:-1]

    # suffixes are accepted in either case, like the option suboptions
    multiplier = Suffixes.get(suffix.upper(), None)
    if multiplier is None:
        usage()

    try:
        var_size = int(size_text) * multiplier
    except ValueError:
        usage()

    num_vars = 1
    if len(args) == 2:
        try:
            num_vars = int(args[1])
        except ValueError:
            usage()

    # a zero size or count would write nothing and trivially "pass"
    if var_size < 1 or num_vars < 1:
        usage()

    # write the required file
    filename = write_file(var_size, num_vars)

    # read the file to see if is as expected
    if not read_file(filename, var_size, num_vars):
        print("Didn't read data that we wrote!?")
        return 10

    print('Read/write of NetCDF file was correct')
    return 0
292
293
294if __name__ == "__main__":
295    sys.exit(main())
Note: See TracBrowser for help on using the repository browser.