source: branches/numpy_misc/tools/write_large_files/rwi_big_file.py @ 6817

Last change on this file since 6817 was 6817, checked in by rwilson, 15 years ago

Changed the 'large file' netCDF test programs to use numpy.

  • Property svn:executable set to *
File size: 8.8 KB
Line 
1#!/usr/bin/env python
2################################################################################
3# A test program to write a large NetCDF file and see if we can read it.
4#
5# The file is written with a defined number of variables, each of a required
6# size.  Each variable consists of one or more 'slices', each 1024*1024 cells.
7# Each element of each slice of each variable is written with a *unique* value
8# that is checked when the file is read.
9#
10# So, element E of slice S of variable V has the float value:
11#     V*1000 + S + E*1.0e-7
12# which will look like:
13#     VVVSSS.EEEEEEE
14################################################################################
15
16import sys
17import getopt
18import time
19import numpy as num
20import Scientific.IO.NetCDF as nc
21
######
# Various constants
######

# size suffix assumed when <varsize> is given without one
DEF_MODIFIER = 'M'

# one mebibyte, used to scale the byte counts shown in progress messages
MBYTE = 2**20

# geometry of a single array slice
XDIM = 2**20                # cells in one slice
SLICE_SIZE = 8 * XDIM       # bytes in one slice: each cell is an 8-byte float

# number of bytes each recognised size suffix stands for
Suffixes = dict(M=2**20, G=2**30)
38
######
# Globals
######

# Run-time switches, all off by default; main() sets them from the
# command line:
#   TimeFileWrite    report how long the file write took
#   TimeFileRead     report how long the file read took
#   CloseAfterSlice  close and reopen the file after every slice
#   Verbose          print a progress line for every slice
TimeFileWrite = TimeFileRead = CloseAfterSlice = Verbose = False

# template for variable names, filled in with the variable number
VarnameMask = 'var_%04d'
50
51
52######
53# Mainline code
54######
55
##
# @brief Read a NetCDF file - see if it is as we wrote it.
# @param filename The name of the file to read.
# @param variable_size Size of variable, in bytes.
# @param num_variables Number of variables.
# @return True if every slice read holds the expected values, False as soon
#         as a slice with an unexpected value is found.
# @note On a mismatch the first bad element is printed (every bad element of
#       that slice if Verbose is set) and the remaining slices are not read.
# @note The file is always closed before returning.
def read_file(filename, variable_size, num_variables):
    # fractional part of each element (index * 1.0e-7); it is the same for
    # every slice, so build it once instead of once per slice
    element_offsets = num.arange(XDIM, dtype='d') * 1.0e-7

    # if timing file read, get start time
    if TimeFileRead:
        start_file_read = time.time()

    fid = nc.NetCDFFile(filename, 'r')

    # num file bytes read counter
    file_bytes_read = 0

    try:
        for var_num in range(num_variables):
            varname = VarnameMask % var_num

            # num variable bytes read counter
            var_bytes_read = 0

            slice_number = 0
            while var_bytes_read < variable_size:
                var_bytes_read += SLICE_SIZE
                file_bytes_read += SLICE_SIZE

                # expected slice: V*1000 + S in the integer part plus the
                # per-element fraction; same arithmetic as the element-wise
                # form, so the exact comparison below stays valid
                slice_array = (element_offsets +
                               (float(slice_number) + var_num*1000))

                if Verbose:
                    print ('File %s, variable %s, reading slice %d: '
                           'var=%.1fMiB, file=%.1fMiB' %
                           (filename, varname, slice_number,
                            float(var_bytes_read)/MBYTE,
                            float(file_bytes_read)/MBYTE))

                # look the variable up on the current handle each time: the
                # handle is replaced when CloseAfterSlice is set
                var_array = fid.variables[varname][slice_number,:]

                mismatch = (var_array != slice_array)
                if num.any(mismatch):
                    print ('Read variable %s, slice %d: got unexpected value'
                           % (varname, slice_number))
                    bad_indices = num.nonzero(mismatch)[0]
                    if not Verbose:
                        # only the first bad element unless asked for more
                        bad_indices = bad_indices[:1]
                    for x in bad_indices:
                        print ('Got %f, expected %f'
                               % (var_array[x], slice_array[x]))
                    # report failure to the caller rather than exiting with
                    # a success status
                    return False

                slice_number += 1

                if CloseAfterSlice:
                    fid.close()
                    fid = None      # nothing to close if the reopen fails
                    fid = nc.NetCDFFile(filename, 'r')
    finally:
        if fid is not None:
            fid.close()

    if TimeFileRead:
        stop_file_read = time.time()
        print ('Time to read file: %.2f sec' %
               (stop_file_read - start_file_read))

    return True
130
131
##
# @brief Write a NetCDF file with set number of variables of a defined size.
# @param variable_size Size of variable, in bytes.
# @param num_variables Number of required variables.
# @return The filename of the created file.
# @note Variable slice writes are interleaved.
# @note The file is closed before returning (also if a write fails), so the
#       reported write time includes the final close.
def write_file(variable_size, num_variables=1):
    # name of the file to create
    filename = 'test.nc'

    # fractional part of each element (index * 1.0e-7); it is the same for
    # every slice, so build it once instead of once per slice
    element_offsets = num.arange(XDIM, dtype='d') * 1.0e-7

    # if timing file write, remember start time
    if TimeFileWrite:
        start_file_write = time.time()

    fid = nc.NetCDFFile(filename, 'w')
    try:
        fid.createDimension('y', None)
        fid.createDimension('x', XDIM)

        # create all required variables
        for i in range(num_variables):
            fid.createVariable(VarnameMask % i, 'd', ('y', 'x'))

        # set counters to zero
        var_bytes_written = 0
        file_bytes_written = 0
        slice_number = 0

        while var_bytes_written < variable_size:
            var_bytes_written += SLICE_SIZE

            # write slice S of every variable before moving to slice S+1
            for var_num in range(num_variables):
                varname = VarnameMask % var_num

                # unique slice: V*1000 + S in the integer part plus the
                # per-element fraction
                slice_array = (element_offsets +
                               (float(slice_number) + var_num*1000))

                fid.variables[varname][slice_number,:] = slice_array

                if CloseAfterSlice:
                    fid.close()
                    fid = None      # nothing to close if the reopen fails
                    fid = nc.NetCDFFile(filename, 'a')

                file_bytes_written += SLICE_SIZE

                if Verbose:
                    print ('File %s, variable %s, writing slice %d: '
                           'var=%.1fMiB, file=%.1fMiB' %
                           (filename, varname, slice_number,
                            float(var_bytes_written)/MBYTE,
                            float(file_bytes_written)/MBYTE))

            slice_number += 1
    finally:
        # close the file so everything written is on disk before the caller
        # opens it again for reading
        if fid is not None:
            fid.close()

    if TimeFileWrite:
        stop_file_write = time.time()
        print ('Time to write file: %.2f sec' %
               (stop_file_write - start_file_write))

    return filename
197
198
##
# @brief Provide help for the befuddled user.
# @param msg Optional error message; if given it is written to stderr after
#            the help text.
# @return Doesn't, calls sys.exit(10).
def usage(msg=None):
    help_lines = (
        "Usage: write_large_files <opts> <varsize> [<numvars>]",
        "",
        "where <varsize> is a number followed by an optional modifier:",
        "                    1024M or 4G",
        "                the assumed modifier if none is given is '%s'."
        % DEF_MODIFIER,
        "  and <numvars> is the number of variables of the above size",
        "                    to write.  If not supplied, 1 is assumed.",
        "  and <opts>    is zero or more of:",
        "                    -c s    close & open the output file after",
        "                            each variable slice is read/written,",
        "                    -t rf   time the complete file read,",
        "                    -t wf   time the complete file write,",
        "                    -v      print progress for each slice read/written.",
    )
    print ("\n".join(help_lines))

    if msg:
        # msg may be an exception object (e.g. from getopt), so format it
        sys.stderr.write("\n%s\n" % msg)

    sys.exit(10)
221
##
# @brief Parse the command line, write the test file, then read it back.
# @param argv Argument list to use instead of sys.argv; argv[0] is skipped.
# @return 0 if the data read back was as expected, else 10.
# @note Any command line error is reported through usage(), which exits.
def main(argv=None):
    global TimeFileWrite, TimeFileRead, CloseAfterSlice, Verbose

    if argv is None:
        argv = sys.argv

    # parse command line args
    try:
        opts, args = getopt.getopt(argv[1:], "c:t:v", ["help"])
    except getopt.error:
        # fetched this way so the code runs on any Python version
        usage(sys.exc_info()[1])

    for (opt, optarg) in opts:
        if opt == '-c':
            # only the first character of the suboption is significant
            if optarg[:1].lower() == 's':
                CloseAfterSlice = True
            else:
                usage("Unrecognized -c suboption: %s" % optarg)
        elif opt == '-t':
            # only the first two characters of the suboption are
            # significant; slicing copes with a one-character suboption
            suboption = optarg[:2].lower()
            if suboption == 'rf':
                TimeFileRead = True
            elif suboption == 'wf':
                TimeFileWrite = True
            else:
                usage("Unrecognized -t suboption: %s" % optarg)
        elif opt == '-v':
            Verbose = True
        elif opt == '--help':
            usage()
        else:
            usage("Unrecognized option: %s" % opt)

    if len(args) != 1 and len(args) != 2:
        usage()

    # split <varsize> into digits and a size suffix; a trailing digit means
    # no suffix was given
    size_spec = args[0]
    suffix = size_spec[-1:].upper()
    if suffix.isdigit():
        size_digits = size_spec
        suffix = DEF_MODIFIER
    else:
        size_digits = size_spec[:-1]

    multiplier = Suffixes.get(suffix, None)
    if multiplier is None:
        usage("Unrecognized size modifier in: %s" % size_spec)

    try:
        var_size = int(size_digits) * multiplier
    except ValueError:
        usage("Bad variable size: %s" % size_spec)

    num_vars = 1
    if len(args) == 2:
        try:
            num_vars = int(args[1])
        except ValueError:
            usage("Bad number of variables: %s" % args[1])

    # write the required file
    filename = write_file(var_size, num_vars)

    # read the file to see if is as expected
    if not read_file(filename, var_size, num_vars):
        print ("Didn't read data that we wrote!?")
        return 10

    print ('Read/write of NetCDF file was correct')
    return 0
297
298
# run as a script: the value main() returns becomes the process exit status
if __name__ == "__main__":
    exit_status = main()
    sys.exit(exit_status)
Note: See TracBrowser for help on using the repository browser.