#!/usr/bin/env python

################################################################################
# A test program to write a large NetCDF file and see if we can read it.
#
# The file is written with a defined number of variables, each of a required
# size.  Each variable consists of one or more 'slices', each 1000*1000 cells.
# Each element of each slice of each variable is written with a *unique* value
# that is checked when the file is read.
#
# So, for variable V (numbering from 0), slice S (starting from 0), element
# E will range from 0 to 10**6-1, and the element value will be:
#     S*100 + V + E*1.0E-6
# which will look like:
#     SSSSVV.EEEEEE
# This implies that the maximum number of variables written is 100 (0 to 99).
################################################################################

import sys
import getopt
import time
import numpy as num
import Scientific.IO.NetCDF as nc


######
# Various constants
######

# size modifier assumed when the user supplies a bare number
DEF_MODIFIER = 'M'

# one mebibyte, in bytes
MBYTE = 1024*1024

# array slice definitions: cells per slice and bytes per slice
XDIM = 1000*1000
SLICE_SIZE = XDIM*8     # 8 bytes / float

# multiplier, in bytes, for each recognised size suffix
Suffixes = {
    'M': MBYTE,
    'G': 1024*MBYTE,
}


######
# Globals
######

# run-time switches
TimeFileWrite = False
TimeFileRead = False
CloseAfterSlice = False
Verbose = False

# mask for variable names
VarnameMask = 'var_%02d'


######
# Mainline code
######

##
# @brief Read a NetCDF file - see if it is as we wrote it.
# @param filename The name of the file to read.
# @param variable_size Size of variable, in bytes.
# @param num_variables Number of variables.
def read_file(filename, variable_size, num_variables):
    """Read a NetCDF file back and check it holds the values we wrote.

    For every variable, slices are read until at least variable_size bytes
    have been covered (so the size is effectively rounded up to a whole
    number of slices).  Each slice is compared with the expected pattern
        slice_number*100 + var_num + ElementIndexArray
    using num.allclose().

    filename       The name of the file to read.
    variable_size  Size of each variable, in bytes.
    num_variables  Number of variables in the file.

    Returns True if every slice matched.  On the first mismatching slice
    the differing elements are printed and False is returned.  The file is
    closed on every exit path, including when an exception propagates.
    """

    # array of ones that is scaled to build each expected slice
    slice_array_mask = num.ones((XDIM,), 'd')

    # if timing file read, get start time
    if TimeFileRead:
        start_file_read = time.time()

    fid = nc.NetCDFFile(filename, 'r')
    try:
        # num file bytes read counter
        file_bytes_read = 0

        for var_num in range(num_variables):
            varname = VarnameMask % var_num

            # num variable bytes read counter
            var_bytes_read = 0

            slice_number = 0
            while var_bytes_read < variable_size:
                var_bytes_read += SLICE_SIZE
                file_bytes_read += SLICE_SIZE

                # create expected slice array
                slice_array = slice_array_mask*slice_number*100 + var_num
                slice_array += ElementIndexArray

                if Verbose:
                    print('File %s, variable %s, reading slice %d: '
                          'var=%.1fMiB, file=%.1fMiB'
                          % (filename, varname, slice_number,
                             float(var_bytes_read)/MBYTE,
                             float(file_bytes_read)/MBYTE))

                var_array = fid.variables[varname][slice_number, :]
                if not num.allclose(var_array, slice_array):
                    print('Read variable %s, slice %d: got unexpected value'
                          % (varname, slice_number))
                    # Report every element that is not exactly equal; only
                    # the differing indices are visited rather than looping
                    # over all XDIM cells in Python.
                    for x in num.nonzero(var_array != slice_array)[0]:
                        print('Index %d, got %f, expected %f'
                              % (x, var_array[x], slice_array[x]))
                    return False

                slice_number += 1

                if CloseAfterSlice:
                    fid.close()
                    # forget the closed handle first so that a failed
                    # reopen does not lead to a second close() below
                    fid = None
                    fid = nc.NetCDFFile(filename, 'r')
    finally:
        if fid is not None:
            fid.close()

    if TimeFileRead:
        stop_file_read = time.time()
        print('Time to read file: %.2f sec'
              % (stop_file_read - start_file_read))

    return True


##
# @brief Write a NetCDF file with set number of variables of a defined size.
# @param variable_size Size of variable, in bytes.
# @param num_variables Number of required variables.
# @return The filename of the created file.
# @note Variable slice writes are interleaved.
def write_file(variable_size, num_variables=1):
    """Write a NetCDF file with a set number of variables of a defined size.

    Slices are written round-robin: slice 0 of every variable, then slice 1
    of every variable, and so on, until at least variable_size bytes have
    been written per variable.  Element values follow the pattern
        slice_number*100 + var_num + ElementIndexArray

    variable_size  Size of each variable, in bytes.
    num_variables  Number of required variables.

    Returns the filename of the created file.  The file is closed on every
    exit path, including when an exception propagates.
    """

    # set file name
    filename = 'test.nc'

    # array of ones that is scaled to build each slice
    slice_array_mask = num.ones((XDIM,), 'd')

    # if timing file write, remember start time
    if TimeFileWrite:
        start_file_write = time.time()

    # NOTE(review): 'wl' presumably requests the library's large-file
    # format - confirm against the Scientific.IO.NetCDF documentation.
    fid = nc.NetCDFFile(filename, 'wl')
    try:
        fid.createDimension('y', None)
        fid.createDimension('x', XDIM)

        # create all required variables
        for i in range(num_variables):
            fid.createVariable(VarnameMask % i, 'd', ('y', 'x'))

        # set counters to zero
        var_bytes_written = 0
        file_bytes_written = 0
        slice_number = 0

        while var_bytes_written < variable_size:
            var_bytes_written += SLICE_SIZE

            for var_num in range(num_variables):
                varname = VarnameMask % var_num

                # create unique slice array
                slice_array = slice_array_mask*slice_number*100 + var_num
                slice_array += ElementIndexArray

                fid.variables[varname][slice_number, :] = slice_array

                if CloseAfterSlice:
                    fid.close()
                    # forget the closed handle first so that a failed
                    # reopen does not lead to a second close() below
                    fid = None
                    # NOTE(review): an 'al' reopen mode was tried and left
                    # disabled by the original author; plain 'a' is kept.
                    fid = nc.NetCDFFile(filename, 'a')

                file_bytes_written += SLICE_SIZE

                if Verbose:
                    print('File %s, variable %s, writing slice %d: '
                          'var=%.1fMiB, file=%.1fMiB'
                          % (filename, varname, slice_number,
                             float(var_bytes_written)/MBYTE,
                             float(file_bytes_written)/MBYTE))

            slice_number += 1
    finally:
        if fid is not None:
            fid.close()

    if TimeFileWrite:
        stop_file_write = time.time()
        print('Time to write file: %.2f sec'
              % (stop_file_write - start_file_write))

    return filename


def usage(msg=None):
    """Provide help for the befuddled user.

    Prints the usage text to stdout and, if msg is given, writes it to
    stderr afterwards.

    msg  Optional error message (string or exception) to show.

    Does not return: always calls sys.exit(10).
    """

    print("Usage: write_large_files [<options>] <var_size> [<num_vars>]")
    print("")
    print("where <var_size> is a number followed by an optional modifier:")
    print("                     1024M or 4G")
    print("                 the assumed modifier if none is given is '%s'."
          % DEF_MODIFIER)
    print("  and <num_vars> is the number of variables of the above size")
    print("                 to write.  If not supplied, 1 is assumed.")
    print("                 There can be at most 100 variables.")
    print("  and <options>  is zero or more of:")
    print("                     -c s    close & open the output file after")
    print("                             each variable slice is read/written,")
    print("                     -t rf   time the complete file read,")
    print("                     -t wf   time the complete file write,")
    print("                     -v      report progress for every slice,")
    print("                     --help  show this text.")

    if msg:
        sys.stderr.write("\n%s\n" % msg)

    sys.exit(10)


def main(argv=None):
    """Parse the command line, write the test file and read it back.

    argv  Argument list including the program name; defaults to sys.argv.

    Sets the module-level option flags and ElementIndexArray as a side
    effect.  Returns 0 if the file read back as written and 10 if it did
    not.  Any command line error goes through usage(), which exits with
    status 10.
    """

    global TimeFileWrite, TimeFileRead, CloseAfterSlice, Verbose
    global ElementIndexArray

    if argv is None:
        argv = sys.argv

    # parse command line args
    try:
        opts, args = getopt.getopt(argv[1:], "c:t:v", ["help"])
    except getopt.error as msg:
        usage(msg)

    for (opt, optarg) in opts:
        if opt == '-c':
            # slicing (not indexing) so an empty suboption is a usage
            # error rather than an IndexError
            if optarg[:1].lower() == 's':
                CloseAfterSlice = True
            else:
                usage("Unrecognized -c suboption: %s" % optarg)
        elif opt == '-t':
            # only the first two characters are significant
            suboption = optarg[:2].lower()
            if suboption == 'rf':
                TimeFileRead = True
            elif suboption == 'wf':
                TimeFileWrite = True
            else:
                usage("Unrecognized -t suboption: %s" % optarg)
        elif opt == '-v':
            Verbose = True
        elif opt == '--help':
            usage()
        else:
            usage("Unrecognized option: %s" % opt)

    if len(args) not in (1, 2):
        usage()

    # split the size argument into its number and optional suffix
    size_text = args[0]
    if size_text[-1:].isdigit():
        number_text = size_text
        modifier = DEF_MODIFIER
    else:
        number_text = size_text[:-1]
        modifier = size_text[-1:].upper()

    multiplier = Suffixes.get(modifier, None)
    if multiplier is None:
        usage("Unrecognized size modifier in: %s" % size_text)

    try:
        var_size = int(number_text) * multiplier
    except ValueError:
        usage("Bad variable size: %s" % size_text)

    # a zero-sized variable would write nothing and 'pass' vacuously
    if var_size < 1:
        usage("Variable size must be greater than zero: %s" % size_text)

    num_vars = 1
    if len(args) == 2:
        try:
            num_vars = int(args[1])
        except ValueError:
            usage("Bad number of variables: %s" % args[1])

    # the SSSSVV.EEEEEE value scheme only has room for variables 0 to 99
    if not 1 <= num_vars <= 100:
        usage("Number of variables must be between 1 and 100: %d" % num_vars)

    # initialize the global element index array which contains
    # 0.EEEEEE in each element.
    ElementIndexArray = num.arange(XDIM, dtype='d') * 1.0e-6

    # write the required file
    filename = write_file(var_size, num_vars)

    # read the file to see if it is as expected; success is silent
    if not read_file(filename, var_size, num_vars):
        print("Didn't read data that we wrote!?")
        return 10

    return 0


if __name__ == "__main__":
    sys.exit(main())