source: trunk/anuga_core/source/anuga/abstract_2d_finite_volumes/file_function.py @ 8780

Last change on this file since 8780 was 8780, checked in by steve, 11 years ago

Some changes to allow netcdf4 use

File size: 17.6 KB
#!/usr/bin/env python
"""File function
Takes a file as input, and returns it as a mathematical function.
For example, you can load an arbitrary 2D heightfield mesh, and treat it as a
function like so:

F = file_function('my_mesh.sww', ...)
evaluated_point = F(t, x, y)

Values will be interpolated across the surface of the mesh. Holes in the mesh
have an undefined value.

"""

import numpy as num

from anuga.geospatial_data.geospatial_data import ensure_absolute
from anuga.file.netcdf import NetCDFFile
from anuga.config import netcdf_mode_r, netcdf_mode_w, netcdf_mode_a
from anuga.utilities.numerical_tools import ensure_numeric

import anuga.utilities.log as log


def file_function(filename,
                  domain=None,
                  quantities=None,
                  interpolation_points=None,
                  time_thinning=1,
                  time_limit=None,
                  verbose=False,
                  use_cache=False,
                  boundary_polygon=None,
                  output_centroids=False):
    """Read time history of spatial data from NetCDF file and return
    a callable object.

    Input variables:

    filename - Name of sww, tms or sts file

       If the file has extension 'sww' then it is assumed to be spatio-temporal
       or temporal and the callable object will have the form f(t,x,y) or f(t)
       depending on whether the file contains spatial data.

       If the file has extension 'tms' then it is assumed to be temporal only
       and the callable object will have the form f(t).

       Either form will return interpolated values based on the input file
       using the underlying interpolation_function.

    domain - Associated domain object.
       If domain is specified, model time (domain.starttime)
       will be checked and possibly modified.

       All times are assumed to be in UTC.

       All spatial information is assumed to be in absolute UTM coordinates.

    quantities - the name of the quantity to be interpolated or a
                 list of quantity names. The resulting function will return
                 a tuple of values - one for each quantity.
                 If quantities is None, the default
                 ['stage', 'xmomentum', 'ymomentum'] is used.

    interpolation_points - list of absolute UTM coordinates for points (N x 2),
                 geospatial object or points file name at which values are
                 sought.

    time_thinning - timestep thinning factor; only every time_thinning'th
                 timestep from the file is used (default 1, i.e. use all
                 timesteps).

    time_limit - absolute time (same reference as the file starttime) beyond
                 which data from the file is ignored.

    verbose - True to report diagnostic information as the file is read.

    use_cache - True means that caching of the intermediate result of
                 Interpolation_function is attempted.

    boundary_polygon - polygon (absolute UTM coordinates) used with sts files
                 to select the gauge points that lie on the model boundary.

    See Interpolation function in anuga.fit_interpolate.interpolation for
    further documentation.
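
    Example (illustrative sketch only; 'gauges.sww' and the coordinates are
    hypothetical). When interpolation_points are supplied, one common pattern
    is to evaluate the returned object by point index:

        F = file_function('gauges.sww',
                          quantities=['stage', 'xmomentum', 'ymomentum'],
                          interpolation_points=[[305000.0, 6185000.0]],
                          verbose=True)
        stage, xmom, ymom = F(600.0, point_id=0)  # values at point 0, t=600s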
    """

    # FIXME (OLE): Should check origin of domain against that of file
    # In fact, this is where origin should be converted to that of domain
    # Also, check that file covers domain fully.

    # Take into account:
    # - domain's georef
    # - sww file's georef
    # - interpolation points as absolute UTM coordinates

    if quantities is None:
        if verbose:
            msg = 'Quantities specified in file_function are None,'
            msg += ' so using stage, xmomentum, and ymomentum in that order'
            log.critical(msg)
        quantities = ['stage', 'xmomentum', 'ymomentum']

    # Use domain's starttime if available
    if domain is not None:
        domain_starttime = domain.get_starttime()
    else:
        domain_starttime = None

    # Build arguments and keyword arguments for use with caching or apply.
    args = (filename,)

    # FIXME (Ole): Caching this function will not work well
    # if domain is passed in as instances change hash code.
    # Instead we pass in those attributes that are needed (and return them
    # if modified)
    kwargs = {'quantities': quantities,
              'interpolation_points': interpolation_points,
              'domain_starttime': domain_starttime,
              'time_thinning': time_thinning,
              'time_limit': time_limit,
              'verbose': verbose,
              'boundary_polygon': boundary_polygon,
              'output_centroids': output_centroids}

    # Call underlying engine with or without caching
    if use_cache is True:
        try:
            from caching import cache
        except:
            msg = 'Caching was requested, but caching module '+\
                  'could not be imported'
            raise Exception(msg)

        f, starttime = cache(_file_function,
                             args, kwargs,
                             dependencies=[filename],
                             compression=False,
                             verbose=verbose)
    else:
        f, starttime = apply(_file_function,
                             args, kwargs)

    # FIXME (Ole): Pass cache arguments, such as compression, in some sort of
    # structure

    f.starttime = starttime
    f.filename = filename

    if domain is not None:
        # Update domain.starttime if it is *earlier* than starttime from file
        if starttime > domain.starttime:
            msg = 'WARNING: Start time as specified in domain (%f)' \
                  % domain.starttime
            msg += ' is earlier than the starttime of file %s (%f).' \
                   % (filename, starttime)
            msg += ' Modifying domain starttime accordingly.'

            if verbose: log.critical(msg)

            domain.set_starttime(starttime) # Modifying model time

            if verbose: log.critical('Domain starttime is now set to %f'
                                     % domain.starttime)
    return f


def _file_function(filename,
                   quantities=None,
                   interpolation_points=None,
                   domain_starttime=None,
                   time_thinning=1,
                   time_limit=None,
                   verbose=False,
                   boundary_polygon=None,
                   output_centroids=False):
    """Internal function

    See file_function for documentation
    """

    assert type(filename) == type(''),\
               'First argument to file_function must be a string'

    try:
        fid = open(filename)
    except IOError, e:
        msg = 'File "%s" could not be opened: Error="%s"' % (filename, e)
        raise IOError(msg)

    # Read first line of file and guess file type
    line = fid.readline()
    fid.close()

    # FIXME SR: This test doesn't seem to work with netcdf4
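    # (netCDF-4 files are HDF5-based and begin with the bytes '\x89HDF'
    # rather than 'CDF', so the magic-number check below only recognises
    # classic netCDF files)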
    if line[:3] == 'CDF':
        return get_netcdf_file_function(filename,
                                        quantities,
                                        interpolation_points,
                                        domain_starttime,
                                        time_thinning=time_thinning,
                                        time_limit=time_limit,
                                        verbose=verbose,
                                        boundary_polygon=boundary_polygon,
                                        output_centroids=output_centroids)
    else:
        # FIXME (Ole): Could add csv file here to address Ted Rigby's
        # suggestion about reading hydrographs.
        # This may also deal with the gist of ticket:289
        raise Exception('Must be a NetCDF File')


def get_netcdf_file_function(filename,
                             quantity_names=None,
                             interpolation_points=None,
                             domain_starttime=None,
                             time_thinning=1,
                             time_limit=None,
                             verbose=False,
                             boundary_polygon=None,
                             output_centroids=False):
    """Read time history of spatial data from NetCDF sww file and
    return a callable object f(t,x,y)
    which will return interpolated values based on the input file.

    Model time (domain_starttime)
    will be checked, possibly modified and returned.

    All times are assumed to be in UTC.

    See Interpolation function for further documentation.
    """
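
    # Illustrative sketch of the return value (file name and variable names
    # below are hypothetical): this helper returns a tuple of
    # (Interpolation_function instance, starttime), e.g.
    #
    #     f, file_starttime = get_netcdf_file_function('model.sww', ['stage'],
    #                                                  interpolation_points=pts)
    #     stage_at_first_point = f(0.0, point_id=0)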

    # FIXME: Check that model origin is the same as file's origin
    # (both in UTM coordinates)
    # If not - modify those from file to match domain
    # (origin should be passed in)
    # Take this code from e.g. dem2pts in data_manager.py
    # FIXME: Use geo_reference to read and write xllcorner...

    import time, calendar
    from anuga.config import time_format

    # Open NetCDF file
    if verbose: log.critical('Reading %s' % filename)

    fid = NetCDFFile(filename, netcdf_mode_r)

    if isinstance(quantity_names, basestring):
        quantity_names = [quantity_names]

    if quantity_names is None or len(quantity_names) < 1:
        msg = 'No quantities are specified in file_function'
        raise Exception(msg)

    if interpolation_points is not None:

        #interpolation_points = num.array(interpolation_points, num.float)
        interpolation_points = ensure_absolute(interpolation_points)
        msg = 'Points must be N x 2. I got %d columns' \
              % interpolation_points.shape[1]
        assert interpolation_points.shape[1] == 2, msg

    # Now assert that requested quantities (and the independent ones)
    # are present in file
    missing = []
    for quantity in ['time'] + quantity_names:
        if not fid.variables.has_key(quantity):
            missing.append(quantity)

    if len(missing) > 0:
        msg = 'Quantities %s could not be found in file %s'\
              % (str(missing), filename)
        fid.close()
        raise Exception(msg)

    # Decide whether this data has a spatial dimension
    spatial = True
    for quantity in ['x', 'y']:
        if not fid.variables.has_key(quantity):
            spatial = False

    if filename[-3:] == 'tms' and spatial is True:
        msg = 'Files of type TMS must not contain spatial information'
        raise Exception(msg)

    if filename[-3:] == 'sww' and spatial is False:
        msg = 'Files of type SWW must contain spatial information'
        raise Exception(msg)

    if filename[-3:] == 'sts' and spatial is False:
        # What if mux file only contains one point?
        msg = 'Files of type STS must contain spatial information'
        raise Exception(msg)

    # JJ REMOVED
    #if filename[-3:] == 'sts' and boundary_polygon is None:
    #    #What if mux file only contains one point
    #    msg = 'Files of type sts require boundary polygon'
    #    raise Exception(msg)

    # Get starttime
    try:
        starttime = float(fid.starttime)
    except ValueError:
        msg = 'Could not read starttime from file %s' % filename
        raise Exception(msg)


    # Get variables
    # if verbose: log.critical('Get variables')
    time = fid.variables['time'][:]
    # FIXME(Ole): Is time monotonic?

    # Apply time limit if requested
    upper_time_index = len(time)
    msg = 'Time vector obtained from file %s has length 0' % filename
    assert upper_time_index > 0, msg

    if time_limit is not None:
        # Adjust given time limit to given start time
        time_limit = time_limit - starttime

        # Find limit point
        for i, t in enumerate(time):
            if t > time_limit:
                upper_time_index = i
                break

        msg = 'Time vector reduced to zero length. ' \
              'Requested time limit is %f' % time_limit
        assert upper_time_index > 0, msg

        if time_limit < time[-1] and verbose is True:
            log.critical('Limited time vector from %.2fs to %.2fs'
                         % (time[-1], time_limit))

    time = time[:upper_time_index]
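
    # Worked example of the truncation above (illustrative values only):
    # with starttime = 3600, time = [0, 10, 20, 30] (seconds relative to
    # starttime) and time_limit = 3630 (absolute), the adjusted limit is 30,
    # no entry exceeds it and the full vector is kept; with time_limit = 3625
    # the adjusted limit is 25, so upper_time_index = 3 and time becomes
    # [0, 10, 20].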

    # Get time independent stuff
    if spatial:
        # Get origin
        #xllcorner = fid.xllcorner[0]
        #yllcorner = fid.yllcorner[0]
        #zone = fid.zone[0]

        xllcorner = fid.xllcorner
        yllcorner = fid.yllcorner
        zone = fid.zone

        x = fid.variables['x'][:]
        y = fid.variables['y'][:]
        if filename.endswith('sww'):
            triangles = fid.variables['volumes'][:]

        x = num.reshape(x, (len(x), 1))
        y = num.reshape(y, (len(y), 1))
        vertex_coordinates = num.concatenate((x, y), axis=1)  # m x 2 array

        if boundary_polygon is not None:
            # Remove sts points that do not lie on boundary
            # FIXME(Ole): Why don't we just remove such points from the list of
            # points and associated data?
            # I am actually convinced we can get rid of neighbour_gauge_id
            # altogether as the sts file is produced using the ordering file.
            # All sts points are therefore always present in the boundary.
            # In fact, they *define* parts of the boundary.
            boundary_polygon = ensure_numeric(boundary_polygon)
            boundary_polygon[:, 0] -= xllcorner
            boundary_polygon[:, 1] -= yllcorner
            temp = []
            boundary_id = []
            gauge_id = []
            for i in range(len(boundary_polygon)):
                for j in range(len(x)):
                    if num.allclose(vertex_coordinates[j],
                                    boundary_polygon[i], 1e-4):
                        # FIXME:
                        # currently gauge lat and long are stored as float and
                        # then cast to double. This causes slight repositioning
                        # of vertex_coordinates.
                        temp.append(boundary_polygon[i])
                        gauge_id.append(j)
                        boundary_id.append(i)
                        break
            gauge_neighbour_id = []
            for i in range(len(boundary_id)-1):
                if boundary_id[i]+1 == boundary_id[i+1]:
                    gauge_neighbour_id.append(i+1)
                else:
                    gauge_neighbour_id.append(-1)
            if boundary_id[len(boundary_id)-1] == len(boundary_polygon)-1 \
               and boundary_id[0] == 0:
                gauge_neighbour_id.append(0)
            else:
                gauge_neighbour_id.append(-1)
            gauge_neighbour_id = ensure_numeric(gauge_neighbour_id)
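
            # Worked example of the neighbour construction above (illustrative
            # values only): if boundary_id == [3, 4, 5, 6] within a 10-vertex
            # boundary polygon, the gauges are consecutive along the boundary,
            # so gauge_neighbour_id == [1, 2, 3, -1]: entry i gives the index
            # of the next gauge along the boundary, with -1 marking the end of
            # a segment (0 is appended instead when the gauges wrap around the
            # full polygon).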

            if len(num.compress(gauge_neighbour_id >= 0, gauge_neighbour_id)) \
               != len(temp)-1:
                msg = 'incorrect number of segments'
                raise Exception(msg)
            vertex_coordinates = ensure_numeric(temp)
            if len(vertex_coordinates) == 0:
                msg = 'None of the sts gauges fall on the boundary'
                raise Exception(msg)
        else:
            gauge_neighbour_id = None

        if interpolation_points is not None:
            # Adjust for georef
            interpolation_points[:, 0] -= xllcorner
            interpolation_points[:, 1] -= yllcorner
    else:
        gauge_neighbour_id = None

    if domain_starttime is not None:
        # If domain_starttime is *later* than starttime,
        # move time back - relative to domain's time
        if domain_starttime > starttime:
            time = time - domain_starttime + starttime

        # FIXME Use method in geo to reconcile
        # if spatial:
        # assert domain.geo_reference.xllcorner == xllcorner
        # assert domain.geo_reference.yllcorner == yllcorner
        # assert domain.geo_reference.zone == zone

    if verbose:
        log.critical('File_function data obtained from: %s' % filename)
        log.critical('  References:')
        if spatial:
            log.critical('    Lower left corner: [%f, %f]'
                         % (xllcorner, yllcorner))
        log.critical('    Start time:   %f' % starttime)

    # Produce values for desired data points at
    # each timestep for each quantity
    quantities = {}
    for i, name in enumerate(quantity_names):
        quantities[name] = fid.variables[name][:]
        if boundary_polygon is not None:
            # Remove sts points that do not lie on the boundary
            quantities[name] = num.take(quantities[name], gauge_id, axis=1)

    # Close sww, tms or sts netcdf file
    fid.close()

    from anuga.fit_interpolate.interpolate import Interpolation_function

    if not spatial:
        vertex_coordinates = triangles = interpolation_points = None
    if filename[-3:] == 'sts':
        triangles = None
        # vertex_coordinates are the positions of the urs gauges

    if verbose:
        log.critical('Calling interpolation function')

    # Return Interpolation_function instance as well as
    # starttime so the caller can possibly modify that of the domain
    return (Interpolation_function(time,
                                   quantities,
                                   quantity_names,
                                   vertex_coordinates,
                                   triangles,
                                   interpolation_points,
                                   time_thinning=time_thinning,
                                   verbose=verbose,
                                   gauge_neighbour_id=gauge_neighbour_id,
                                   output_centroids=output_centroids),
            starttime)

    # NOTE (Ole): Caching Interpolation function is too slow as
    # the very long parameters need to be hashed.