source: trunk/anuga_core/source/anuga/shallow_water/data_manager.py @ 7765

Last change on this file since 7765 was 7765, checked in by hudson, 14 years ago

Refactoring - moved file conversion routines to file_conversion folder, moved file loading/saving functions to file module.

File size: 72.0 KB
Line 
1"""datamanager.py - input output for AnuGA
2
3
4This module takes care of reading and writing datafiles such as topographies,
5model output, etc
6
7
8Formats used within AnuGA:
9
10.sww: Netcdf format for storing model output f(t,x,y)
11.tms: Netcdf format for storing time series f(t)
12
13.csv: ASCII format for storing arbitrary points and associated attributes
14.pts: NetCDF format for storing arbitrary points and associated attributes
15
16.asc: ASCII format of regular DEMs as output from ArcView
17.prj: Associated ArcView file giving more meta data for asc format
18.ers: ERMapper header format of regular DEMs for ArcView
19
20.dem: NetCDF representation of regular DEM data
21
22.tsh: ASCII format for storing meshes and associated boundary and region info
23.msh: NetCDF format for storing meshes and associated boundary and region info
24
25.nc: Native ferret NetCDF format
26.geo: Houdinis ascii geometry format (?)
27
28
29A typical dataflow can be described as follows
30
31Manually created files:
32ASC, PRJ:     Digital elevation models (gridded)
33TSH:          Triangular meshes (e.g. created from anuga.pmesh)
34NC            Model outputs for use as boundary conditions (e.g. from MOST)
35
36
37AUTOMATICALLY CREATED FILES:
38
39ASC, PRJ  ->  DEM  ->  PTS: Conversion of DEMs to native pts file
40
41NC -> SWW: Conversion of MOST boundary files to boundary sww
42
43PTS + TSH -> TSH with elevation: Least squares fit
44
45TSH -> SWW:  Conversion of TSH to sww viewable using Swollen
46
47TSH + Boundary SWW -> SWW: Simulation using abstract_2d_finite_volumes
48
49"""
50
51# This file was reverted from changeset:5484 to changeset:5470 on 10th July
52# by Ole.
53
54import os, sys
55import csv
56import string
57import shutil
58from struct import unpack
59import array as p_array
60from os import sep, path, remove, mkdir, access, F_OK, W_OK, getcwd
61
62import numpy as num
63
64from Scientific.IO.NetCDF import NetCDFFile
65from os.path import exists, basename, join
66from os import getcwd
67
68from anuga.coordinate_transforms.redfearn import redfearn, \
69     convert_from_latlon_to_utm
70from anuga.coordinate_transforms.geo_reference import Geo_reference, \
71     write_NetCDF_georeference, ensure_geo_reference
72from anuga.geospatial_data.geospatial_data import Geospatial_data,\
73     ensure_absolute
74from anuga.config import minimum_storable_height as \
75     default_minimum_storable_height
76from anuga.config import netcdf_mode_r, netcdf_mode_w, netcdf_mode_a
77from anuga.config import netcdf_float, netcdf_float32, netcdf_int
78from anuga.config import max_float
79from anuga.utilities.numerical_tools import ensure_numeric,  mean
80from anuga.caching.caching import myhash
81from anuga.shallow_water.shallow_water_domain import Domain
82from anuga.abstract_2d_finite_volumes.pmesh2domain import \
83     pmesh_to_domain_instance
84from anuga.abstract_2d_finite_volumes.util import get_revision_number, \
85     remove_lone_verts, sww2timeseries, get_centroid_values
86
87from anuga.abstract_2d_finite_volumes.neighbour_mesh import segment_midpoints
88from anuga.load_mesh.loadASCII import export_mesh_file
89from anuga.geometry.polygon import intersection, inside_polygon
90from anuga.file_conversion.sww2dem import sww2dem
91
92from anuga.utilities.system_tools import get_vars_in_expression
93import anuga.utilities.log as log
94
95from anuga.utilities.file_utils import create_filename,\
96                        get_all_swwfiles
97from anuga.file.csv_file import load_csv_as_dict
98from sww_file import Read_sww, Write_sww
99
100from anuga.anuga_exceptions import DataMissingValuesError, \
101                DataFileNotOpenError, DataTimeError, DataDomainError, \
102                NewQuantity
103
104
105
106def csv2building_polygons(file_name,
107                          floor_height=3,
108                          clipping_polygons=None):
109    """
110    Convert CSV files of the form:
111
112    easting,northing,id,floors
113    422664.22,870785.46,2,0
114    422672.48,870780.14,2,0
115    422668.17,870772.62,2,0
116    422660.35,870777.17,2,0
117    422664.22,870785.46,2,0
118    422661.30,871215.06,3,1
119    422667.50,871215.70,3,1
120    422668.30,871204.86,3,1
121    422662.21,871204.33,3,1
122    422661.30,871215.06,3,1
123
124    to a dictionary of polygons with id as key.
125    The associated number of floors is converted to m above MSL and
126    returned as a separate dictionary also keyed by id.
127   
128    Optional parameter floor_height is the height of each building story.
129    Optional parameter clipping_polygons is a list of polygons selecting
130    buildings. Any building not in these polygons will be omitted.
131   
132    See csv2polygons for more details
133    """
134
135    polygons, values = csv2polygons(file_name,
136                                    value_name='floors',
137                                    clipping_polygons=clipping_polygons)
138
139   
140    heights = {}
141    for key in values.keys():
142        v = float(values[key])
143        heights[key] = v*floor_height
144       
145    return polygons, heights               
146           
147
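# Illustrative sketch (not called anywhere in this module): one way
# csv2building_polygons might be used.  The file name 'buildings.csv' and the
# clipping polygon are assumptions made up for this example; any CSV in the
# format documented above would do.
def _example_csv2building_polygons():
    # Keep only buildings fully inside the clip area; assume 3 m per storey.
    clip = [[422000.0, 870000.0], [423000.0, 870000.0],
            [423000.0, 872000.0], [422000.0, 872000.0]]
    polygons, heights = csv2building_polygons('buildings.csv',
                                              floor_height=3,
                                              clipping_polygons=[clip])
    for building_id in polygons:
        log.critical('Building %s: %d vertices, %.1f m high'
                     % (building_id, len(polygons[building_id]),
                        heights[building_id]))
    return polygons, heights
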
148##
149# @brief Convert CSV file into a dictionary of polygons and associated values.
150# @param filename The path to the file to read, value_name name for the 4th column
151def csv2polygons(file_name,
152                 value_name='value',
153                 clipping_polygons=None):
154    """
155    Convert CSV files of the form:
156
157    easting,northing,id,value
158    422664.22,870785.46,2,0
159    422672.48,870780.14,2,0
160    422668.17,870772.62,2,0
161    422660.35,870777.17,2,0
162    422664.22,870785.46,2,0
163    422661.30,871215.06,3,1
164    422667.50,871215.70,3,1
165    422668.30,871204.86,3,1
166    422662.21,871204.33,3,1
167    422661.30,871215.06,3,1
168
169    to a dictionary of polygons with id as key.
170    The associated values are returned as a separate dictionary also keyed by id.
171
172
173    easting: x coordinate relative to zone implied by the model
174    northing: y coordinate relative to zone implied by the model   
175    id: tag for polygon comprising points with this tag
176    value: numerical value associated with each polygon. It must be the same for all points in each polygon.
177   
178    The last header, value, can take on other names such as roughness, floors, etc - or it can be omitted
179    in which case the returned values will be None
180   
181    Eastings and Northings will be returned as floating point values while
182    id and values will be returned as strings.
183
184    Optional argument: clipping_polygons will select only those polygons that are
185    fully within one or more of the clipping_polygons. In other words any polygon from
186    the csv file which has at least one point not inside one of the clipping polygons
187    will be excluded
188   
189    See underlying function load_csv_as_dict for more details.
190    """
191
192    X, _ = load_csv_as_dict(file_name)
193
194    msg = 'Polygon csv file must have 3 or 4 columns'
195    assert len(X.keys()) in [3, 4], msg
196   
197    msg = 'Did not find expected column header: easting'
198    assert 'easting' in X.keys(), msg
199   
200    msg = 'Did not find expected column header: northing'   
201    assert 'northing' in X.keys(), msg
202   
203    msg = 'Did not find expected column header: id'
204    assert 'id' in X.keys(), msg
205   
206    if value_name is not None:
207        msg = 'Did not find expected column header: %s' % value_name       
208        assert value_name in X.keys(), msg   
209   
210    polygons = {}
211    if len(X.keys()) == 4:
212        values = {}
213    else:
214        values = None
215
216    # Loop through entries and compose polygons
217    excluded_polygons={}
218    past_ids = {}
219    last_id = None
220    for i, id in enumerate(X['id']):
221
222        # Check for duplicate polygons
223        if id in past_ids:
224            msg = 'Polygon %s was duplicated in line %d' % (id, i)
225            raise Exception, msg
226       
227        if id not in polygons:
228            # Start new polygon
229            polygons[id] = []
230            if values is not None:
231                values[id] = X[value_name][i]
232
233            # Keep track of previous polygon ids
234            if last_id is not None:
235                past_ids[last_id] = i
236           
237        # Append this point to current polygon
238        point = [float(X['easting'][i]), float(X['northing'][i])]
239
240        if clipping_polygons is not None:
241            exclude=True
242            for clipping_polygon in clipping_polygons:
243                if inside_polygon(point, clipping_polygon):
244                    exclude=False
245                    break
246               
247            if exclude is True:
248                excluded_polygons[id]=True
249
250        polygons[id].append(point)   
251           
252        # Check that the value is the same across each polygon
253        if values is not None:
254            msg = 'Values must be the same across each polygon. I got %s in line %d but it should have been %s' % (X[value_name][i], i, values[id])
255            assert values[id] == X[value_name][i], msg
256
257        last_id = id
258
259    # Weed out polygons that were not wholly inside clipping polygons
260    for id in excluded_polygons:
261        del polygons[id]
262       
263    return polygons, values
264
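# Illustrative sketch (not called anywhere in this module): reading a generic
# polygon CSV with csv2polygons.  The file name 'roughness_zones.csv' and the
# column name 'roughness' are assumptions for this example only.
def _example_csv2polygons():
    polygons, values = csv2polygons('roughness_zones.csv',
                                    value_name='roughness')
    # polygons maps id -> list of [easting, northing] points,
    # values maps id -> the (string) value shared by all points of the polygon.
    for polygon_id, polygon in polygons.items():
        log.critical('Polygon %s has %d vertices and roughness %s'
                     % (polygon_id, len(polygon), values[polygon_id]))
    return polygons, values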
265
266           
267
268
269
270##
271# @brief
272# @param filename
273# @param x
274# @param y
275# @param z
276def write_obj(filename, x, y, z):
277    """Store x,y,z vectors into filename (obj format).
278
279       Vectors are assumed to have dimension (M,3) where
280       M corresponds to the number of elements.
281       Triangles are assumed to be disconnected.
282
283       The three numbers in each vector correspond to three vertices,
284
285       e.g. the x coordinate of vertex 1 of element i is in x[i,1]
286    """
287
288    import os.path
289
290    root, ext = os.path.splitext(filename)
291    if ext == '.obj':
292        FN = filename
293    else:
294        FN = filename + '.obj'
295
296    outfile = open(FN, 'wb')
297    outfile.write("# Triangulation as an obj file\n")
298
299    M, N = x.shape
300    assert N == 3  #Assuming three vertices per element
301
302    for i in range(M):
303        for j in range(N):
304            outfile.write("v %f %f %f\n" % (x[i,j], y[i,j], z[i,j]))
305
306    for i in range(M):
307        base = i * N
308        outfile.write("f %d %d %d\n" % (base+1, base+2, base+3))
309
310    outfile.close()
311
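# Illustrative sketch (not called anywhere in this module): writing two
# disconnected triangles with write_obj.  The output name 'two_triangles' is
# an assumption; the '.obj' extension is appended automatically.
def _example_write_obj():
    # Each row holds the x (or y, z) coordinates of one triangle's three vertices.
    x = num.array([[0.0, 1.0, 0.0], [1.0, 2.0, 1.0]])
    y = num.array([[0.0, 0.0, 1.0], [1.0, 1.0, 2.0]])
    z = num.array([[0.0, 0.0, 0.0], [0.5, 0.5, 0.5]])
    write_obj('two_triangles', x, y, z)
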
312##
313# @brief Filter data file, selecting timesteps first:step:last.
314# @param filename1 Data file to filter.
315# @param filename2 File to write filtered timesteps to.
316# @param first First timestep.
317# @param last Last timestep.
318# @param step Timestep stride.
319def filter_netcdf(filename1, filename2, first=0, last=None, step=1):
320    """Filter data file, selecting timesteps first:step:last.
321   
322    Read netcdf filename1, pick timesteps first:step:last and save to
323    netcdf file filename2
324    """
325
326    from Scientific.IO.NetCDF import NetCDFFile
327
328    # Get NetCDF
329    infile = NetCDFFile(filename1, netcdf_mode_r)  #Open existing file for read
330    outfile = NetCDFFile(filename2, netcdf_mode_w)  #Open new file
331
332    # Copy dimensions
333    for d in infile.dimensions:
334        outfile.createDimension(d, infile.dimensions[d])
335
336    # Copy variable definitions
337    for name in infile.variables:
338        var = infile.variables[name]
339        outfile.createVariable(name, var.dtype.char, var.dimensions)
340
341    # Copy the static variables
342    for name in infile.variables:
343        if name == 'time' or name == 'stage':
344            pass
345        else:
346            outfile.variables[name][:] = infile.variables[name][:]
347
348    # Copy selected timesteps
349    time = infile.variables['time']
350    stage = infile.variables['stage']
351
352    newtime = outfile.variables['time']
353    newstage = outfile.variables['stage']
354
355    if last is None:
356        last = len(time)
357
358    selection = range(first, last, step)
359    for i, j in enumerate(selection):
360        log.critical('Copying timestep %d of %d (%f)'
361                     % (j, last-first, time[j]))
362        newtime[i] = time[j]
363        newstage[i,:] = stage[j,:]
364
365    # Close
366    infile.close()
367    outfile.close()
368
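# Illustrative sketch (not called anywhere in this module): keeping every
# second timestep of an sww file with filter_netcdf.  The file names are
# assumptions for this example.
def _example_filter_netcdf():
    # Copy timesteps 0, 2, 4, ... from the source file into a thinned copy.
    filter_netcdf('full_resolution.sww', 'thinned.sww', first=0, step=2)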
369
370##
371# @brief
372# @param basename_in
373# @param extra_name_out
374# @param quantities
375# @param timestep
376# @param reduction
377# @param cellsize
378# @param number_of_decimal_places
379# @param NODATA_value
380# @param easting_min
381# @param easting_max
382# @param northing_min
383# @param northing_max
384# @param verbose
385# @param origin
386# @param datum
387# @param format
388# @return
389def export_grid(basename_in, extra_name_out=None,
390                quantities=None, # defaults to elevation
391                reduction=None,
392                cellsize=10,
393                number_of_decimal_places=None,
394                NODATA_value=-9999,
395                easting_min=None,
396                easting_max=None,
397                northing_min=None,
398                northing_max=None,
399                verbose=False,
400                origin=None,
401                datum='WGS84',
402                format='ers'):
403    """Wrapper for sww2dem.
404    See sww2dem to find out what most of the parameters do.
405
406    Quantities is a list of quantities.  Each quantity will be
407    calculated for each sww file.
408
409    This returns the basenames of the files produced: the directory plus
410    the file name without the extension.
411
412    One output file is produced for each input sww file and each
413    requested quantity, so the returned list has one entry per sww file
414    per quantity.
415    """
416
417    if quantities is None:
418        quantities = ['elevation']
419
420    if type(quantities) is str:
421        quantities = [quantities]
422
423    # How many sww files are there?
424    dir, base = os.path.split(basename_in)
425
426    iterate_over = get_all_swwfiles(dir, base, verbose)
427
428    if dir == "":
429        dir = "." # Unix compatibility
430
431    files_out = []
432    for sww_file in iterate_over:
433        for quantity in quantities:
434            if extra_name_out is None:
435                basename_out = sww_file + '_' + quantity
436            else:
437                basename_out = sww_file + '_' + quantity + '_' + extra_name_out
438
439            file_out = sww2dem(dir+sep+sww_file, dir+sep+basename_out,
440                               quantity,
441                               reduction,
442                               cellsize,
443                               number_of_decimal_places,
444                               NODATA_value,
445                               easting_min,
446                               easting_max,
447                               northing_min,
448                               northing_max,
449                               verbose,
450                               origin,
451                               datum,
452                               format)
453            files_out.append(file_out)
454    return files_out
455
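# Illustrative sketch (not called anywhere in this module): exporting grids for
# two quantities from all sww files matching a basename.  The basename
# 'simulation', the 10 m cell size and the use of max as the reduction are
# assumptions for this example.
def _example_export_grid():
    files = export_grid('simulation',
                        quantities=['elevation', 'stage'],
                        reduction=max,
                        cellsize=10,
                        format='asc',
                        verbose=True)
    # One basename (directory + file name, no extension) is returned per
    # sww file and quantity.
    return files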
456
457
458##
459# @brief Get the extents of a NetCDF data file.
460# @param file_name The path to the SWW file.
461# @return A list of x, y, z and stage limits (min, max).
462def extent_sww(file_name):
463    """Read in an sww file.
464
465    Input:
466    file_name - the sww file
467
468    Output:
469    A list: [min(x),max(x),min(y),max(y),min(stage.flat),max(stage.flat)]
470    """
471
472    from Scientific.IO.NetCDF import NetCDFFile
473
474    #Get NetCDF
475    fid = NetCDFFile(file_name, netcdf_mode_r)
476
477    # Get the variables
478    x = fid.variables['x'][:]
479    y = fid.variables['y'][:]
480    stage = fid.variables['stage'][:]
481
482    fid.close()
483
484    return [min(x), max(x), min(y), max(y), num.min(stage), num.max(stage)]
485
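# Illustrative sketch (not called anywhere in this module): querying the
# spatial and stage extents of an sww file.  The file name is an assumption.
def _example_extent_sww():
    xmin, xmax, ymin, ymax, stage_min, stage_max = extent_sww('simulation.sww')
    log.critical('x range: [%f, %f], y range: [%f, %f], stage range: [%f, %f]'
                 % (xmin, xmax, ymin, ymax, stage_min, stage_max))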
486
487##
488# @brief
489# @param filename
490# @param boundary
491# @param t
492# @param fail_if_NaN
493# @param NaN_filler
494# @param verbose
495# @param very_verbose
496# @return
497def load_sww_as_domain(filename, boundary=None, t=None,
498               fail_if_NaN=True, NaN_filler=0,
499               verbose=False, very_verbose=False):
500    """
501    Usage: domain = sww2domain('file.sww',t=time (default = last time in file))
502
503    Boundary is not recommended if domain.smooth is not selected, as it
504    uses unique coordinates, but not unique boundaries. This means that
505    the boundary file will not be compatible with the coordinates, and will
506    give a different final boundary, or crash.
507    """
508   
509    from Scientific.IO.NetCDF import NetCDFFile
510    from shallow_water import Domain
511
512    # initialise NaN.
513    NaN = 9.969209968386869e+036
514
515    if verbose: log.critical('Reading from %s' % filename)
516
517    fid = NetCDFFile(filename, netcdf_mode_r)    # Open existing file for read
518    time = fid.variables['time']       # Timesteps
519    if t is None:
520        t = time[-1]
521    time_interp = get_time_interp(time,t)
522
523    # Get the variables as numeric arrays
524    x = fid.variables['x'][:]                   # x-coordinates of vertices
525    y = fid.variables['y'][:]                   # y-coordinates of vertices
526    elevation = fid.variables['elevation']      # Elevation
527    stage = fid.variables['stage']              # Water level
528    xmomentum = fid.variables['xmomentum']      # Momentum in the x-direction
529    ymomentum = fid.variables['ymomentum']      # Momentum in the y-direction
530
531    starttime = fid.starttime[0]
532    volumes = fid.variables['volumes'][:]       # Connectivity
533    coordinates = num.transpose(num.asarray([x.tolist(), y.tolist()]))
534    # FIXME (Ole): Something like this might be better:
535    #                 concatenate((x, y), axis=1)
536    # or              concatenate((x[:,num.newaxis], x[:,num.newaxis]), axis=1)
537
538    conserved_quantities = []
539    interpolated_quantities = {}
540    other_quantities = []
541
542    # get geo_reference
543    try:                             # sww files don't have to have a geo_ref
544        geo_reference = Geo_reference(NetCDFObject=fid)
545    except: # AttributeError, e:
546        geo_reference = None
547
548    if verbose: log.critical('    getting quantities')
549
550    for quantity in fid.variables.keys():
551        dimensions = fid.variables[quantity].dimensions
552        if 'number_of_timesteps' in dimensions:
553            conserved_quantities.append(quantity)
554            interpolated_quantities[quantity] = \
555                  interpolated_quantity(fid.variables[quantity][:], time_interp)
556        else:
557            other_quantities.append(quantity)
558
559    other_quantities.remove('x')
560    other_quantities.remove('y')
561    #other_quantities.remove('z')
562    other_quantities.remove('volumes')
563    try:
564        other_quantities.remove('stage_range')
565        other_quantities.remove('xmomentum_range')
566        other_quantities.remove('ymomentum_range')
567        other_quantities.remove('elevation_range')
568    except:
569        pass
570
571    conserved_quantities.remove('time')
572
573    if verbose: log.critical('    building domain')
574
575    #    From domain.Domain:
576    #    domain = Domain(coordinates, volumes,\
577    #                    conserved_quantities = conserved_quantities,\
578    #                    other_quantities = other_quantities,zone=zone,\
579    #                    xllcorner=xllcorner, yllcorner=yllcorner)
580
581    # From shallow_water.Domain:
582    coordinates = coordinates.tolist()
583    volumes = volumes.tolist()
584    # FIXME:should this be in mesh? (peter row)
585    if fid.smoothing == 'Yes':
586        unique = False
587    else:
588        unique = True
589    if unique:
590        coordinates, volumes, boundary = weed(coordinates, volumes,boundary)
591
592     
593   
594    try:
595        domain = Domain(coordinates, volumes, boundary)
596    except AssertionError, e:
597        fid.close()
598        msg = 'Domain could not be created: %s. ' \
599              'Perhaps use "fail_if_NaN=False and NaN_filler = ..."' % e
600        raise DataDomainError, msg
601
602    if not boundary is None:
603        domain.boundary = boundary
604
605    domain.geo_reference = geo_reference
606
607    domain.starttime = float(starttime) + float(t)
608    domain.time = 0.0
609
610    for quantity in other_quantities:
611        try:
612            NaN = fid.variables[quantity].missing_value
613        except:
614            pass                       # quantity has no missing_value number
615        X = fid.variables[quantity][:]
616        if very_verbose:
617            log.critical('       %s' % str(quantity))
618            log.critical('        NaN = %s' % str(NaN))
619            log.critical('        max(X)')
620            log.critical('       %s' % str(max(X)))
621            log.critical('        max(X)==NaN')
622            log.critical('       %s' % str(max(X)==NaN))
623            log.critical('')
624        if max(X) == NaN or min(X) == NaN:
625            if fail_if_NaN:
626                msg = 'quantity "%s" contains no_data entry' % quantity
627                raise DataMissingValuesError, msg
628            else:
629                data = (X != NaN)
630                X = (X*data) + (data==0)*NaN_filler
631        if unique:
632            X = num.resize(X, (len(X)/3, 3))
633        domain.set_quantity(quantity, X)
634    #
635    for quantity in conserved_quantities:
636        try:
637            NaN = fid.variables[quantity].missing_value
638        except:
639            pass                       # quantity has no missing_value number
640        X = interpolated_quantities[quantity]
641        if very_verbose:
642            log.critical('       %s' % str(quantity))
643            log.critical('        NaN = %s' % str(NaN))
644            log.critical('        max(X)')
645            log.critical('       %s' % str(max(X)))
646            log.critical('        max(X)==NaN')
647            log.critical('       %s' % str(max(X)==NaN))
648            log.critical('')
649        if max(X) == NaN or min(X) == NaN:
650            if fail_if_NaN:
651                msg = 'quantity "%s" contains no_data entry' % quantity
652                raise DataMissingValuesError, msg
653            else:
654                data = (X != NaN)
655                X = (X*data) + (data==0)*NaN_filler
656        if unique:
657            X = num.resize(X, (X.shape[0]/3, 3))
658        domain.set_quantity(quantity, X)
659
660    fid.close()
661
662    return domain
663
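# Illustrative sketch (not called anywhere in this module): restarting a model
# from a stored sww file.  The file name and the chosen time are assumptions.
def _example_load_sww_as_domain():
    # Rebuild a Domain from the state stored at t=3600 s, replacing any
    # missing-data entries with 0.0 instead of failing.
    domain = load_sww_as_domain('simulation.sww', t=3600,
                                fail_if_NaN=False, NaN_filler=0)
    return domain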
664
665##
666# @brief Interpolate a quantity wrt time.
667# @param saved_quantity The quantity to interpolate.
668# @param time_interp (index, ratio)
669# @return A vector of interpolated values.
670def interpolated_quantity(saved_quantity, time_interp):
671    '''Given an index and ratio, interpolate quantity with respect to time.'''
672
673    index, ratio = time_interp
674
675    Q = saved_quantity
676
677    if ratio > 0:
678        q = (1-ratio)*Q[index] + ratio*Q[index+1]
679    else:
680        q = Q[index]
681
682    #Return vector of interpolated values
683    return q
684
685
686##
687# @brief
688# @param time
689# @param t
690# @return An (index, ratio) tuple.
691def get_time_interp(time, t=None):
692    #Finds the ratio and index for time interpolation.
693    #It is borrowed from previous abstract_2d_finite_volumes code.
694    if t is None:
695        t=time[-1]
696        index = -1
697        ratio = 0.
698    else:
699        T = time
700        tau = t
701        index=0
702        msg = 'Time interval derived from file %s [%s:%s]' \
703              % ('FIXMEfilename', T[0], T[-1])
704        msg += ' does not match model time: %s' % tau
705        if tau < time[0]: raise DataTimeError, msg
706        if tau > time[-1]: raise DataTimeError, msg
707        while tau > time[index]: index += 1
708        while tau < time[index]: index -= 1
709        if tau == time[index]:
710            #Protect against case where tau == time[-1] (last time)
711            # - also works in general when tau == time[i]
712            ratio = 0
713        else:
714            #t is now between index and index+1
715            ratio = (tau - time[index])/(time[index+1] - time[index])
716
717    return (index, ratio)
718
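# Illustrative sketch (not called anywhere in this module): how get_time_interp
# and interpolated_quantity work together.  The times and stage values below
# are made up for this example.
def _example_time_interpolation():
    time = num.array([0.0, 10.0, 20.0])                      # Stored timesteps
    stage = num.array([[1.0, 1.0], [2.0, 2.0], [4.0, 4.0]])  # f(t, point)
    index, ratio = get_time_interp(time, t=15.0)              # index=1, ratio=0.5
    # Linear interpolation: (1-0.5)*[2,2] + 0.5*[4,4] == [3,3]
    return interpolated_quantity(stage, (index, ratio))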
719
720##
721# @brief
722# @param coordinates
723# @param volumes
724# @param boundary
725def weed(coordinates, volumes, boundary=None):
726    if isinstance(coordinates, num.ndarray):
727        coordinates = coordinates.tolist()
728    if isinstance(volumes, num.ndarray):
729        volumes = volumes.tolist()
730
731    unique = False
732    point_dict = {}
733    same_point = {}
734    for i in range(len(coordinates)):
735        point = tuple(coordinates[i])
736        if point_dict.has_key(point):
737            unique = True
738            same_point[i] = point
739            #to change all point i references to point j
740        else:
741            point_dict[point] = i
742            same_point[i] = point
743
744    coordinates = []
745    i = 0
746    for point in point_dict.keys():
747        point = tuple(point)
748        coordinates.append(list(point))
749        point_dict[point] = i
750        i += 1
751
752    for volume in volumes:
753        for i in range(len(volume)):
754            index = volume[i]
755            if index > -1:
756                volume[i] = point_dict[same_point[index]]
757
758    new_boundary = {}
759    if not boundary is None:
760        for segment in boundary.keys():
761            point0 = point_dict[same_point[segment[0]]]
762            point1 = point_dict[same_point[segment[1]]]
763            label = boundary[segment]
764            #FIXME should the boundary attributes be concatenated
765            #('exterior, pond') or replaced ('pond')? (peter row)
766
767            if new_boundary.has_key((point0, point1)):
768                new_boundary[(point0,point1)] = new_boundary[(point0, point1)]
769
770            elif new_boundary.has_key((point1, point0)):
771                new_boundary[(point1,point0)] = new_boundary[(point1, point0)]
772            else: new_boundary[(point0, point1)] = label
773
774        boundary = new_boundary
775
776    return coordinates, volumes, boundary
777
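# Illustrative sketch (not called anywhere in this module): weed merges
# duplicated vertices.  Two triangles sharing an edge are stored here with six
# (partly duplicated) points; weed reduces them to the four unique points and
# renumbers the triangles accordingly.
def _example_weed():
    coordinates = [[0.0, 0.0], [1.0, 0.0], [0.0, 1.0],
                   [1.0, 0.0], [1.0, 1.0], [0.0, 1.0]]
    volumes = [[0, 1, 2], [3, 4, 5]]
    coordinates, volumes, boundary = weed(coordinates, volumes)
    # len(coordinates) is now 4 and both triangles index into the same points.
    return coordinates, volumes, boundary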
778
779##
780# @brief Read DEM file, decimate data, write new DEM file.
781# @param basename_in Stem of the input filename.
782# @param stencil
783# @param cellsize_new New cell size to resample on.
784# @param basename_out Stem of the output filename.
785# @param verbose True if this function is to be verbose.
786def decimate_dem(basename_in, stencil, cellsize_new, basename_out=None,
787                 verbose=False):
788    """Read Digitial Elevation model from the following NetCDF format (.dem)
789
790    Example:
791
792    ncols         3121
793    nrows         1800
794    xllcorner     722000
795    yllcorner     5893000
796    cellsize      25
797    NODATA_value  -9999
798    138.3698 137.4194 136.5062 135.5558 ..........
799
800    Decimate data to cellsize_new using stencil and write to NetCDF dem format.
801    """
802
803    import os
804    from Scientific.IO.NetCDF import NetCDFFile
805
806    root = basename_in
807    inname = root + '.dem'
808
809    #Open existing netcdf file to read
810    infile = NetCDFFile(inname, netcdf_mode_r)
811
812    if verbose: log.critical('Reading DEM from %s' % inname)
813
814    # Read metadata (convert from numpy.int32 to int where appropriate)
815    ncols = int(infile.ncols[0])
816    nrows = int(infile.nrows[0])
817    xllcorner = infile.xllcorner[0]
818    yllcorner = infile.yllcorner[0]
819    cellsize = int(infile.cellsize[0])
820    NODATA_value = int(infile.NODATA_value[0])
821    zone = int(infile.zone[0])
822    false_easting = infile.false_easting[0]
823    false_northing = infile.false_northing[0]
824    projection = infile.projection
825    datum = infile.datum
826    units = infile.units
827
828    dem_elevation = infile.variables['elevation']
829
830    #Get output file name
831    if basename_out == None:
832        outname = root + '_' + repr(cellsize_new) + '.dem'
833    else:
834        outname = basename_out + '.dem'
835
836    if verbose: log.critical('Write decimated NetCDF file to %s' % outname)
837
838    #Determine some dimensions for decimated grid
839    (nrows_stencil, ncols_stencil) = stencil.shape
840    x_offset = ncols_stencil / 2
841    y_offset = nrows_stencil / 2
842    cellsize_ratio = int(cellsize_new / cellsize)
843    ncols_new = 1 + (ncols - ncols_stencil) / cellsize_ratio
844    nrows_new = 1 + (nrows - nrows_stencil) / cellsize_ratio
845
846    #print type(ncols_new), ncols_new
847   
848    #Open netcdf file for output
849    outfile = NetCDFFile(outname, netcdf_mode_w)
850
851    #Create new file
852    outfile.institution = 'Geoscience Australia'
853    outfile.description = 'NetCDF DEM format for compact and portable ' \
854                          'storage of spatial point data'
855
856    #Georeferencing
857    outfile.zone = zone
858    outfile.projection = projection
859    outfile.datum = datum
860    outfile.units = units
861
862    outfile.cellsize = cellsize_new
863    outfile.NODATA_value = NODATA_value
864    outfile.false_easting = false_easting
865    outfile.false_northing = false_northing
866
867    outfile.xllcorner = xllcorner + (x_offset * cellsize)
868    outfile.yllcorner = yllcorner + (y_offset * cellsize)
869    outfile.ncols = ncols_new
870    outfile.nrows = nrows_new
871
872    # dimension definition
873    #print nrows_new, ncols_new, nrows_new*ncols_new
874    #print type(nrows_new), type(ncols_new), type(nrows_new*ncols_new)
875    outfile.createDimension('number_of_points', nrows_new*ncols_new)
876
877    # variable definition
878    outfile.createVariable('elevation', netcdf_float, ('number_of_points',))
879
880    # Get handle to the variable
881    elevation = outfile.variables['elevation']
882
883    dem_elevation_r = num.reshape(dem_elevation, (nrows, ncols))
884
885    #Store data
886    global_index = 0
887    for i in range(nrows_new):
888        if verbose: log.critical('Processing row %d of %d' % (i, nrows_new))
889
890        lower_index = global_index
891        telev = num.zeros(ncols_new, num.float)
892        local_index = 0
893        trow = i * cellsize_ratio
894
895        for j in range(ncols_new):
896            tcol = j * cellsize_ratio
897            tmp = dem_elevation_r[trow:trow+nrows_stencil,
898                                  tcol:tcol+ncols_stencil]
899
900            #if dem contains 1 or more NODATA_values set value in
901            #decimated dem to NODATA_value, else compute decimated
902            #value using stencil
903            if num.sum(num.sum(num.equal(tmp, NODATA_value))) > 0:
904                telev[local_index] = NODATA_value
905            else:
906                telev[local_index] = num.sum(num.sum(tmp * stencil))
907
908            global_index += 1
909            local_index += 1
910
911        upper_index = global_index
912
913        elevation[lower_index:upper_index] = telev
914
915    assert global_index == nrows_new*ncols_new, \
916           'index not equal to number of points'
917
918    infile.close()
919    outfile.close()
920
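# Illustrative sketch (not called anywhere in this module): decimating a 25 m
# DEM to 100 m using a 4x4 averaging stencil.  The basename 'topography' is an
# assumption; 'topography.dem' must already exist in the NetCDF DEM format
# described above.
def _example_decimate_dem():
    stencil = num.ones((4, 4), num.float) / 16.0   # Simple 4x4 average
    decimate_dem('topography', stencil, cellsize_new=100,
                 basename_out='topography_100m', verbose=True)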
921
922    ####  URS 2 SWW  ###
923
924# Definitions of various NetCDF dimension names, etc.
925lon_name = 'LON'
926lat_name = 'LAT'
927time_name = 'TIME'
928precision = netcdf_float # So if we want to change the precision, it's done here
929
930##
931# @brief Class for a NetCDF data file writer.
932class Write_nc:
933    """Write an nc file.
934
935    Note, this should be checked to meet cdc netcdf conventions for gridded
936    data. http://www.cdc.noaa.gov/cdc/conventions/cdc_netcdf_standard.shtml
937    """
938
939    ##
940    # @brief Instantiate a Write_nc instance.
941    # @param quantity_name
942    # @param file_name
943    # @param time_step_count The number of time steps.
944    # @param time_step The time_step size.
945    # @param lon
946    # @param lat
947    def __init__(self,
948                 quantity_name,
949                 file_name,
950                 time_step_count,
951                 time_step,
952                 lon,
953                 lat):
954        """Instantiate a Write_nc instance (NetCDF file writer).
955
956        time_step_count is the number of time steps.
957        time_step is the time step size
958
959        pre-condition: quantity_name must be 'HA', 'UA' or 'VA'.
960        """
961
962        self.quantity_name = quantity_name
963        quantity_units = {'HA':'CENTIMETERS',
964                          'UA':'CENTIMETERS/SECOND',
965                          'VA':'CENTIMETERS/SECOND'}
966
967        multiplier_dic = {'HA':100.0,   # To convert from m to cm
968                          'UA':100.0,   #             and m/s to cm/sec
969                          'VA':-100.0}  # MUX files have positive x in the
970                                        # Southern direction.  This corrects
971                                        # for it, when writing nc files.
972
973        self.quantity_multiplier =  multiplier_dic[self.quantity_name]
974
975        #self.file_name = file_name
976        self.time_step_count = time_step_count
977        self.time_step = time_step
978
979        # NetCDF file definition
980        self.outfile = NetCDFFile(file_name, netcdf_mode_w)
981        outfile = self.outfile
982
983        #Create new file
984        nc_lon_lat_header(outfile, lon, lat)
985
986        # TIME
987        outfile.createDimension(time_name, None)
988        outfile.createVariable(time_name, precision, (time_name,))
989
990        #QUANTITY
991        outfile.createVariable(self.quantity_name, precision,
992                               (time_name, lat_name, lon_name))
993        outfile.variables[self.quantity_name].missing_value = -1.e+034
994        outfile.variables[self.quantity_name].units = \
995                                 quantity_units[self.quantity_name]
996        outfile.variables[lon_name][:]= ensure_numeric(lon)
997        outfile.variables[lat_name][:]= ensure_numeric(lat)
998
999        #Assume no one will be wanting to read this, while we are writing
1000        #outfile.close()
1001
1002    ##
1003    # @brief Write a time-step of quantity data.
1004    # @param quantity_slice The data to be stored for this time-step.
1005    def store_timestep(self, quantity_slice):
1006        """Write a time slice of quantity info
1007
1008        quantity_slice is the data to be stored at this time step
1009        """
1010
1011        # Get the variables
1012        time = self.outfile.variables[time_name]
1013        quantity = self.outfile.variables[self.quantity_name]
1014
1015        # get index of slice to write
1016        i = len(time)
1017
1018        #Store time
1019        time[i] = i * self.time_step    #self.domain.time
1020        quantity[i,:] = quantity_slice * self.quantity_multiplier
1021
1022    ##
1023    # @brief Close file underlying the class instance.
1024    def close(self):
1025        self.outfile.close()
1026
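# Illustrative sketch (not called anywhere in this module): writing a small
# 'HA' (wave height) nc file with Write_nc.  The grid, time step and file name
# are made up; real data would come from a URS/MOST source.
def _example_write_nc():
    lon = [150.0, 150.25, 150.5]
    lat = [-35.5, -35.25]
    writer = Write_nc('HA', 'ha_example.nc',
                      time_step_count=2, time_step=60.0,
                      lon=lon, lat=lat)
    for _ in range(2):
        # One slice per time step, shaped (len(lat), len(lon)), in metres;
        # Write_nc converts to centimetres internally.
        writer.store_timestep(num.zeros((2, 3), num.float))
    writer.close()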
1027
1028
1029##
1030# @brief Write an NC elevation file.
1031# @param file_out Path to the output file.
1032# @param lon ??
1033# @param lat ??
1034# @param depth_vector The elevation data to write.
1035def write_elevation_nc(file_out, lon, lat, depth_vector):
1036    """Write an nc elevation file."""
1037
1038    # NetCDF file definition
1039    outfile = NetCDFFile(file_out, netcdf_mode_w)
1040
1041    #Create new file
1042    nc_lon_lat_header(outfile, lon, lat)
1043
1044    # ELEVATION
1045    zname = 'ELEVATION'
1046    outfile.createVariable(zname, precision, (lat_name, lon_name))
1047    outfile.variables[zname].units = 'CENTIMETERS'
1048    outfile.variables[zname].missing_value = -1.e+034
1049
1050    outfile.variables[lon_name][:] = ensure_numeric(lon)
1051    outfile.variables[lat_name][:] = ensure_numeric(lat)
1052
1053    depth = num.reshape(depth_vector, (len(lat), len(lon)))
1054    outfile.variables[zname][:] = depth
1055
1056    outfile.close()
1057
1058
1059##
1060# @brief Write lat/lon headers to a NetCDF file.
1061# @param outfile Handle to open file to write to.
1062# @param lon An iterable of the longitudes.
1063# @param lat An iterable of the latitudes.
1064# @note Defines lat/long dimensions and variables. Sets various attributes:
1065#          .point_spacing  and  .units
1066#       and writes lat/lon data.
1067
1068def nc_lon_lat_header(outfile, lon, lat):
1069    """Write lat/lon headers to a NetCDF file.
1070
1071    outfile is the netcdf file handle.
1072    lon - a list/array of the longitudes
1073    lat - a list/array of the latitudes
1074    """
1075
1076    outfile.institution = 'Geoscience Australia'
1077    outfile.description = 'Converted from URS binary C'
1078
1079    # Longitude
1080    outfile.createDimension(lon_name, len(lon))
1081    outfile.createVariable(lon_name, precision, (lon_name,))
1082    outfile.variables[lon_name].point_spacing = 'uneven'
1083    outfile.variables[lon_name].units = 'degrees_east'
1084    outfile.variables[lon_name].assignValue(lon)
1085
1086    # Latitude
1087    outfile.createDimension(lat_name, len(lat))
1088    outfile.createVariable(lat_name, precision, (lat_name,))
1089    outfile.variables[lat_name].point_spacing = 'uneven'
1090    outfile.variables[lat_name].units = 'degrees_north'
1091    outfile.variables[lat_name].assignValue(lat)
1092
1093
1094##
1095# @brief
1096# @param long_lat_dep
1097# @return A tuple (long, lat, quantity).
1098# @note The latitude is the fastest varying dimension - in mux files.
1099def lon_lat2grid(long_lat_dep):
1100    """
1101    given a list of points that are assumed to be on a grid,
1102    return the long's and lat's of the grid.
1103    long_lat_dep is an array where each row is a position.
1104    The first column is longitudes.
1105    The second column is latitudes.
1106
1107    The latitude is the fastest varying dimension - in mux files
1108    """
1109
1110    LONG = 0
1111    LAT = 1
1112    QUANTITY = 2
1113
1114    long_lat_dep = ensure_numeric(long_lat_dep, num.float)
1115
1116    num_points = long_lat_dep.shape[0]
1117    this_rows_long = long_lat_dep[0,LONG]
1118
1119    # Count the length of unique latitudes
1120    i = 0
1121    while i < num_points and long_lat_dep[i,LONG] == this_rows_long:
1122        i += 1
1123
1124    # determine the lats and longs from the grid
1125    lat = long_lat_dep[:i, LAT]
1126    long = long_lat_dep[::i, LONG]
1127
1128    lenlong = len(long)
1129    lenlat = len(lat)
1130
1131    msg = 'Input data is not gridded'
1132    assert num_points % lenlat == 0, msg
1133    assert num_points % lenlong == 0, msg
1134
1135    # Test that data is gridded
1136    for i in range(lenlong):
1137        msg = 'Data is not gridded.  It must be for this operation'
1138        first = i * lenlat
1139        last = first + lenlat
1140
1141        assert num.allclose(long_lat_dep[first:last,LAT], lat), msg
1142        assert num.allclose(long_lat_dep[first:last,LONG], long[i]), msg
1143
1144    msg = 'Out of range latitudes/longitudes'
1145    for l in lat:assert -90 < l < 90 , msg
1146    for l in long:assert -180 < l < 180 , msg
1147
1148    # Changing quantity from lat being the fastest varying dimension to
1149    # long being the fastest varying dimension
1150    # FIXME - make this faster/do this a better way
1151    # use numeric transpose, after reshaping the quantity vector
1152    quantity = num.zeros(num_points, num.float)
1153
1154    for lat_i, _ in enumerate(lat):
1155        for long_i, _ in enumerate(long):
1156            q_index = lat_i*lenlong + long_i
1157            lld_index = long_i*lenlat + lat_i
1158            temp = long_lat_dep[lld_index, QUANTITY]
1159            quantity[q_index] = temp
1160
1161    return long, lat, quantity
1162
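# Illustrative sketch (not called anywhere in this module): lon_lat2grid on a
# tiny 2x2 grid.  Rows are (long, lat, quantity) with latitude varying fastest,
# as in mux files; the coordinates and values are made up.
def _example_lon_lat2grid():
    long_lat_dep = [[150.0, -35.0, 1.0],
                    [150.0, -34.0, 2.0],
                    [151.0, -35.0, 3.0],
                    [151.0, -34.0, 4.0]]
    longitudes, latitudes, quantity = lon_lat2grid(long_lat_dep)
    # longitudes == [150.0, 151.0], latitudes == [-35.0, -34.0] and quantity is
    # reordered so that longitude becomes the fastest varying dimension.
    return longitudes, latitudes, quantity
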
1163################################################################################
1164# END URS 2 SWW
1165################################################################################
1166
1167################################################################################
1168# URS UNGRIDDED 2 SWW
1169################################################################################
1170
1171### PRODUCING THE POINTS NEEDED FILE ###
1172
1173# Ones used for FESA 2007 results
1174#LL_LAT = -50.0
1175#LL_LONG = 80.0
1176#GRID_SPACING = 1.0/60.0
1177#LAT_AMOUNT = 4800
1178#LONG_AMOUNT = 3600
1179
1180
1181##
1182# @brief
1183# @param file_name
1184# @param boundary_polygon
1185# @param zone
1186# @param ll_lat
1187# @param ll_long
1188# @param grid_spacing
1189# @param lat_amount
1190# @param long_amount
1191# @param isSouthernHemisphere
1192# @param export_csv
1193# @param use_cache
1194# @param verbose True if this function is to be verbose.
1195# @return
1196def URS_points_needed_to_file(file_name, boundary_polygon, zone,
1197                              ll_lat, ll_long,
1198                              grid_spacing,
1199                              lat_amount, long_amount,
1200                              isSouthernHemisphere=True,
1201                              export_csv=False, use_cache=False,
1202                              verbose=False):
1203    """
1204    Given the info to replicate the URS grid and a polygon output
1205    a file that specifies the cloud of boundary points for URS.
1206
1207    This creates a .urs file.  This is in the format used by URS;
1208    1st line is the number of points,
1209    each line after represents a point, in lats and longs.
1210
1211    Note: The polygon cannot cross zones or hemispheres.
1212
1213    A work-a-round for different zones or hemispheres is to run this twice,
1214    once for each zone, and then combine the output.
1215
1216    file_name - name of the urs file produced for David.
1217    boundary_polygon - a list of points that describes a polygon.
1218                      The last point is assumed to join the first point.
1219                      This is in UTM (lat long would be better though)
1220
1221     This is info about the URS model that needs to be inputted.
1222
1223    ll_lat - lower left latitude, in decimal degrees
1224    ll-long - lower left longitude, in decimal degrees
1225    grid_spacing - in decimal degrees
1226    lat_amount - number of latitudes
1227    long_amount - number of longitudes
1228
1229    Don't add the file extension.  It will be added.
1230    """
1231
1232    geo = URS_points_needed(boundary_polygon, zone, ll_lat, ll_long,
1233                            grid_spacing,
1234                            lat_amount, long_amount, isSouthernHemisphere,
1235                            use_cache, verbose)
1236
1237    if not file_name[-4:] == ".urs":
1238        file_name += ".urs"
1239
1240    geo.export_points_file(file_name, isSouthHemisphere=isSouthernHemisphere)
1241
1242    if export_csv:
1243        if file_name[-4:] == ".urs":
1244            file_name = file_name[:-4] + ".csv"
1245        geo.export_points_file(file_name)
1246
1247    return geo
1248
1249
1250##
1251# @brief
1252# @param boundary_polygon
1253# @param zone
1254# @param ll_lat
1255# @param ll_long
1256# @param grid_spacing
1257# @param lat_amount
1258# @param long_amount
1259# @param isSouthHemisphere
1260# @param use_cache
1261# @param verbose
1262def URS_points_needed(boundary_polygon, zone, ll_lat,
1263                      ll_long, grid_spacing,
1264                      lat_amount, long_amount, isSouthHemisphere=True,
1265                      use_cache=False, verbose=False):
1266    args = (boundary_polygon,
1267            zone, ll_lat,
1268            ll_long, grid_spacing,
1269            lat_amount, long_amount, isSouthHemisphere)
1270    kwargs = {}
1271
1272    if use_cache is True:
1273        try:
1274            from anuga.caching import cache
1275        except:
1276            msg = 'Caching was requested, but caching module ' \
1277                  'could not be imported'
1278            raise Exception, msg
1279
1280        geo = cache(_URS_points_needed,
1281                    args, kwargs,
1282                    verbose=verbose,
1283                    compression=False)
1284    else:
1285        geo = apply(_URS_points_needed, args, kwargs)
1286
1287    return geo
1288
1289
1290##
1291# @brief
1292# @param boundary_polygon An iterable of points that describe a polygon.
1293# @param zone
1294# @param ll_lat Lower left latitude, in decimal degrees
1295# @param ll_long Lower left longitude, in decimal degrees
1296# @param grid_spacing Grid spacing in decimal degrees.
1297# @param lat_amount
1298# @param long_amount
1299# @param isSouthHemisphere
1300def _URS_points_needed(boundary_polygon,
1301                       zone, ll_lat,
1302                       ll_long, grid_spacing,
1303                       lat_amount, long_amount,
1304                       isSouthHemisphere):
1305    """
1306    boundary_polygon - a list of points that describes a polygon.
1307                      The last point is assumed ot join the first point.
1308                      The last point is assumed to join the first point.
1309                      This is in UTM (lat long would be better though)
1310    ll_lat - lower left latitude, in decimal degrees
1311    ll-long - lower left longitude, in decimal degrees
1312    grid_spacing - in decimal degrees
1313
1314    """
1315
1316    msg = "grid_spacing can not be zero"
1317    assert not grid_spacing == 0, msg
1318
1319    a = boundary_polygon
1320
1321    # List of segments.  Each segment is two points.
1322    segs = [i and [a[i-1], a[i]] or [a[len(a)-1], a[0]] for i in range(len(a))]
1323
1324    # convert the segs to Lat's and longs.
1325    # Don't assume the zone of the segments is the same as the lower left
1326    # corner of the lat long data!!  They can easily be in different zones
1327    lat_long_set = frozenset()
1328    for seg in segs:
1329        points_lat_long = points_needed(seg, ll_lat, ll_long, grid_spacing,
1330                                        lat_amount, long_amount, zone,
1331                                        isSouthHemisphere)
1332        lat_long_set |= frozenset(points_lat_long)
1333
1334    if lat_long_set == frozenset([]):
1335        msg = "URS region specified and polygon does not overlap."
1336        raise ValueError, msg
1337
1338    # Warning there is no info in geospatial saying the hemisphere of
1339    # these points.  There should be.
1340    geo = Geospatial_data(data_points=list(lat_long_set),
1341                          points_are_lats_longs=True)
1342
1343    return geo
1344
1345
1346##
1347# @brief Get the points that are needed to interpolate any point on a segment.
1348# @param seg Two points in the UTM.
1349# @param ll_lat Lower left latitude, in decimal degrees
1350# @param ll_long Lower left longitude, in decimal degrees
1351# @param grid_spacing
1352# @param lat_amount
1353# @param long_amount
1354# @param zone
1355# @param isSouthHemisphere
1356# @return A list of points.
1357def points_needed(seg, ll_lat, ll_long, grid_spacing,
1358                  lat_amount, long_amount, zone,
1359                  isSouthHemisphere):
1360    """
1361    seg is two points, in UTM
1362    return a list of the points, in lats and longs that are needed to
1363    interpolate any point on the segment.
1364    """
1365
1366    from math import sqrt
1367
1368    geo_reference = Geo_reference(zone=zone)
1369    geo = Geospatial_data(seg, geo_reference=geo_reference)
1370    seg_lat_long = geo.get_data_points(as_lat_long=True,
1371                                       isSouthHemisphere=isSouthHemisphere)
1372
1373    # 1.415 = 2^0.5, rounded up....
1374    sqrt_2_rounded_up = 1.415
1375    buffer = sqrt_2_rounded_up * grid_spacing
1376
1377    max_lat = max(seg_lat_long[0][0], seg_lat_long[1][0]) + buffer
1378    max_long = max(seg_lat_long[0][1], seg_lat_long[1][1]) + buffer
1379    min_lat = min(seg_lat_long[0][0], seg_lat_long[1][0]) - buffer
1380    min_long = min(seg_lat_long[0][1], seg_lat_long[1][1]) - buffer
1381
1382    first_row = (min_long - ll_long) / grid_spacing
1383
1384    # To round up
1385    first_row_long = int(round(first_row + 0.5))
1386
1387    last_row = (max_long - ll_long) / grid_spacing # round down
1388    last_row_long = int(round(last_row))
1389
1390    first_row = (min_lat - ll_lat) / grid_spacing
1391    # To round up
1392    first_row_lat = int(round(first_row + 0.5))
1393
1394    last_row = (max_lat - ll_lat) / grid_spacing # round down
1395    last_row_lat = int(round(last_row))
1396
1397    max_distance = 157147.4112 * grid_spacing
1398    points_lat_long = []
1399
1400    # Create a list of the lat long points to include.
1401    for index_lat in range(first_row_lat, last_row_lat + 1):
1402        for index_long in range(first_row_long, last_row_long + 1):
1403            lat = ll_lat + index_lat*grid_spacing
1404            long = ll_long + index_long*grid_spacing
1405
1406            #filter here to keep good points
1407            if keep_point(lat, long, seg, max_distance):
1408                points_lat_long.append((lat, long)) #must be hashable
1409
1410    # Points further than max_distance from the segment have already been excluded
1411    return points_lat_long
1412
1413
1414##
1415# @brief
1416# @param lat
1417# @param long
1418# @param seg Two points in UTM.
1419# @param max_distance
1420def keep_point(lat, long, seg, max_distance):
1421    """
1422    seg is two points, UTM
1423    """
1424
1425    from math import sqrt
1426
1427    _ , x0, y0 = redfearn(lat, long)
1428    x1 = seg[0][0]
1429    y1 = seg[0][1]
1430    x2 = seg[1][0]
1431    y2 = seg[1][1]
1432    x2_1 = x2-x1
1433    y2_1 = y2-y1
1434    try:
1435        d = abs((x2_1)*(y1-y0)-(x1-x0)*(y2_1))/sqrt( \
1436            (x2_1)*(x2_1)+(y2_1)*(y2_1))
1437    except ZeroDivisionError:
1438        if sqrt((x2_1)*(x2_1)+(y2_1)*(y2_1)) == 0 \
1439           and abs((x2_1)*(y1-y0)-(x1-x0)*(y2_1)) == 0:
1440            return True
1441        else:
1442            return False
1443
1444    return d <= max_distance
1445
1446
1447
1448
1449
1450##
1451# @brief Store keyword params into a CSV file.
1452# @param verbose True if this function is to be verbose.
1453# @param kwargs Dictionary of keyword args to store.
1454# @note If kwargs dict contains 'file_name' key, that has the output filename.
1455#       If not, make up a filename in the output directory.
1456def store_parameters(verbose=False, **kwargs):
1457    """
1458    Store "kwargs" into a temp csv file, if "completed" is in kwargs,
1459    csv file is kwargs[file_name] else it is kwargs[output_dir]+details_temp.csv
1460
1461    Must have a file_name keyword arg, this is what it writes to.
1462    might be a better way to do this using CSV module Writer and writeDict.
1463
1464    writes file to "output_dir" unless "completed" is in kwargs, then
1465    it writes to "file_name" kwargs
1466    """
1467
1468    import types
1469
1470    # Check that kwargs is a dictionary
1471    if type(kwargs) != types.DictType:
1472        raise TypeError
1473
1474    # is 'completed' in kwargs?
1475    completed = kwargs.has_key('completed')
1476
1477    # get file name and removes from dict and assert that a file_name exists
1478    if completed:
1479        try:
1480            file = str(kwargs['file_name'])
1481        except:
1482            raise Exception, 'kwargs must have file_name'
1483    else:
1484        # write temp file in output directory
1485        try:
1486            file = str(kwargs['output_dir']) + 'detail_temp.csv'
1487        except:
1488            raise Exception, 'kwargs must have output_dir'
1489
1490    # extracts the header info and the new line info
1491    line = ''
1492    header = ''
1493    count = 0
1494    keys = kwargs.keys()
1495    keys.sort()
1496
1497    # used the sorted keys to create the header and line data
1498    for k in keys:
1499        header += str(k)
1500        line += str(kwargs[k])
1501        count += 1
1502        if count < len(kwargs):
1503            header += ','
1504            line += ','
1505    header += '\n'
1506    line += '\n'
1507
1508    # checks the header info, if the same, then write, if not create a new file
1509    # try to open!
1510    try:
1511        fid = open(file, 'r')
1512        file_header = fid.readline()
1513        fid.close()
1514        if verbose: log.critical('read file header %s' % file_header)
1515    except:
1516        msg = 'try to create new file: %s' % file
1517        if verbose: log.critical(msg)
1518        #tries to open file, maybe directory is bad
1519        try:
1520            fid = open(file, 'w')
1521            fid.write(header)
1522            fid.close()
1523            file_header=header
1524        except:
1525            msg = 'cannot create new file: %s' % file
1526            raise Exception, msg
1527
1528    # if header is same or this is a new file
1529    if file_header == str(header):
1530        fid = open(file, 'a')
1531        fid.write(line)
1532        fid.close()
1533    else:
1534        # backup plan,
1535        # if header is different and has completed will append info to
1536        # end of details_temp.csv file in output directory
1537        file = str(kwargs['output_dir']) + 'detail_temp.csv'
1538        fid = open(file, 'a')
1539        fid.write(header)
1540        fid.write(line)
1541        fid.close()
1542
1543        if verbose:
1544            log.critical('file %s', file_header.strip('\n'))
1545            log.critical('head %s', header.strip('\n'))
1546        if file_header.strip('\n') == str(header):
1547            log.critical('they equal')
1548
1549        msg = 'WARNING: File header does not match input info, ' \
1550              'the input variables have changed, suggest you change file name'
1551        log.critical(msg)
1552
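# Illustrative sketch (not called anywhere in this module): store_parameters
# appends one CSV row per call.  The output directory and keyword values are
# assumptions (the directory is assumed to exist); note that output_dir is
# concatenated directly, so the trailing separator matters.
def _example_store_parameters():
    store_parameters(verbose=False,
                     output_dir='results' + sep,
                     scenario='example_run',
                     cellsize=10)
    # Writes (or appends to) results/detail_temp.csv with a sorted header row
    # 'cellsize,output_dir,scenario' and a matching data row.
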
1553################################################################################
1554# Functions to obtain diagnostics from sww files
1555################################################################################
1556
1557##
1558# @brief Get mesh and quantity data from an SWW file.
1559# @param filename Path to data file to read.
1560# @param quantities UNUSED!
1561# @param verbose True if this function is to be verbose.
1562# @return (mesh, quantities, time) where mesh is the mesh data, quantities is
1563#         a dictionary of {name: value}, and time is the time vector.
1564# @note Quantities extracted: 'elevation', 'stage', 'xmomentum' and 'ymomentum'
1565def get_mesh_and_quantities_from_file(filename,
1566                                      quantities=None,
1567                                      verbose=False):
1568    """Get and rebuild mesh structure and associated quantities from sww file
1569
1570    Input:
1571        filename - Name of sww file
1572        quantities - Names of quantities to load
1573
1574    Output:
1575        mesh - instance of class Interpolate
1576               (including mesh and interpolation functionality)
1577        quantities - arrays with quantity values at each mesh node
1578        time - vector of stored timesteps
1579
1580    This function is used by e.g.:
1581        get_interpolated_quantities_at_polyline_midpoints
1582    """
1583
1584    # FIXME (Ole): Maybe refactor filefunction using this more fundamental code.
1585
1586    import types
1587    from Scientific.IO.NetCDF import NetCDFFile
1588    from shallow_water import Domain
1589    from anuga.abstract_2d_finite_volumes.neighbour_mesh import Mesh
1590
1591    if verbose: log.critical('Reading from %s' % filename)
1592
1593    fid = NetCDFFile(filename, netcdf_mode_r)    # Open existing file for read
1594    time = fid.variables['time'][:]    # Time vector
1595    time += fid.starttime[0]
1596
1597    # Get the variables as numeric arrays
1598    x = fid.variables['x'][:]                   # x-coordinates of nodes
1599    y = fid.variables['y'][:]                   # y-coordinates of nodes
1600    elevation = fid.variables['elevation'][:]   # Elevation
1601    stage = fid.variables['stage'][:]           # Water level
1602    xmomentum = fid.variables['xmomentum'][:]   # Momentum in the x-direction
1603    ymomentum = fid.variables['ymomentum'][:]   # Momentum in the y-direction
1604
1605    # Mesh (nodes (Mx2), triangles (Nx3))
1606    nodes = num.concatenate((x[:,num.newaxis], y[:,num.newaxis]), axis=1)
1607    triangles = fid.variables['volumes'][:]
1608
1609    # Get geo_reference
1610    try:
1611        geo_reference = Geo_reference(NetCDFObject=fid)
1612    except AttributeError:
1613        # Sww files don't have to have a geo_ref
1614        geo_reference = None
1615
1616    if verbose: log.critical('    building mesh from sww file %s' % filename)
1617
1618    boundary = None
1619
1620    #FIXME (Peter Row): Should this be in mesh?
1621    if fid.smoothing != 'Yes':
1622        nodes = nodes.tolist()
1623        triangles = triangles.tolist()
1624        nodes, triangles, boundary = weed(nodes, triangles, boundary)
1625
1626    try:
1627        mesh = Mesh(nodes, triangles, boundary, geo_reference=geo_reference)
1628    except AssertionError, e:
1629        fid.close()
1630        msg = 'Mesh could not be created: %s' % e
1631        raise DataDomainError, msg
1632
1633    quantities = {}
1634    quantities['elevation'] = elevation
1635    quantities['stage'] = stage
1636    quantities['xmomentum'] = xmomentum
1637    quantities['ymomentum'] = ymomentum
1638
1639    fid.close()
1640
1641    return mesh, quantities, time
1642
1643
1644##
1645# @brief Get values for quantities interpolated to polyline midpoints from SWW.
1646# @param filename Path to file to read.
1647# @param quantity_names Quantity names to get.
1648# @param polyline Representation of desired cross-section.
1649# @param verbose True if this function is to be verbose.
1650# @return (segments, i_func) where segments is a list of Triangle_intersection
1651#         instances and i_func is an instance of Interpolation_function.
1652# @note For 'polyline' assume absolute UTM coordinates.
1653def get_interpolated_quantities_at_polyline_midpoints(filename,
1654                                                      quantity_names=None,
1655                                                      polyline=None,
1656                                                      verbose=False):
1657    """Get values for quantities interpolated to polyline midpoints from SWW
1658
1659    Input:
1660        filename - Name of sww file
1661        quantity_names - Names of quantities to load
1662        polyline: Representation of desired cross section - it may contain
1663                  multiple sections allowing for complex shapes. Assume
1664                  absolute UTM coordinates.
1665                  Format [[x0, y0], [x1, y1], ...]
1666
1667    Output:
1668        segments: list of instances of class Triangle_intersection
1669        interpolation_function: Instance of class Interpolation_function
1670
1671
1672      This function is used by get_flow_through_cross_section and
1673      get_energy_through_cross_section
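
    A usage sketch (the file name and coordinates are illustrative):

        polyline = [[398000.0, 8000000.0], [399000.0, 8000000.0]]
        segments, I = get_interpolated_quantities_at_polyline_midpoints(
                          'model_output.sww',
                          quantity_names=['elevation', 'stage',
                                          'xmomentum', 'ymomentum'],
                          polyline=polyline)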
1674    """
1675
1676    from anuga.fit_interpolate.interpolate import Interpolation_function
1677
1678    # Get mesh and quantities from sww file
1679    X = get_mesh_and_quantities_from_file(filename,
1680                                          quantities=quantity_names,
1681                                          verbose=verbose)
1682    mesh, quantities, time = X
1683
1684    # Find all intersections and associated triangles.
1685    segments = mesh.get_intersecting_segments(polyline, verbose=verbose)
1686
1687    # Get midpoints
1688    interpolation_points = segment_midpoints(segments)
1689
1690    # Interpolate
1691    if verbose:
1692        log.critical('Interpolating - total number of interpolation points = %d'
1693                     % len(interpolation_points))
1694
1695    I = Interpolation_function(time,
1696                               quantities,
1697                               quantity_names=quantity_names,
1698                               vertex_coordinates=mesh.nodes,
1699                               triangles=mesh.triangles,
1700                               interpolation_points=interpolation_points,
1701                               verbose=verbose)
1702
1703    return segments, I
1704
1705
1706##
1707# @brief Obtain flow (m^3/s) perpendicular to specified cross section.
1708# @param filename Path to file to read.
1709# @param polyline Representation of desired cross-section.
1710# @param verbose True if this function is to be verbose.
1711# @return (time, Q) where time and Q are lists of time and flow respectively.
1712def get_flow_through_cross_section(filename, polyline, verbose=False):
1713    """Obtain flow (m^3/s) perpendicular to specified cross section.
1714
1715    Inputs:
1716        filename: Name of sww file
1717        polyline: Representation of desired cross section - it may contain
1718                  multiple sections allowing for complex shapes. Assume
1719                  absolute UTM coordinates.
1720                  Format [[x0, y0], [x1, y1], ...]
1721
1722    Output:
1723        time: All stored times in sww file
1724        Q: Hydrograph of total flow across given segments for all stored times.
1725
1726    The normal flow is computed for each triangle intersected by the polyline
1727    and added up.  If multiple segments at different angles are specified, the
1728    normal flows may partially cancel each other.
1729
1730    The typical usage of this function would be to get flow through a channel,
1731    and the polyline would then be a cross section perpendicular to the flow.
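
    A usage sketch (the file name and coordinates are illustrative):

        polyline = [[398000.0, 8000000.0], [399000.0, 8000000.0]]
        time, Q = get_flow_through_cross_section('model_output.sww', polyline)
        max_flow = max(Q)    # Peak flow [m^3/s] over the hydrograph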
1732    """
1733
1734    quantity_names = ['elevation',
1735                      'stage',
1736                      'xmomentum',
1737                      'ymomentum']
1738
1739    # Get values for quantities at each polyline segment midpoint from sww file
1740    X = get_interpolated_quantities_at_polyline_midpoints(filename,
1741                                                          quantity_names=\
1742                                                              quantity_names,
1743                                                          polyline=polyline,
1744                                                          verbose=verbose)
1745    segments, interpolation_function = X
1746
1747    # Get vectors for time and interpolation_points
1748    time = interpolation_function.time
1749    interpolation_points = interpolation_function.interpolation_points
1750
1751    if verbose: log.critical('Computing hydrograph')
1752
1753    # Compute hydrograph
1754    Q = []
1755    for t in time:
1756        total_flow = 0
1757        for i in range(len(interpolation_points)):
1758            elevation, stage, uh, vh = interpolation_function(t, point_id=i)
1759            normal = segments[i].normal
1760
1761            # Inner product of momentum vector with segment normal [m^2/s]
1762            normal_momentum = uh*normal[0] + vh*normal[1]
1763
1764            # Flow across this segment [m^3/s]
1765            segment_flow = normal_momentum * segments[i].length
1766
1767            # Accumulate
1768            total_flow += segment_flow
1769
1770        # Store flow at this timestep
1771        Q.append(total_flow)
1772
1773
1774    return time, Q
1775
1776
1777##
1778# @brief Get average energy across a cross-section.
1779# @param filename Path to file of interest.
1780# @param polyline Representation of desired cross-section.
1781# @param kind Select energy to compute: 'specific' or 'total'.
1782# @param verbose True if this function is to be verbose.
1783# @return (time, E) where time and E are lists of timestep and energy.
1784def get_energy_through_cross_section(filename,
1785                                     polyline,
1786                                     kind='total',
1787                                     verbose=False):
1788    """Obtain average energy head [m] across specified cross section.
1789
1790    Inputs:
1791        polyline: Representation of desired cross section - it may contain
1792                  multiple sections allowing for complex shapes. Assume
1793                  absolute UTM coordinates.
1794                  Format [[x0, y0], [x1, y1], ...]
1795        kind:     Select which energy to compute.
1796                  Options are 'specific' and 'total' (default)
1797
1798    Output:
1799        E: Average energy [m] across given segments for all stored times.
1800
1801    The average velocity is computed for each triangle intersected by
1802    the polyline and the energy is averaged, weighted by segment lengths.
1803
1804    The typical usage of this function would be to get average energy of
1805    flow in a channel, and the polyline would then be a cross section
1806    perpendicular to the flow.
1807
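    A usage sketch (the file name and coordinates are illustrative):

        polyline = [[398000.0, 8000000.0], [399000.0, 8000000.0]]
        time, E = get_energy_through_cross_section('model_output.sww',
                                                   polyline,
                                                   kind='specific')
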
1808    #FIXME (Ole) - need name for this energy reflecting that its dimension
1809    is [m].
1810    """
1811
1812    from anuga.config import g, epsilon, velocity_protection as h0
1813
1814    quantity_names = ['elevation',
1815                      'stage',
1816                      'xmomentum',
1817                      'ymomentum']
1818
1819    # Get values for quantities at each polyline segment midpoint from sww file
1820    X = get_interpolated_quantities_at_polyline_midpoints(filename,
1821                                                          quantity_names=\
1822                                                              quantity_names,
1823                                                          polyline=polyline,
1824                                                          verbose=verbose)
1825    segments, interpolation_function = X
1826
1827    # Get vectors for time and interpolation_points
1828    time = interpolation_function.time
1829    interpolation_points = interpolation_function.interpolation_points
1830
1831    if verbose: log.critical('Computing %s energy' % kind)
1832
1833    # Compute total length of polyline for use with weighted averages
1834    total_line_length = 0.0
1835    for segment in segments:
1836        total_line_length += segment.length
1837
1838    # Compute energy
1839    E = []
1840    for t in time:
1841        average_energy = 0.0
1842        for i, p in enumerate(interpolation_points):
1843            elevation, stage, uh, vh = interpolation_function(t, point_id=i)
1844
1845            # Depth
1846            h = depth = stage-elevation
1847
1848            # Average velocity across this segment
1849            if h > epsilon:
1850                # Use protection against degenerate velocities
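                # i.e. u = uh*h/(h*h + h0), which approaches uh/h when
                # h*h >> h0 and tends to zero as the depth h goes to zero.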
1851                u = uh / (h + h0/h)
1852                v = vh / (h + h0/h)
1853            else:
1854                u = v = 0.0
1855
1856            speed_squared = u*u + v*v
1857            kinetic_energy = 0.5 * speed_squared / g
1858
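            # Specific energy head is measured relative to the bed:
            #     E_specific = h + (u^2 + v^2)/(2g)
            # Total energy head is measured relative to the datum:
            #     E_total = w + (u^2 + v^2)/(2g),   where w = stage = z + h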
1859            if kind == 'specific':
1860                segment_energy = depth + kinetic_energy
1861            elif kind == 'total':
1862                segment_energy = stage + kinetic_energy
1863            else:
1864                msg = 'Energy kind must be either "specific" or "total". '
1865                msg += 'I got %s' % kind
1866                raise Exception, msg
1867            # Add to weighted average
1868            weight = segments[i].length / total_line_length
1869            average_energy += segment_energy * weight
1870
1871        # Store energy at this timestep
1872        E.append(average_energy)
1873
1874    return time, E
1875
1876
1877##
1878# @brief Return highest elevation where depth > 0.
1879# @param filename Path to SWW file of interest.
1880# @param polygon If specified, restrict to points inside this polygon.
1881# @param time_interval If specified, restrict to the given time interval.
1882# @param verbose True if this function is to be verbose.
1883def get_maximum_inundation_elevation(filename,
1884                                     polygon=None,
1885                                     time_interval=None,
1886                                     verbose=False):
1887    """Return highest elevation where depth > 0
1888
1889    Usage:
1890    max_runup = get_maximum_inundation_elevation(filename,
1891                                                 polygon=None,
1892                                                 time_interval=None,
1893                                                 verbose=False)
1894
1895    filename is a NetCDF sww file containing ANUGA model output.
1896    Optional arguments polygon and time_interval restrict the maximum
1897    runup calculation to points that lie within the specified polygon
1898    and time interval.
1899
1900    If no inundation is found within polygon and time_interval the return value
1901    is None signifying "No Runup" or "Everything is dry".
1902
1903    See general function get_maximum_inundation_data for details.
1904    """
1905
1906    runup, _ = get_maximum_inundation_data(filename,
1907                                           polygon=polygon,
1908                                           time_interval=time_interval,
1909                                           verbose=verbose)
1910    return runup
1911
1912
1913##
1914# @brief Return location of highest elevation where h > 0
1915# @param filename Path to SWW file to read.
1916# @param polygon If specified, restrict to points inside this polygon.
1917# @param time_interval If specified, restrict to the given time interval.
1918# @param verbose True if this function is to be verbose.
1919def get_maximum_inundation_location(filename,
1920                                    polygon=None,
1921                                    time_interval=None,
1922                                    verbose=False):
1923    """Return location of highest elevation where h > 0
1924
1925    Usage:
1926    max_runup_location = get_maximum_inundation_location(filename,
1927                                                         polygon=None,
1928                                                         time_interval=None,
1929                                                         verbose=False)
1930
1931    filename is a NetCDF sww file containing ANUGA model output.
1932    Optional arguments polygon and time_interval restrict the maximum
1933    runup calculation to points that lie within the specified polygon
1934    and time interval.
1935
1936    If no inundation is found within polygon and time_interval the return value
1937    is None signifying "No Runup" or "Everything is dry".
1938
1939    See general function get_maximum_inundation_data for details.
1940    """
1941
1942    _, max_loc = get_maximum_inundation_data(filename,
1943                                             polygon=polygon,
1944                                             time_interval=time_interval,
1945                                             verbose=verbose)
1946    return max_loc
1947
1948
1949##
1950# @brief Compute maximum run up height from SWW file.
1951# @param filename Path to SWW file to read.
1952# @param polygon If specified, restrict to points inside this polygon.
1953# @param time_interval If specified, restrict to the given time interval.
1954# @param use_centroid_values If True, use centroid values rather than vertex values.
1955# @param verbose True if this function is to be verbose.
1956# @return (maximal_runup, maximal_runup_location)
1957def get_maximum_inundation_data(filename, polygon=None, time_interval=None,
1958                                use_centroid_values=False,
1959                                verbose=False):
1960    """Compute maximum run up height from sww file.
1961
1962    Usage:
1963    runup, location = get_maximum_inundation_data(filename,
1964                                                  polygon=None,
1965                                                  time_interval=None,
1966                                                  verbose=False)
1967
1968    Algorithm is as in get_maximum_inundation_elevation from
1969    shallow_water_domain except that this function works with the sww file and
1970    computes the maximal runup height over multiple timesteps.
1971
1972    Optional arguments polygon and time_interval restrict the maximum runup
1973    calculation to points that lie within the specified polygon and time
1974    interval.
1975
1976    Polygon is assumed to be in (absolute) UTM coordinates in the same zone
1977    as domain.
1978
1979    If no inundation is found within polygon and time_interval the return value
1980    is None signifying "No Runup" or "Everything is dry".
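
    A sketch restricting the search (polygon vertices and the time interval
    are illustrative; coordinates are absolute UTM, times are in seconds):

        polygon = [[398000.0, 8000000.0], [399000.0, 8000000.0],
                   [399000.0, 8001000.0], [398000.0, 8001000.0]]
        runup, location = get_maximum_inundation_data('model_output.sww',
                                                      polygon=polygon,
                                                      time_interval=[0, 3600])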
1981    """
1982
1983    # We are using nodal values here as that is what is stored in sww files.
1984
1985    # Water depth below which it is considered to be 0 in the model
1986    # FIXME (Ole): Allow this to be specified as a keyword argument as well
1987
1988    from anuga.geometry.polygon import inside_polygon
1989    from anuga.config import minimum_allowed_height
1990    from Scientific.IO.NetCDF import NetCDFFile
1991
1992    dir, base = os.path.split(filename)
1993
1994    iterate_over = get_all_swwfiles(dir, base)
1995
1996    # Read sww file
1997    if verbose: log.critical('Reading from %s' % filename)
1998    # FIXME: Use general swwstats (when done)
1999
2000    maximal_runup = None
2001    maximal_runup_location = None
2002
2003    for swwfile in iterate_over:
2004        # Read sww file
2005        filename = join(dir, swwfile+'.sww')
2006
2007        if verbose: log.critical('Reading from %s' % filename)
2008        # FIXME: Use general swwstats (when done)
2009
2010        fid = NetCDFFile(filename)
2011
2012        # Get geo_reference
2013        # sww files don't have to have a geo_ref
2014        try:
2015            geo_reference = Geo_reference(NetCDFObject=fid)
2016        except AttributeError, e:
2017            geo_reference = Geo_reference() # Default georef object
2018
2019        xllcorner = geo_reference.get_xllcorner()
2020        yllcorner = geo_reference.get_yllcorner()
2021        zone = geo_reference.get_zone()
2022
2023        # Get extent
2024        volumes = fid.variables['volumes'][:]
2025        x = fid.variables['x'][:] + xllcorner
2026        y = fid.variables['y'][:] + yllcorner
2027
2028        # Get the relevant quantities (Convert from single precision)
2029        elevation = num.array(fid.variables['elevation'][:], num.float)
2030        stage = num.array(fid.variables['stage'][:], num.float)
2031
2032        # Here's where one could convert nodal information to centroid
2033        # information, but that is probably something we need to write in C.
2034        # Here's a Python sketch which is NOT finished!!!
2035        if use_centroid_values is True:
2036            x = get_centroid_values(x, volumes)
2037            y = get_centroid_values(y, volumes)
2038            elevation = get_centroid_values(elevation, volumes)
2039
2040        # Spatial restriction
2041        if polygon is not None:
2042            msg = 'polygon must be a sequence of points.'
2043            assert len(polygon[0]) == 2, msg
2044            # FIXME (Ole): Make a generic polygon input check in polygon.py
2045            # and call it here
2046            points = num.ascontiguousarray(num.concatenate((x[:,num.newaxis],
2047                                                            y[:,num.newaxis]),
2048                                                            axis=1))
2049            point_indices = inside_polygon(points, polygon)
2050
2051            # Restrict quantities to polygon
2052            elevation = num.take(elevation, point_indices, axis=0)
2053            stage = num.take(stage, point_indices, axis=1)
2054
2055            # Get info for location of maximal runup
2056            points_in_polygon = num.take(points, point_indices, axis=0)
2057
2058            x = points_in_polygon[:,0]
2059            y = points_in_polygon[:,1]
2060        else:
2061            # Take all points
2062            point_indices = num.arange(len(x))
2063
2064        # Temporal restriction
2065        time = fid.variables['time'][:]
2066        all_timeindices = num.arange(len(time))
2067        if time_interval is not None:
2068            msg = 'time_interval must be a sequence of length 2.'
2069            assert len(time_interval) == 2, msg
2070            msg = 'time_interval %s must not be decreasing.' % time_interval
2071            assert time_interval[1] >= time_interval[0], msg
2072            msg = 'Specified time interval [%.8f:%.8f] ' % tuple(time_interval)
2073            msg += 'does not overlap model time interval: [%.8f, %.8f]\n' \
2074                   % (time[0], time[-1])
2075            if time_interval[1] < time[0]: raise ValueError(msg)
2076            if time_interval[0] > time[-1]: raise ValueError(msg)
2077
2078            # Take time indices corresponding to interval (& is bitwise AND)
2079            timesteps = num.compress((time_interval[0] <= time) \
2080                                     & (time <= time_interval[1]),
2081                                     all_timeindices)
2082
2083            msg = 'time_interval %s did not include any model timesteps.' \
2084                  % time_interval
2085            assert not num.alltrue(timesteps == 0), msg
2086        else:
2087            # Take them all
2088            timesteps = all_timeindices
2089
2090        fid.close()
2091
2092        # Compute maximal runup for each timestep
2093        #maximal_runup = None
2094        #maximal_runup_location = None
2095        #maximal_runups = [None]
2096        #maximal_runup_locations = [None]
2097
2098        for i in timesteps:
2099            if use_centroid_values is True:
2100                stage_i = get_centroid_values(stage[i,:], volumes)
2101            else:
2102                stage_i = stage[i,:]
2103
2104            depth = stage_i - elevation
2105
2106            # Get wet nodes i.e. nodes with depth>0 within given region
2107            # and timesteps
2108            wet_nodes = num.compress(depth > minimum_allowed_height,
2109                                     num.arange(len(depth)))
2110
2111            if num.alltrue(wet_nodes == 0):
2112                runup = None
2113            else:
2114                # Find maximum elevation among wet nodes
2115                wet_elevation = num.take(elevation, wet_nodes, axis=0)
2116                runup_index = num.argmax(wet_elevation)
2117                runup = max(wet_elevation)
2118                assert wet_elevation[runup_index] == runup       # Must be True
2119
2120            if runup > maximal_runup:
2121                maximal_runup = runup      # works even if maximal_runup is None
2122
2123                # Record location
2124                wet_x = num.take(x, wet_nodes, axis=0)
2125                wet_y = num.take(y, wet_nodes, axis=0)
2126                maximal_runup_location = [wet_x[runup_index],wet_y[runup_index]]
2127
2128    return maximal_runup, maximal_runup_location
2129
2130
2131##
2132# @brief Convert a points file to a boundary polygon via an alpha shape.
2133# @param points_file The points file.
2134# @param minimum_triangle_angle Minimum triangle angle used when meshing the outline.
2135# @return The boundary polygon of the meshed outline.
2136def points2polygon(points_file, minimum_triangle_angle=3.0):
2137    """
2138    WARNING: This function is not fully working.
2139
2140    Return a polygon derived from the alpha shape of a points file.
2141
2142    WARNING: Alpha shape returns multiple polygons, but this function only
2143             returns one polygon.
2144    """
2145
2146    from anuga.pmesh.mesh import Mesh, importMeshFromFile
2147    from anuga.shallow_water import Domain
2148    from anuga.pmesh.mesh_interface import create_mesh_from_regions
2149
2150    mesh = importMeshFromFile(points_file)
2151    mesh.auto_segment()
2152    mesh.exportASCIIsegmentoutlinefile("outline.tsh")
2153    mesh2 = importMeshFromFile("outline.tsh")
2154    mesh2.generate_mesh(maximum_triangle_area=1000000000,
2155                        minimum_triangle_angle=minimum_triangle_angle,
2156                        verbose=False)
2157    mesh2.export_mesh_file('outline_meshed.tsh')
2158    domain = Domain("outline_meshed.tsh", use_cache = False)
2159    polygon =  domain.get_boundary_polygon()
2160    return polygon
2161
2162
2163################################################################################
2164
2165if __name__ == "__main__":
2166    # Set umask from config to force the same permissions on all files and
2167    # directories created. (It was noticed that "mpirun" doesn't honour the
2168    # umask set in your .bashrc etc file.)
2169
2170    from config import umask
2171    import os
2172    os.umask(umask)