source: trunk/anuga_core/source/anuga/file_conversion/file_conversion.py @ 7776

Last change on this file since 7776 was 7776, checked in by hudson, 13 years ago

Removed redundant data_manager class. Unit tests are running, but may fail.

File size: 15.9 KB
Line 
1""" Conversion routines.
2    ANUGA needs to deal with many different file formats, and this
3    module provides routines for easily converting between them.
4   
5    These routines are necessarily high level, sitting above the various
6    ANUGA modules. They take a file as input, and output a file.
7"""
8
9#non ANUGA imports
10from Scientific.IO.NetCDF import NetCDFFile
11import numpy as num
12import os.path
13import os
14
15#ANUGA imports
16from anuga.coordinate_transforms.geo_reference import Geo_reference, \
17     write_NetCDF_georeference, ensure_geo_reference
18from anuga.abstract_2d_finite_volumes.pmesh2domain import \
19     pmesh_to_domain_instance
20from anuga.utilities.numerical_tools import ensure_numeric
21from anuga.config import netcdf_mode_r, netcdf_mode_w, netcdf_mode_a, \
22                            netcdf_float
23
24
25#shallow water imports
26from anuga.file.sww import Read_sww, Write_sww
27from anuga.shallow_water.shallow_water_domain import Domain
28from anuga.shallow_water.shallow_water_domain import Domain
29
30
31def sww2obj(basefilename, size):
32    """ Convert netcdf based data output to obj
33
34        Convert SWW data to OBJ data.
35        basefilename Stem of filename, needs size and extension added.
36        size The number of lines to write.
37    """
38
39    from Scientific.IO.NetCDF import NetCDFFile
40
41    # Get NetCDF
42    FN = create_filename('.', basefilename, 'sww', size)
43    log.critical('Reading from %s' % FN)
44    fid = NetCDFFile(FN, netcdf_mode_r)  #Open existing file for read
45
46    # Get the variables
47    x = fid.variables['x']
48    y = fid.variables['y']
49    z = fid.variables['elevation']
50    time = fid.variables['time']
51    stage = fid.variables['stage']
52
53    M = size  #Number of lines
54    xx = num.zeros((M,3), num.float)
55    yy = num.zeros((M,3), num.float)
56    zz = num.zeros((M,3), num.float)
57
58    for i in range(M):
59        for j in range(3):
60            xx[i,j] = x[i+j*M]
61            yy[i,j] = y[i+j*M]
62            zz[i,j] = z[i+j*M]
63
64    # Write obj for bathymetry
65    FN = create_filename('.', basefilename, 'obj', size)
66    write_obj(FN,xx,yy,zz)
67
68    # Now read all the data with variable information, combine with
69    # x,y info and store as obj
70    for k in range(len(time)):
71        t = time[k]
72        log.critical('Processing timestep %f' % t)
73
74        for i in range(M):
75            for j in range(3):
76                zz[i,j] = stage[k,i+j*M]
77
78        #Write obj for variable data
79        #FN = create_filename(basefilename, 'obj', size, time=t)
80        FN = create_filename('.', basefilename[:5], 'obj', size, time=t)
81        write_obj(FN, xx, yy, zz)
82
83
84##
85# @brief
86# @param basefilename Stem of filename, needs size and extension added.
87def dat2obj(basefilename):
88    """Convert line based data output to obj
89    FIXME: Obsolete?
90    """
91
92    import glob, os
93    from anuga.config import data_dir
94
95    # Get bathymetry and x,y's
96    lines = open(data_dir+os.sep+basefilename+'_geometry.dat', 'r').readlines()
97
98    M = len(lines)  #Number of lines
99    x = num.zeros((M,3), num.float)
100    y = num.zeros((M,3), num.float)
101    z = num.zeros((M,3), num.float)
102
103    for i, line in enumerate(lines):
104        tokens = line.split()
105        values = map(float, tokens)
106
107        for j in range(3):
108            x[i,j] = values[j*3]
109            y[i,j] = values[j*3+1]
110            z[i,j] = values[j*3+2]
111
112    # Write obj for bathymetry
113    write_obj(data_dir + os.sep + basefilename + '_geometry', x, y, z)
114
115    # Now read all the data files with variable information, combine with
116    # x,y info and store as obj.
117
118    files = glob.glob(data_dir + os.sep + basefilename + '*.dat')
119    for filename in files:
120        log.critical('Processing %s' % filename)
121
122        lines = open(data_dir + os.sep + filename, 'r').readlines()
123        assert len(lines) == M
124        root, ext = os.path.splitext(filename)
125
126        # Get time from filename
127        i0 = filename.find('_time=')
128        if i0 == -1:
129            #Skip bathymetry file
130            continue
131
132        i0 += 6  #Position where time starts
133        i1 = filename.find('.dat')
134
135        if i1 > i0:
136            t = float(filename[i0:i1])
137        else:
138            raise DataTimeError, 'Hmmmm'
139
140        for i, line in enumerate(lines):
141            tokens = line.split()
142            values = map(float,tokens)
143
144            for j in range(3):
145                z[i,j] = values[j]
146
147        # Write obj for variable data
148        write_obj(data_dir + os.sep + basefilename + '_time=%.4f' % t, x, y, z)
149
150##
151# @brief Convert time-series text file to TMS file.
152# @param filename
153# @param quantity_names
154# @param time_as_seconds
155def timefile2netcdf(filename, quantity_names=None, time_as_seconds=False):
156    """Template for converting typical text files with time series to
157    NetCDF tms file.
158
159    The file format is assumed to be either two fields separated by a comma:
160
161        time [DD/MM/YY hh:mm:ss], value0 value1 value2 ...
162
163    E.g
164
165      31/08/04 00:00:00, 1.328223 0 0
166      31/08/04 00:15:00, 1.292912 0 0
167
168    or time (seconds), value0 value1 value2 ...
169
170      0.0, 1.328223 0 0
171      0.1, 1.292912 0 0
172
173    will provide a time dependent function f(t) with three attributes
174
175    filename is assumed to be the rootname with extenisons .txt and .sww
176    """
177
178    import time, calendar
179    from anuga.config import time_format
180    from anuga.utilities.numerical_tools import ensure_numeric
181
182    file_text = filename + '.txt'
183    fid = open(file_text)
184    line = fid.readline()
185    fid.close()
186
187    fields = line.split(',')
188    msg = "File %s must have the format 'datetime, value0 value1 value2 ...'" \
189          % file_text
190    assert len(fields) == 2, msg
191
192    if not time_as_seconds:
193        try:
194            starttime = calendar.timegm(time.strptime(fields[0], time_format))
195        except ValueError:
196            msg = 'First field in file %s must be' % file_text
197            msg += ' date-time with format %s.\n' % time_format
198            msg += 'I got %s instead.' % fields[0]
199            raise DataTimeError, msg
200    else:
201        try:
202            starttime = float(fields[0])
203        except Error:
204            msg = "Bad time format"
205            raise DataTimeError, msg
206
207    # Split values
208    values = []
209    for value in fields[1].split():
210        values.append(float(value))
211
212    q = ensure_numeric(values)
213
214    msg = 'ERROR: File must contain at least one independent value'
215    assert len(q.shape) == 1, msg
216
217    # Read times proper
218    from anuga.config import time_format
219    import time, calendar
220
221    fid = open(file_text)
222    lines = fid.readlines()
223    fid.close()
224
225    N = len(lines)
226    d = len(q)
227
228    T = num.zeros(N, num.float)       # Time
229    Q = num.zeros((N, d), num.float)  # Values
230
231    for i, line in enumerate(lines):
232        fields = line.split(',')
233        if not time_as_seconds:
234            realtime = calendar.timegm(time.strptime(fields[0], time_format))
235        else:
236            realtime = float(fields[0])
237        T[i] = realtime - starttime
238
239        for j, value in enumerate(fields[1].split()):
240            Q[i, j] = float(value)
241
242    msg = 'File %s must list time as a monotonuosly ' % filename
243    msg += 'increasing sequence'
244    assert num.alltrue(T[1:] - T[:-1] > 0), msg
245
246    #Create NetCDF file
247    from Scientific.IO.NetCDF import NetCDFFile
248
249    fid = NetCDFFile(filename + '.tms', netcdf_mode_w)
250
251    fid.institution = 'Geoscience Australia'
252    fid.description = 'Time series'
253
254    #Reference point
255    #Start time in seconds since the epoch (midnight 1/1/1970)
256    #FIXME: Use Georef
257    fid.starttime = starttime
258
259    # dimension definitions
260    #fid.createDimension('number_of_volumes', self.number_of_volumes)
261    #fid.createDimension('number_of_vertices', 3)
262
263    fid.createDimension('number_of_timesteps', len(T))
264
265    fid.createVariable('time', netcdf_float, ('number_of_timesteps',))
266
267    fid.variables['time'][:] = T
268
269    for i in range(Q.shape[1]):
270        try:
271            name = quantity_names[i]
272        except:
273            name = 'Attribute%d' % i
274
275        fid.createVariable(name, netcdf_float, ('number_of_timesteps',))
276        fid.variables[name][:] = Q[:,i]
277
278    fid.close()
279   
280   
281
282##
283# @brief
284# @param filename
285# @param verbose
286def tsh2sww(filename, verbose=False):
287    """
288    to check if a tsh/msh file 'looks' good.
289    """
290
291    if verbose == True: log.critical('Creating domain from %s' % filename)
292
293    domain = pmesh_to_domain_instance(filename, Domain)
294
295    if verbose == True: log.critical("Number of triangles = %s" % len(domain))
296
297    domain.smooth = True
298    domain.format = 'sww'   #Native netcdf visualisation format
299    file_path, filename = path.split(filename)
300    filename, ext = path.splitext(filename)
301    domain.set_name(filename)
302    domain.reduction = mean
303
304    if verbose == True: log.critical("file_path = %s" % file_path)
305
306    if file_path == "":
307        file_path = "."
308    domain.set_datadir(file_path)
309
310    if verbose == True:
311        log.critical("Output written to %s%s%s.%s"
312                     % (domain.get_datadir(), sep, domain.get_name(),
313                        domain.format))
314
315    sww = SWW_file(domain)
316    sww.store_connectivity()
317    sww.store_timestep()
318
319
320##
321# @brief Convert URS file to SWW file.
322# @param basename_in Stem of the input filename.
323# @param basename_out Stem of the output filename.
324# @param verbose True if this function is to be verbose.
325# @param remove_nc_files
326# @param minlat Sets extent of area to be used.  If not supplied, full extent.
327# @param maxlat Sets extent of area to be used.  If not supplied, full extent.
328# @param minlon Sets extent of area to be used.  If not supplied, full extent.
329# @param maxlon Sets extent of area to be used.  If not supplied, full extent.
330# @param mint
331# @param maxt
332# @param mean_stage
333# @param origin A 3-tuple with geo referenced UTM coordinates
334# @param zscale
335# @param fail_on_NaN
336# @param NaN_filler
337# @param elevation
338# @note Also convert latitude and longitude to UTM. All coordinates are
339#       assumed to be given in the GDA94 datum.
340def urs2sww(basename_in='o', basename_out=None, verbose=False,
341            remove_nc_files=True,
342            minlat=None, maxlat=None,
343            minlon=None, maxlon=None,
344            mint=None, maxt=None,
345            mean_stage=0,
346            origin=None,
347            zscale=1,
348            fail_on_NaN=True,
349            NaN_filler=0):
350    """Convert a URS file to an SWW file.
351    Convert URS C binary format for wave propagation to
352    sww format native to abstract_2d_finite_volumes.
353
354    Specify only basename_in and read files of the form
355    basefilename-z-mux2, basefilename-e-mux2 and
356    basefilename-n-mux2 containing relative height,
357    x-velocity and y-velocity, respectively.
358
359    Also convert latitude and longitude to UTM. All coordinates are
360    assumed to be given in the GDA94 datum. The latitude and longitude
361    information is for  a grid.
362
363    min's and max's: If omitted - full extend is used.
364    To include a value min may equal it, while max must exceed it.
365    Lat and lon are assumed to be in decimal degrees.
366    NOTE: minlon is the most east boundary.
367
368    origin is a 3-tuple with geo referenced
369    UTM coordinates (zone, easting, northing)
370    It will be the origin of the sww file. This shouldn't be used,
371    since all of anuga should be able to handle an arbitary origin.
372
373    URS C binary format has data orgainised as TIME, LONGITUDE, LATITUDE
374    which means that latitude is the fastest
375    varying dimension (row major order, so to speak)
376
377    In URS C binary the latitudes and longitudes are in assending order.
378    """
379
380    if basename_out == None:
381        basename_out = basename_in
382
383    files_out = urs2nc(basename_in, basename_out)
384
385    ferret2sww(basename_out,
386               minlat=minlat,
387               maxlat=maxlat,
388               minlon=minlon,
389               maxlon=maxlon,
390               mint=mint,
391               maxt=maxt,
392               mean_stage=mean_stage,
393               origin=origin,
394               zscale=zscale,
395               fail_on_NaN=fail_on_NaN,
396               NaN_filler=NaN_filler,
397               inverted_bathymetry=True,
398               verbose=verbose)
399   
400    if remove_nc_files:
401        for file_out in files_out:
402            os.remove(file_out)
403
404
405
406##
407# @brief Return max&min indexes (for slicing) of area specified.
408# @param latitudes_ref ??
409# @param longitudes_ref ??
410# @param minlat Minimum latitude of specified area.
411# @param maxlat Maximum latitude of specified area.
412# @param minlon Minimum longitude of specified area.
413# @param maxlon Maximum longitude of specified area.
414# @return Tuple (lat_min_index, lat_max_index, lon_min_index, lon_max_index)
415def get_min_max_indices(latitudes_ref, longitudes_ref,
416                         minlat=None, maxlat=None,
417                         minlon=None, maxlon=None):
418    """
419    Return max, min indexes (for slicing) of the lat and long arrays to cover
420    the area specified with min/max lat/long.
421
422    Think of the latitudes and longitudes describing a 2d surface.
423    The area returned is, if possible, just big enough to cover the
424    inputed max/min area. (This will not be possible if the max/min area
425    has a section outside of the latitudes/longitudes area.)
426
427    asset  longitudes are sorted,
428    long - from low to high (west to east, eg 148 - 151)
429    assert latitudes are sorted, ascending or decending
430    """
431
432    latitudes = latitudes_ref[:]
433    longitudes = longitudes_ref[:]
434
435    latitudes = ensure_numeric(latitudes)
436    longitudes = ensure_numeric(longitudes)
437
438    assert num.allclose(num.sort(longitudes), longitudes)
439
440    #print latitudes[0],longitudes[0]
441    #print len(latitudes),len(longitudes)
442    #print latitudes[len(latitudes)-1],longitudes[len(longitudes)-1]
443
444    lat_ascending = True
445    if not num.allclose(num.sort(latitudes), latitudes):
446        lat_ascending = False
447        # reverse order of lat, so it's in ascending order
448        latitudes = latitudes[::-1]
449        assert num.allclose(num.sort(latitudes), latitudes)
450
451    largest_lat_index = len(latitudes)-1
452
453    #Cut out a smaller extent.
454    if minlat == None:
455        lat_min_index = 0
456    else:
457        lat_min_index = num.searchsorted(latitudes, minlat)-1
458        if lat_min_index < 0:
459            lat_min_index = 0
460
461    if maxlat == None:
462        lat_max_index = largest_lat_index #len(latitudes)
463    else:
464        lat_max_index = num.searchsorted(latitudes, maxlat)
465        if lat_max_index > largest_lat_index:
466            lat_max_index = largest_lat_index
467
468    if minlon == None:
469        lon_min_index = 0
470    else:
471        lon_min_index = num.searchsorted(longitudes, minlon)-1
472        if lon_min_index < 0:
473            lon_min_index = 0
474
475    if maxlon == None:
476        lon_max_index = len(longitudes)
477    else:
478        lon_max_index = num.searchsorted(longitudes, maxlon)
479
480    # Reversing the indexes, if the lat array is decending
481    if lat_ascending is False:
482        lat_min_index, lat_max_index = largest_lat_index - lat_max_index, \
483                                       largest_lat_index - lat_min_index
484    lat_max_index = lat_max_index + 1 # taking into account how slicing works
485    lon_max_index = lon_max_index + 1 # taking into account how slicing works
486
487    return lat_min_index, lat_max_index, lon_min_index, lon_max_index
488
489
490##
491# @brief
492# @param filename
493# @param x
494# @param y
495# @param z
496def write_obj(filename, x, y, z):
497    """Store x,y,z vectors into filename (obj format).
498
499       Vectors are assumed to have dimension (M,3) where
500       M corresponds to the number elements.
501       triangles are assumed to be disconnected
502
503       The three numbers in each vector correspond to three vertices,
504
505       e.g. the x coordinate of vertex 1 of element i is in x[i,1]
506    """
507
508    import os.path
509
510    root, ext = os.path.splitext(filename)
511    if ext == '.obj':
512        FN = filename
513    else:
514        FN = filename + '.obj'
515
516    outfile = open(FN, 'wb')
517    outfile.write("# Triangulation as an obj file\n")
518
519    M, N = x.shape
520    assert N == 3  #Assuming three vertices per element
521
522    for i in range(M):
523        for j in range(N):
524            outfile.write("v %f %f %f\n" % (x[i,j], y[i,j], z[i,j]))
525
526    for i in range(M):
527        base = i * N
528        outfile.write("f %d %d %d\n" % (base+1, base+2, base+3))
529
530    outfile.close()
531
Note: See TracBrowser for help on using the repository browser.