source: trunk/anuga_core/source/anuga/file_conversion/file_conversion.py @ 7761

Last change on this file since 7761 was 7758, checked in by hudson, 14 years ago

Added extra file conversion classes to file_conversion.

File size: 19.3 KB
1""" Conversion routines.
2    ANUGA needs to deal with many different file formats, and this
3    module provides routines for easily converting between them.
4   
5    These routines are necessarily high level, sitting above the various
6    ANUGA modules. They take a file as input, and output a file.
7"""
8
9#non ANUGA imports
10from Scientific.IO.NetCDF import NetCDFFile
11import numpy as num
12import os.path
13import os
14
15#ANUGA imports
16from anuga.coordinate_transforms.geo_reference import Geo_reference, \
17     write_NetCDF_georeference, ensure_geo_reference
18from anuga.abstract_2d_finite_volumes.pmesh2domain import \
19     pmesh_to_domain_instance
20from anuga.utilities.numerical_tools import ensure_numeric
21from anuga.config import netcdf_mode_r, netcdf_mode_w, netcdf_mode_a, \
22                            netcdf_float
23
24
25#shallow water imports
26from anuga.shallow_water.sww_file import Read_sww, Write_sww
27from anuga.shallow_water.shallow_water_domain import Domain
28from anuga.shallow_water.shallow_water_domain import Domain
29
30
def sww2obj(basefilename, size):
    """Convert netcdf based SWW data output to OBJ format.

        basefilename Stem of filename, needs size and extension added.
        size The number of lines to write.
    """

    # Get NetCDF
    FN = create_filename('.', basefilename, 'sww', size)
    log.critical('Reading from %s' % FN)
    fid = NetCDFFile(FN, netcdf_mode_r)  # Open existing file for read

    # Get the variables
    x = fid.variables['x']
    y = fid.variables['y']
    z = fid.variables['elevation']
    time = fid.variables['time']
    stage = fid.variables['stage']

    M = size  # Number of lines
    xx = num.zeros((M,3), num.float)
    yy = num.zeros((M,3), num.float)
    zz = num.zeros((M,3), num.float)

    for i in range(M):
        for j in range(3):
            xx[i,j] = x[i+j*M]
            yy[i,j] = y[i+j*M]
            zz[i,j] = z[i+j*M]

    # Write obj for bathymetry
    FN = create_filename('.', basefilename, 'obj', size)
    write_obj(FN, xx, yy, zz)

    # Now read all the data with variable information, combine with
    # x,y info and store as obj
    for k in range(len(time)):
        t = time[k]
        log.critical('Processing timestep %f' % t)

        for i in range(M):
            for j in range(3):
                zz[i,j] = stage[k,i+j*M]

        # Write obj for variable data
        #FN = create_filename(basefilename, 'obj', size, time=t)
        FN = create_filename('.', basefilename[:5], 'obj', size, time=t)
        write_obj(FN, xx, yy, zz)

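# Example usage of sww2obj (an illustrative sketch; the stem 'runup' and
# size 400 are hypothetical):
#
#     sww2obj('runup', 400)
#     # Reads the sww file named by create_filename('.', 'runup', 'sww', 400),
#     # writes one obj file for the bathymetry, then one obj file per
#     # timestep holding the stage surface.
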
##
# @brief Convert line based data output to obj.
# @param basefilename Stem of the data filenames.
def dat2obj(basefilename):
    """Convert line based data output to obj
    FIXME: Obsolete?
    """

    import glob, os
    from anuga.config import data_dir

    # Get bathymetry and x,y's
    lines = open(data_dir+os.sep+basefilename+'_geometry.dat', 'r').readlines()

    M = len(lines)  # Number of lines
    x = num.zeros((M,3), num.float)
    y = num.zeros((M,3), num.float)
    z = num.zeros((M,3), num.float)

    for i, line in enumerate(lines):
        tokens = line.split()
        values = map(float, tokens)

        for j in range(3):
            x[i,j] = values[j*3]
            y[i,j] = values[j*3+1]
            z[i,j] = values[j*3+2]

    # Write obj for bathymetry
    write_obj(data_dir + os.sep + basefilename + '_geometry', x, y, z)

    # Now read all the data files with variable information, combine with
    # x,y info and store as obj.
    files = glob.glob(data_dir + os.sep + basefilename + '*.dat')
    for filename in files:
        log.critical('Processing %s' % filename)

        # glob returns paths that already include data_dir
        lines = open(filename, 'r').readlines()
        assert len(lines) == M
        root, ext = os.path.splitext(filename)

        # Get time from filename
        i0 = filename.find('_time=')
        if i0 == -1:
            # Skip bathymetry file
            continue

        i0 += 6  # Position where time starts
        i1 = filename.find('.dat')

        if i1 > i0:
            t = float(filename[i0:i1])
        else:
            raise DataTimeError, 'Could not extract time from filename %s' % filename

        for i, line in enumerate(lines):
            tokens = line.split()
            values = map(float, tokens)

            for j in range(3):
                z[i,j] = values[j]

        # Write obj for variable data
        write_obj(data_dir + os.sep + basefilename + '_time=%.4f' % t, x, y, z)

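# Example usage of dat2obj (an illustrative sketch; the stem 'channel' is
# hypothetical). Expects data_dir to contain channel_geometry.dat (nine
# columns per line: x y z for each of three vertices) plus channel_time=*.dat
# files (three columns per line, one value per vertex):
#
#     dat2obj('channel')
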
##
# @brief Convert time-series text file to TMS file.
# @param filename Stem of the input/output filenames.
# @param quantity_names List of attribute names, one per value column.
# @param time_as_seconds If True, times are given as seconds, not date-times.
def timefile2netcdf(filename, quantity_names=None, time_as_seconds=False):
    """Template for converting typical text files with time series to
    NetCDF tms file.

    The file format is assumed to be two fields separated by a comma:

        time [DD/MM/YY hh:mm:ss], value0 value1 value2 ...

    E.g.

      31/08/04 00:00:00, 1.328223 0 0
      31/08/04 00:15:00, 1.292912 0 0

    or time (seconds), value0 value1 value2 ...

      0.0, 1.328223 0 0
      0.1, 1.292912 0 0

    will provide a time dependent function f(t) with three attributes.

    filename is assumed to be the rootname: input is read from
    filename + '.txt' and output is written to filename + '.tms'.
    """

    import time, calendar
    from anuga.config import time_format

    file_text = filename + '.txt'
    fid = open(file_text)
    line = fid.readline()
    fid.close()

    fields = line.split(',')
    msg = "File %s must have the format 'datetime, value0 value1 value2 ...'" \
          % file_text
    assert len(fields) == 2, msg

    if not time_as_seconds:
        try:
            starttime = calendar.timegm(time.strptime(fields[0], time_format))
        except ValueError:
            msg = 'First field in file %s must be' % file_text
            msg += ' date-time with format %s.\n' % time_format
            msg += 'I got %s instead.' % fields[0]
            raise DataTimeError, msg
    else:
        try:
            starttime = float(fields[0])
        except ValueError:
            msg = 'First field in file %s must be a number' % file_text
            raise DataTimeError, msg

    # Split values
    values = []
    for value in fields[1].split():
        values.append(float(value))

    q = ensure_numeric(values)

    msg = 'ERROR: File must contain at least one independent value'
    assert len(q.shape) == 1, msg

    # Read times proper
    fid = open(file_text)
    lines = fid.readlines()
    fid.close()

    N = len(lines)
    d = len(q)

    T = num.zeros(N, num.float)       # Time
    Q = num.zeros((N, d), num.float)  # Values

    for i, line in enumerate(lines):
        fields = line.split(',')
        if not time_as_seconds:
            realtime = calendar.timegm(time.strptime(fields[0], time_format))
        else:
            realtime = float(fields[0])
        T[i] = realtime - starttime

        for j, value in enumerate(fields[1].split()):
            Q[i, j] = float(value)

    msg = 'File %s must list time as a monotonically ' % filename
    msg += 'increasing sequence'
    assert num.alltrue(T[1:] - T[:-1] > 0), msg

    # Create NetCDF file
    fid = NetCDFFile(filename + '.tms', netcdf_mode_w)

    fid.institution = 'Geoscience Australia'
    fid.description = 'Time series'

    # Reference point
    # Start time in seconds since the epoch (midnight 1/1/1970)
    # FIXME: Use Georef
    fid.starttime = starttime

    # Dimension definitions
    fid.createDimension('number_of_timesteps', len(T))

    fid.createVariable('time', netcdf_float, ('number_of_timesteps',))
    fid.variables['time'][:] = T

    for i in range(Q.shape[1]):
        try:
            name = quantity_names[i]
        except (TypeError, IndexError):
            name = 'Attribute%d' % i

        fid.createVariable(name, netcdf_float, ('number_of_timesteps',))
        fid.variables[name][:] = Q[:,i]

    fid.close()

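# Example usage of timefile2netcdf (an illustrative sketch; the rootname
# 'gauge' and the quantity names are hypothetical). Given gauge.txt holding
# lines such as
#
#     31/08/04 00:00:00, 1.328223 0 0
#
# the call below writes gauge.tms with one NetCDF variable per column:
#
#     timefile2netcdf('gauge',
#                     quantity_names=['stage', 'xmomentum', 'ymomentum'])
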
##
# @brief Convert a tsh/msh file to an SWW file.
# @param filename Path to the input mesh file.
# @param verbose True if this function is to be verbose.
def tsh2sww(filename, verbose=False):
    """Convert a tsh/msh file to an sww file.

    Useful to check if a tsh/msh file 'looks' good.
    """

    if verbose: log.critical('Creating domain from %s' % filename)

    domain = pmesh_to_domain_instance(filename, Domain)

    if verbose: log.critical("Number of triangles = %s" % len(domain))

    domain.smooth = True
    domain.format = 'sww'   # Native netcdf visualisation format
    file_path, filename = os.path.split(filename)
    filename, ext = os.path.splitext(filename)
    domain.set_name(filename)
    domain.reduction = mean

    if verbose: log.critical("file_path = %s" % file_path)

    if file_path == "":
        file_path = "."
    domain.set_datadir(file_path)

    if verbose:
        log.critical("Output written to %s%s%s.%s"
                     % (domain.get_datadir(), os.sep, domain.get_name(),
                        domain.format))

    sww = SWW_file(domain)
    sww.store_connectivity()
    sww.store_timestep()

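# Example usage of tsh2sww (an illustrative sketch; the mesh filename is
# hypothetical). This writes channel.sww next to the input mesh:
#
#     tsh2sww('channel.tsh', verbose=True)
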
##
# @brief Convert URS file to SWW file.
# @param basename_in Stem of the input filename.
# @param basename_out Stem of the output filename.
# @param verbose True if this function is to be verbose.
# @param remove_nc_files If True, remove the intermediate nc files.
# @param minlat Sets extent of area to be used.  If not supplied, full extent.
# @param maxlat Sets extent of area to be used.  If not supplied, full extent.
# @param minlon Sets extent of area to be used.  If not supplied, full extent.
# @param maxlon Sets extent of area to be used.  If not supplied, full extent.
# @param mint Sets time extent to be used.  If not supplied, full extent.
# @param maxt Sets time extent to be used.  If not supplied, full extent.
# @param mean_stage Offset added to the stage values.
# @param origin A 3-tuple with geo referenced UTM coordinates.
# @param zscale Scale factor applied to the stage values.
# @param fail_on_NaN If True, raise an exception on encountering NaN data.
# @param NaN_filler Value used for NaN data when fail_on_NaN is False.
# @note Also convert latitude and longitude to UTM. All coordinates are
#       assumed to be given in the GDA94 datum.
def urs2sww(basename_in='o', basename_out=None, verbose=False,
            remove_nc_files=True,
            minlat=None, maxlat=None,
            minlon=None, maxlon=None,
            mint=None, maxt=None,
            mean_stage=0,
            origin=None,
            zscale=1,
            fail_on_NaN=True,
            NaN_filler=0):
    """Convert a URS file to an SWW file.
    Convert URS C binary format for wave propagation to
    sww format native to abstract_2d_finite_volumes.

    Specify only basename_in and read files of the form
    basefilename-z-mux2, basefilename-e-mux2 and
    basefilename-n-mux2 containing relative height,
    x-velocity and y-velocity, respectively.

    Also convert latitude and longitude to UTM. All coordinates are
    assumed to be given in the GDA94 datum. The latitude and longitude
    information is for a grid.

    min's and max's: If omitted - full extent is used.
    To include a value min may equal it, while max must exceed it.
    Lat and lon are assumed to be in decimal degrees.
    NOTE: minlon specifies the westerly boundary.

    origin is a 3-tuple with geo referenced
    UTM coordinates (zone, easting, northing).
    It will be the origin of the sww file. This shouldn't be used,
    since all of ANUGA should be able to handle an arbitrary origin.

    URS C binary format has data organised as TIME, LONGITUDE, LATITUDE,
    which means that latitude is the fastest
    varying dimension (row major order, so to speak).

    In URS C binary the latitudes and longitudes are in ascending order.
    """

    if basename_out is None:
        basename_out = basename_in

    files_out = urs2nc(basename_in, basename_out)

    ferret2sww(basename_out,
               minlat=minlat,
               maxlat=maxlat,
               minlon=minlon,
               maxlon=maxlon,
               mint=mint,
               maxt=maxt,
               mean_stage=mean_stage,
               origin=origin,
               zscale=zscale,
               fail_on_NaN=fail_on_NaN,
               NaN_filler=NaN_filler,
               inverted_bathymetry=True,
               verbose=verbose)

    if remove_nc_files:
        for file_out in files_out:
            os.remove(file_out)

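# Example usage of urs2sww (an illustrative sketch; the stem and extent
# values are hypothetical). This converts the three mux files whose names
# start with 'cairns' to nc, clips to the given lat/lon window, writes
# cairns.sww and removes the intermediate nc files:
#
#     urs2sww(basename_in='cairns',
#             minlat=-17.5, maxlat=-16.5, minlon=145.5, maxlon=146.5,
#             verbose=True)
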
##
# @brief Convert 3 URS files to 4 NC files.
# @param basename_in Stem of the input filenames.
# @param basename_out Stem of the output filenames.
# @note The names of the urs files must be:
#          [basename_in]-z-mux
#          [basename_in]-e-mux
#          [basename_in]-n-mux
def urs2nc(basename_in='o', basename_out='urs'):
    """Convert the 3 urs files to 4 nc files.

    The names of the urs files must be:
    [basename_in]-z-mux
    [basename_in]-e-mux
    [basename_in]-n-mux
    """

    files_in = [basename_in + WAVEHEIGHT_MUX_LABEL,
                basename_in + EAST_VELOCITY_LABEL,
                basename_in + NORTH_VELOCITY_LABEL]
    files_out = [basename_out + '_ha.nc',
                 basename_out + '_ua.nc',
                 basename_out + '_va.nc']
    quantities = ['HA', 'UA', 'VA']

    for i, file_name in enumerate(files_in):
        if os.access(file_name, os.F_OK) == 0:
            if os.access(file_name + '.mux', os.F_OK) == 0:
                msg = 'File %s does not exist or is not accessible' % file_name
                raise IOError, msg
            else:
                files_in[i] += '.mux'
                log.critical("file_name %s" % file_name)

    hashed_elevation = None
    for file_in, file_out, quantity in zip(files_in,
                                           files_out,
                                           quantities):
        lonlatdep, lon, lat, depth = _binary_c2nc(file_in,
                                                  file_out,
                                                  quantity)
        if hashed_elevation is None:
            elevation_file = basename_out + '_e.nc'
            write_elevation_nc(elevation_file,
                               lon,
                               lat,
                               depth)
            hashed_elevation = myhash(lonlatdep)
        else:
            msg = "The elevation information in the mux files is inconsistent"
            assert hashed_elevation == myhash(lonlatdep), msg

    files_out.append(elevation_file)

    return files_out

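# Example usage of urs2nc (an illustrative sketch; the stems are
# hypothetical). The return value lists the four nc files written:
#
#     files = urs2nc(basename_in='cairns', basename_out='cairns')
#     # -> ['cairns_ha.nc', 'cairns_ua.nc', 'cairns_va.nc', 'cairns_e.nc']
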
##
# @brief Convert a quantity URS file to a NetCDF file.
# @param file_in Path to input URS file.
# @param file_out Path to the output file.
# @param quantity Name of the quantity to be written to the output file.
# @return A tuple (lonlatdep, lon, lat, depth).
def _binary_c2nc(file_in, file_out, quantity):
    """Reads in a quantity urs file and writes a quantity nc file.
    Additionally, returns the depth and lat, long info,
    so it can be written to a file.
    """

    columns = 3                           # long, lat, depth
    mux_file = open(file_in, 'rb')

    # Number of points/stations
    (points_num,) = unpack('i', mux_file.read(4))

    # nt, int - Number of time steps
    (time_step_count,) = unpack('i', mux_file.read(4))

    # dt, float - time step, seconds
    (time_step,) = unpack('f', mux_file.read(4))

    msg = "Bad data in the mux file."
    if points_num < 0:
        mux_file.close()
        raise ANUGAError, msg
    if time_step_count < 0:
        mux_file.close()
        raise ANUGAError, msg
    if time_step < 0:
        mux_file.close()
        raise ANUGAError, msg

    lonlatdep = p_array.array('f')
    lonlatdep.read(mux_file, columns * points_num)
    lonlatdep = num.array(lonlatdep, dtype=num.float)
    lonlatdep = num.reshape(lonlatdep, (points_num, columns))

    lon, lat, depth = lon_lat2grid(lonlatdep)
    lon_sorted = list(lon)
    lon_sorted.sort()

    if not num.alltrue(lon == lon_sorted):
        msg = "Longitudes in mux file are not in ascending order"
        raise IOError, msg

    lat_sorted = list(lat)
    lat_sorted.sort()
    # FIXME: lat_sorted is computed but, unlike lon_sorted, never checked.

    nc_file = Write_nc(quantity,
                       file_out,
                       time_step_count,
                       time_step,
                       lon,
                       lat)

    for i in range(time_step_count):
        # Read in a time slice from mux file
        hz_p_array = p_array.array('f')
        hz_p_array.read(mux_file, points_num)
        hz_p = num.array(hz_p_array, dtype=num.float)
        hz_p = num.reshape(hz_p, (len(lon), len(lat)))
        hz_p = num.transpose(hz_p)  # mux has lat varying fastest, nc has long v.f.

        # Write time slice to nc file
        nc_file.store_timestep(hz_p)

    mux_file.close()
    nc_file.close()

    return lonlatdep, lon, lat, depth

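# For reference, the mux layout parsed by _binary_c2nc above (a sketch
# reconstructed from the reads; native byte order assumed):
#
#     int32    points_num                         number of points/stations
#     int32    time_step_count                    number of time steps
#     float32  time_step                          time step in seconds
#     float32  lonlatdep[points_num * 3]          lon, lat, depth per point
#     float32  data[time_step_count][points_num]  one slice per time step
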
##
# @brief Return max/min indexes (for slicing) of the area specified.
# @param latitudes_ref Sequence of latitudes describing the grid.
# @param longitudes_ref Sequence of longitudes describing the grid.
# @param minlat Minimum latitude of specified area.
# @param maxlat Maximum latitude of specified area.
# @param minlon Minimum longitude of specified area.
# @param maxlon Maximum longitude of specified area.
# @return Tuple (lat_min_index, lat_max_index, lon_min_index, lon_max_index)
def get_min_max_indices(latitudes_ref, longitudes_ref,
                         minlat=None, maxlat=None,
                         minlon=None, maxlon=None):
    """
    Return max, min indexes (for slicing) of the lat and long arrays to cover
    the area specified with min/max lat/long.

    Think of the latitudes and longitudes describing a 2d surface.
    The area returned is, if possible, just big enough to cover the
    input max/min area. (This will not be possible if the max/min area
    has a section outside of the latitudes/longitudes area.)

    Assumes longitudes are sorted from low to high (west to east,
    e.g. 148 - 151) and latitudes are sorted, ascending or descending.
    """

    latitudes = latitudes_ref[:]
    longitudes = longitudes_ref[:]

    latitudes = ensure_numeric(latitudes)
    longitudes = ensure_numeric(longitudes)

    assert num.allclose(num.sort(longitudes), longitudes)

    lat_ascending = True
    if not num.allclose(num.sort(latitudes), latitudes):
        lat_ascending = False
        # Reverse order of lat, so it's in ascending order
        latitudes = latitudes[::-1]
        assert num.allclose(num.sort(latitudes), latitudes)

    largest_lat_index = len(latitudes)-1

    # Cut out a smaller extent.
    if minlat is None:
        lat_min_index = 0
    else:
        lat_min_index = num.searchsorted(latitudes, minlat)-1
        if lat_min_index < 0:
            lat_min_index = 0

    if maxlat is None:
        lat_max_index = largest_lat_index
    else:
        lat_max_index = num.searchsorted(latitudes, maxlat)
        if lat_max_index > largest_lat_index:
            lat_max_index = largest_lat_index

    if minlon is None:
        lon_min_index = 0
    else:
        lon_min_index = num.searchsorted(longitudes, minlon)-1
        if lon_min_index < 0:
            lon_min_index = 0

    if maxlon is None:
        lon_max_index = len(longitudes)
    else:
        lon_max_index = num.searchsorted(longitudes, maxlon)

    # Reverse the indexes if the lat array is descending
    if lat_ascending is False:
        lat_min_index, lat_max_index = largest_lat_index - lat_max_index, \
                                       largest_lat_index - lat_min_index
    lat_max_index = lat_max_index + 1  # Take into account how slicing works
    lon_max_index = lon_max_index + 1  # Take into account how slicing works

    return lat_min_index, lat_max_index, lon_min_index, lon_max_index

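# Worked example for get_min_max_indices (an illustrative sketch): with
#
#     latitudes  = [-36.0, -35.0, -34.0, -33.0]
#     longitudes = [148.0, 149.0, 150.0, 151.0]
#
# a request for minlat=-35.5, maxlat=-34.5, minlon=148.5, maxlon=150.5
# returns (0, 3, 0, 4), so latitudes[0:3] and longitudes[0:4] are the
# smallest slices covering the requested box.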