source: trunk/anuga_core/source/anuga/file_conversion/file_conversion.py @ 7761

Last change on this file since 7761 was 7758, checked in by hudson, 14 years ago

Added extra file conversion classes to file_conversion.

File size: 19.3 KB
1""" Conversion routines.
2    ANUGA needs to deal with many different file formats, and this
3    module provides routines for easily converting between them.
4   
5    These routines are necessarily high level, sitting above the various
6    ANUGA modules. They take a file as input, and output a file.
7"""
8
9#non ANUGA imports
10from Scientific.IO.NetCDF import NetCDFFile
11import numpy as num
12import os.path
13import os
14
15#ANUGA imports
16from anuga.coordinate_transforms.geo_reference import Geo_reference, \
17     write_NetCDF_georeference, ensure_geo_reference
18from anuga.abstract_2d_finite_volumes.pmesh2domain import \
19     pmesh_to_domain_instance
20from anuga.utilities.numerical_tools import ensure_numeric
21from anuga.config import netcdf_mode_r, netcdf_mode_w, netcdf_mode_a, \
22                            netcdf_float
23
24
25#shallow water imports
26from anuga.shallow_water.sww_file import Read_sww, Write_sww
27from anuga.shallow_water.shallow_water_domain import Domain
28from anuga.shallow_water.shallow_water_domain import Domain
29
30
def sww2obj(basefilename, size):
    """Convert netcdf based SWW data output to OBJ format.

        basefilename Stem of filename, needs size and extension added.
        size The number of lines to write.
    """

    # Get NetCDF
    FN = create_filename('.', basefilename, 'sww', size)
    log.critical('Reading from %s' % FN)
    fid = NetCDFFile(FN, netcdf_mode_r)  # Open existing file for read

    # Get the variables
    x = fid.variables['x']
    y = fid.variables['y']
    z = fid.variables['elevation']
    time = fid.variables['time']
    stage = fid.variables['stage']

    M = size  # Number of lines
    xx = num.zeros((M,3), num.float)
    yy = num.zeros((M,3), num.float)
    zz = num.zeros((M,3), num.float)

    for i in range(M):
        for j in range(3):
            xx[i,j] = x[i+j*M]
            yy[i,j] = y[i+j*M]
            zz[i,j] = z[i+j*M]

    # Write obj for bathymetry
    FN = create_filename('.', basefilename, 'obj', size)
    write_obj(FN, xx, yy, zz)

    # Now read all the data with variable information, combine with
    # x,y info and store as obj
    for k in range(len(time)):
        t = time[k]
        log.critical('Processing timestep %f' % t)

        for i in range(M):
            for j in range(3):
                zz[i,j] = stage[k,i+j*M]

        # Write obj for variable data
        #FN = create_filename(basefilename, 'obj', size, time=t)
        FN = create_filename('.', basefilename[:5], 'obj', size, time=t)
        write_obj(FN, xx, yy, zz)

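# Example usage of sww2obj (an illustrative sketch; the stem 'runup' and
# size 400 are hypothetical):
#
#     sww2obj('runup', 400)
#     # Reads the sww file named by create_filename('.', 'runup', 'sww', 400),
#     # writes one obj file for the bathymetry, then one obj file per
#     # timestep holding the stage surface.
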
##
# @brief Convert line based data output to obj.
# @param basefilename Stem of the data filenames.
def dat2obj(basefilename):
    """Convert line based data output to obj
    FIXME: Obsolete?
    """

    import glob, os
    from anuga.config import data_dir

    # Get bathymetry and x,y's
    lines = open(data_dir+os.sep+basefilename+'_geometry.dat', 'r').readlines()

    M = len(lines)  # Number of lines
    x = num.zeros((M,3), num.float)
    y = num.zeros((M,3), num.float)
    z = num.zeros((M,3), num.float)

    for i, line in enumerate(lines):
        tokens = line.split()
        values = map(float, tokens)

        for j in range(3):
            x[i,j] = values[j*3]
            y[i,j] = values[j*3+1]
            z[i,j] = values[j*3+2]

    # Write obj for bathymetry
    write_obj(data_dir + os.sep + basefilename + '_geometry', x, y, z)

    # Now read all the data files with variable information, combine with
    # x,y info and store as obj.
    files = glob.glob(data_dir + os.sep + basefilename + '*.dat')
    for filename in files:
        log.critical('Processing %s' % filename)

        # glob returns paths that already include data_dir
        lines = open(filename, 'r').readlines()
        assert len(lines) == M
        root, ext = os.path.splitext(filename)

        # Get time from filename
        i0 = filename.find('_time=')
        if i0 == -1:
            # Skip bathymetry file
            continue

        i0 += 6  # Position where time starts
        i1 = filename.find('.dat')

        if i1 > i0:
            t = float(filename[i0:i1])
        else:
            raise DataTimeError, 'Could not extract time from filename %s' % filename

        for i, line in enumerate(lines):
            tokens = line.split()
            values = map(float, tokens)

            for j in range(3):
                z[i,j] = values[j]

        # Write obj for variable data
        write_obj(data_dir + os.sep + basefilename + '_time=%.4f' % t, x, y, z)

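# Example usage of dat2obj (an illustrative sketch; the stem 'channel' is
# hypothetical). Expects data_dir to contain channel_geometry.dat (nine
# columns per line: x y z for each of three vertices) plus channel_time=*.dat
# files (three columns per line, one value per vertex):
#
#     dat2obj('channel')
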
##
# @brief Convert time-series text file to TMS file.
# @param filename Stem of the input/output filenames.
# @param quantity_names List of attribute names, one per value column.
# @param time_as_seconds If True, times are given as seconds, not date-times.
def timefile2netcdf(filename, quantity_names=None, time_as_seconds=False):
    """Template for converting typical text files with time series to
    NetCDF tms file.

    The file format is assumed to be two fields separated by a comma:

        time [DD/MM/YY hh:mm:ss], value0 value1 value2 ...

    E.g.

      31/08/04 00:00:00, 1.328223 0 0
      31/08/04 00:15:00, 1.292912 0 0

    or time (seconds), value0 value1 value2 ...

      0.0, 1.328223 0 0
      0.1, 1.292912 0 0

    will provide a time dependent function f(t) with three attributes.

    filename is assumed to be the rootname: input is read from
    filename + '.txt' and output is written to filename + '.tms'.
    """

    import time, calendar
    from anuga.config import time_format

    file_text = filename + '.txt'
    fid = open(file_text)
    line = fid.readline()
    fid.close()

    fields = line.split(',')
    msg = "File %s must have the format 'datetime, value0 value1 value2 ...'" \
          % file_text
    assert len(fields) == 2, msg

    if not time_as_seconds:
        try:
            starttime = calendar.timegm(time.strptime(fields[0], time_format))
        except ValueError:
            msg = 'First field in file %s must be' % file_text
            msg += ' date-time with format %s.\n' % time_format
            msg += 'I got %s instead.' % fields[0]
            raise DataTimeError, msg
    else:
        try:
            starttime = float(fields[0])
        except ValueError:
            msg = 'First field in file %s must be a number' % file_text
            raise DataTimeError, msg

    # Split values
    values = []
    for value in fields[1].split():
        values.append(float(value))

    q = ensure_numeric(values)

    msg = 'ERROR: File must contain at least one independent value'
    assert len(q.shape) == 1, msg

    # Read times proper
    fid = open(file_text)
    lines = fid.readlines()
    fid.close()

    N = len(lines)
    d = len(q)

    T = num.zeros(N, num.float)       # Time
    Q = num.zeros((N, d), num.float)  # Values

    for i, line in enumerate(lines):
        fields = line.split(',')
        if not time_as_seconds:
            realtime = calendar.timegm(time.strptime(fields[0], time_format))
        else:
            realtime = float(fields[0])
        T[i] = realtime - starttime

        for j, value in enumerate(fields[1].split()):
            Q[i, j] = float(value)

    msg = 'File %s must list time as a monotonically ' % filename
    msg += 'increasing sequence'
    assert num.alltrue(T[1:] - T[:-1] > 0), msg

    # Create NetCDF file
    fid = NetCDFFile(filename + '.tms', netcdf_mode_w)

    fid.institution = 'Geoscience Australia'
    fid.description = 'Time series'

    # Reference point
    # Start time in seconds since the epoch (midnight 1/1/1970)
    # FIXME: Use Georef
    fid.starttime = starttime

    # Dimension definitions
    fid.createDimension('number_of_timesteps', len(T))

    fid.createVariable('time', netcdf_float, ('number_of_timesteps',))
    fid.variables['time'][:] = T

    for i in range(Q.shape[1]):
        try:
            name = quantity_names[i]
        except (TypeError, IndexError):
            name = 'Attribute%d' % i

        fid.createVariable(name, netcdf_float, ('number_of_timesteps',))
        fid.variables[name][:] = Q[:,i]

    fid.close()

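# Example usage of timefile2netcdf (an illustrative sketch; the rootname
# 'gauge' and the quantity names are hypothetical). Given gauge.txt holding
# lines such as
#
#     31/08/04 00:00:00, 1.328223 0 0
#
# the call below writes gauge.tms with one NetCDF variable per column:
#
#     timefile2netcdf('gauge',
#                     quantity_names=['stage', 'xmomentum', 'ymomentum'])
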
##
# @brief Convert a tsh/msh file to an SWW file.
# @param filename Path to the input mesh file.
# @param verbose True if this function is to be verbose.
def tsh2sww(filename, verbose=False):
    """Convert a tsh/msh file to an sww file.

    Useful to check if a tsh/msh file 'looks' good.
    """

    if verbose: log.critical('Creating domain from %s' % filename)

    domain = pmesh_to_domain_instance(filename, Domain)

    if verbose: log.critical("Number of triangles = %s" % len(domain))

    domain.smooth = True
    domain.format = 'sww'   # Native netcdf visualisation format
    file_path, filename = os.path.split(filename)
    filename, ext = os.path.splitext(filename)
    domain.set_name(filename)
    domain.reduction = mean

    if verbose: log.critical("file_path = %s" % file_path)

    if file_path == "":
        file_path = "."
    domain.set_datadir(file_path)

    if verbose:
        log.critical("Output written to %s%s%s.%s"
                     % (domain.get_datadir(), os.sep, domain.get_name(),
                        domain.format))

    sww = SWW_file(domain)
    sww.store_connectivity()
    sww.store_timestep()

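# Example usage of tsh2sww (an illustrative sketch; the mesh filename is
# hypothetical). This writes channel.sww next to the input mesh:
#
#     tsh2sww('channel.tsh', verbose=True)
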
##
# @brief Convert URS file to SWW file.
# @param basename_in Stem of the input filename.
# @param basename_out Stem of the output filename.
# @param verbose True if this function is to be verbose.
# @param remove_nc_files If True, remove the intermediate nc files.
# @param minlat Sets extent of area to be used.  If not supplied, full extent.
# @param maxlat Sets extent of area to be used.  If not supplied, full extent.
# @param minlon Sets extent of area to be used.  If not supplied, full extent.
# @param maxlon Sets extent of area to be used.  If not supplied, full extent.
# @param mint Sets time extent to be used.  If not supplied, full extent.
# @param maxt Sets time extent to be used.  If not supplied, full extent.
# @param mean_stage Offset added to the stage values.
# @param origin A 3-tuple with geo referenced UTM coordinates.
# @param zscale Scale factor applied to the stage values.
# @param fail_on_NaN If True, raise an exception on encountering NaN data.
# @param NaN_filler Value used for NaN data when fail_on_NaN is False.
# @note Also convert latitude and longitude to UTM. All coordinates are
#       assumed to be given in the GDA94 datum.
def urs2sww(basename_in='o', basename_out=None, verbose=False,
            remove_nc_files=True,
            minlat=None, maxlat=None,
            minlon=None, maxlon=None,
            mint=None, maxt=None,
            mean_stage=0,
            origin=None,
            zscale=1,
            fail_on_NaN=True,
            NaN_filler=0):
    """Convert a URS file to an SWW file.
    Convert URS C binary format for wave propagation to
    sww format native to abstract_2d_finite_volumes.

    Specify only basename_in and read files of the form
    basefilename-z-mux2, basefilename-e-mux2 and
    basefilename-n-mux2 containing relative height,
    x-velocity and y-velocity, respectively.

    Also convert latitude and longitude to UTM. All coordinates are
    assumed to be given in the GDA94 datum. The latitude and longitude
    information is for a grid.

    min's and max's: If omitted - full extent is used.
    To include a value min may equal it, while max must exceed it.
    Lat and lon are assumed to be in decimal degrees.
    NOTE: minlon specifies the westerly boundary.

    origin is a 3-tuple with geo referenced
    UTM coordinates (zone, easting, northing).
    It will be the origin of the sww file. This shouldn't be used,
    since all of ANUGA should be able to handle an arbitrary origin.

    URS C binary format has data organised as TIME, LONGITUDE, LATITUDE,
    which means that latitude is the fastest
    varying dimension (row major order, so to speak).

    In URS C binary the latitudes and longitudes are in ascending order.
    """

    if basename_out is None:
        basename_out = basename_in

    files_out = urs2nc(basename_in, basename_out)

    ferret2sww(basename_out,
               minlat=minlat,
               maxlat=maxlat,
               minlon=minlon,
               maxlon=maxlon,
               mint=mint,
               maxt=maxt,
               mean_stage=mean_stage,
               origin=origin,
               zscale=zscale,
               fail_on_NaN=fail_on_NaN,
               NaN_filler=NaN_filler,
               inverted_bathymetry=True,
               verbose=verbose)

    if remove_nc_files:
        for file_out in files_out:
            os.remove(file_out)

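# Example usage of urs2sww (an illustrative sketch; the stem and extent
# values are hypothetical). This converts the three mux files whose names
# start with 'cairns' to nc, clips to the given lat/lon window, writes
# cairns.sww and removes the intermediate nc files:
#
#     urs2sww(basename_in='cairns',
#             minlat=-17.5, maxlat=-16.5, minlon=145.5, maxlon=146.5,
#             verbose=True)
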
##
# @brief Convert 3 URS files to 4 NC files.
# @param basename_in Stem of the input filenames.
# @param basename_out Stem of the output filenames.
# @note The names of the urs files must be:
#          [basename_in]-z-mux
#          [basename_in]-e-mux
#          [basename_in]-n-mux
def urs2nc(basename_in='o', basename_out='urs'):
    """Convert the 3 urs files to 4 nc files.

    The names of the urs files must be:
    [basename_in]-z-mux
    [basename_in]-e-mux
    [basename_in]-n-mux
    """

    files_in = [basename_in + WAVEHEIGHT_MUX_LABEL,
                basename_in + EAST_VELOCITY_LABEL,
                basename_in + NORTH_VELOCITY_LABEL]
    files_out = [basename_out + '_ha.nc',
                 basename_out + '_ua.nc',
                 basename_out + '_va.nc']
    quantities = ['HA', 'UA', 'VA']

    for i, file_name in enumerate(files_in):
        if os.access(file_name, os.F_OK) == 0:
            if os.access(file_name + '.mux', os.F_OK) == 0:
                msg = 'File %s does not exist or is not accessible' % file_name
                raise IOError, msg
            else:
                files_in[i] += '.mux'
                log.critical("file_name %s" % file_name)

    hashed_elevation = None
    for file_in, file_out, quantity in zip(files_in,
                                           files_out,
                                           quantities):
        lonlatdep, lon, lat, depth = _binary_c2nc(file_in,
                                                  file_out,
                                                  quantity)
        if hashed_elevation is None:
            elevation_file = basename_out + '_e.nc'
            write_elevation_nc(elevation_file,
                               lon,
                               lat,
                               depth)
            hashed_elevation = myhash(lonlatdep)
        else:
            msg = "The elevation information in the mux files is inconsistent"
            assert hashed_elevation == myhash(lonlatdep), msg

    files_out.append(elevation_file)

    return files_out

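# Example usage of urs2nc (an illustrative sketch; the stems are
# hypothetical). The return value lists the four nc files written:
#
#     files = urs2nc(basename_in='cairns', basename_out='cairns')
#     # -> ['cairns_ha.nc', 'cairns_ua.nc', 'cairns_va.nc', 'cairns_e.nc']
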
##
# @brief Convert a quantity URS file to a NetCDF file.
# @param file_in Path to input URS file.
# @param file_out Path to the output file.
# @param quantity Name of the quantity to be written to the output file.
# @return A tuple (lonlatdep, lon, lat, depth).
def _binary_c2nc(file_in, file_out, quantity):
    """Reads in a quantity urs file and writes a quantity nc file.
    Additionally, returns the depth and lat, long info,
    so it can be written to a file.
    """

    columns = 3                           # long, lat, depth
    mux_file = open(file_in, 'rb')

    # Number of points/stations
    (points_num,) = unpack('i', mux_file.read(4))

    # nt, int - Number of time steps
    (time_step_count,) = unpack('i', mux_file.read(4))

    # dt, float - time step, seconds
    (time_step,) = unpack('f', mux_file.read(4))

    msg = "Bad data in the mux file."
    if points_num < 0:
        mux_file.close()
        raise ANUGAError, msg
    if time_step_count < 0:
        mux_file.close()
        raise ANUGAError, msg
    if time_step < 0:
        mux_file.close()
        raise ANUGAError, msg

    lonlatdep = p_array.array('f')
    lonlatdep.read(mux_file, columns * points_num)
    lonlatdep = num.array(lonlatdep, dtype=num.float)
    lonlatdep = num.reshape(lonlatdep, (points_num, columns))

    lon, lat, depth = lon_lat2grid(lonlatdep)
    lon_sorted = list(lon)
    lon_sorted.sort()

    if not num.alltrue(lon == lon_sorted):
        msg = "Longitudes in mux file are not in ascending order"
        raise IOError, msg

    lat_sorted = list(lat)
    lat_sorted.sort()
    # FIXME: lat_sorted is computed but, unlike lon_sorted, never checked.

    nc_file = Write_nc(quantity,
                       file_out,
                       time_step_count,
                       time_step,
                       lon,
                       lat)

    for i in range(time_step_count):
        # Read in a time slice from mux file
        hz_p_array = p_array.array('f')
        hz_p_array.read(mux_file, points_num)
        hz_p = num.array(hz_p_array, dtype=num.float)
        hz_p = num.reshape(hz_p, (len(lon), len(lat)))
        hz_p = num.transpose(hz_p)  # mux has lat varying fastest, nc has long v.f.

        # Write time slice to nc file
        nc_file.store_timestep(hz_p)

    mux_file.close()
    nc_file.close()

    return lonlatdep, lon, lat, depth

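# For reference, the mux layout parsed by _binary_c2nc above (a sketch
# reconstructed from the reads; native byte order assumed):
#
#     int32    points_num                         number of points/stations
#     int32    time_step_count                    number of time steps
#     float32  time_step                          time step in seconds
#     float32  lonlatdep[points_num * 3]          lon, lat, depth per point
#     float32  data[time_step_count][points_num]  one slice per time step
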
##
# @brief Return max/min indexes (for slicing) of the area specified.
# @param latitudes_ref Sequence of latitudes describing the grid.
# @param longitudes_ref Sequence of longitudes describing the grid.
# @param minlat Minimum latitude of specified area.
# @param maxlat Maximum latitude of specified area.
# @param minlon Minimum longitude of specified area.
# @param maxlon Maximum longitude of specified area.
# @return Tuple (lat_min_index, lat_max_index, lon_min_index, lon_max_index)
def get_min_max_indices(latitudes_ref, longitudes_ref,
                         minlat=None, maxlat=None,
                         minlon=None, maxlon=None):
    """
    Return max, min indexes (for slicing) of the lat and long arrays to cover
    the area specified with min/max lat/long.

    Think of the latitudes and longitudes describing a 2d surface.
    The area returned is, if possible, just big enough to cover the
    input max/min area. (This will not be possible if the max/min area
    has a section outside of the latitudes/longitudes area.)

    Assumes longitudes are sorted from low to high (west to east,
    e.g. 148 - 151) and latitudes are sorted, ascending or descending.
    """

    latitudes = latitudes_ref[:]
    longitudes = longitudes_ref[:]

    latitudes = ensure_numeric(latitudes)
    longitudes = ensure_numeric(longitudes)

    assert num.allclose(num.sort(longitudes), longitudes)

    lat_ascending = True
    if not num.allclose(num.sort(latitudes), latitudes):
        lat_ascending = False
        # Reverse order of lat, so it's in ascending order
        latitudes = latitudes[::-1]
        assert num.allclose(num.sort(latitudes), latitudes)

    largest_lat_index = len(latitudes)-1

    # Cut out a smaller extent.
    if minlat is None:
        lat_min_index = 0
    else:
        lat_min_index = num.searchsorted(latitudes, minlat)-1
        if lat_min_index < 0:
            lat_min_index = 0

    if maxlat is None:
        lat_max_index = largest_lat_index
    else:
        lat_max_index = num.searchsorted(latitudes, maxlat)
        if lat_max_index > largest_lat_index:
            lat_max_index = largest_lat_index

    if minlon is None:
        lon_min_index = 0
    else:
        lon_min_index = num.searchsorted(longitudes, minlon)-1
        if lon_min_index < 0:
            lon_min_index = 0

    if maxlon is None:
        lon_max_index = len(longitudes)
    else:
        lon_max_index = num.searchsorted(longitudes, maxlon)

    # Reverse the indexes if the lat array is descending
    if lat_ascending is False:
        lat_min_index, lat_max_index = largest_lat_index - lat_max_index, \
                                       largest_lat_index - lat_min_index
    lat_max_index = lat_max_index + 1  # Take into account how slicing works
    lon_max_index = lon_max_index + 1  # Take into account how slicing works

    return lat_min_index, lat_max_index, lon_min_index, lon_max_index

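# Worked example for get_min_max_indices (an illustrative sketch): with
#
#     latitudes  = [-36.0, -35.0, -34.0, -33.0]
#     longitudes = [148.0, 149.0, 150.0, 151.0]
#
# a request for minlat=-35.5, maxlat=-34.5, minlon=148.5, maxlon=150.5
# returns (0, 3, 0, 4), so latitudes[0:3] and longitudes[0:4] are the
# smallest slices covering the requested box.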