Context Navigation

source: anuga_core/source/anuga/geospatial_data/geospatial_data.py @ 4289

Last change on this file since 4289 was 4289, checked in by nick, 18 years ago
added some print statements if verbose
File size: 49.3 KB

Line
1	"""Class Geospatial_data - Manipulation of locations on the planet and
2	associated attributes.
3
4	"""
5	from sys import maxint
6	from os import access, F_OK, R_OK
7	from types import DictType
8	from warnings import warn
9	from string import lower
10	from Numeric import concatenate, array, Float, shape, reshape, ravel, take, \
11	size, shape
12	from random import randint
13	#from MA import tolist
14
15	from Scientific.IO.NetCDF import NetCDFFile
16	from anuga.coordinate_transforms.lat_long_UTM_conversion import UTMtoLL
17	from anuga.utilities.numerical_tools import ensure_numeric
18	from anuga.coordinate_transforms.geo_reference import Geo_reference, \
19	TitleError, DEFAULT_ZONE
20	from anuga.coordinate_transforms.redfearn import convert_from_latlon_to_utm
21	from anuga.utilities.anuga_exceptions import ANUGAError
22	from anuga.config import points_file_block_line_size as MAX_READ_LINES
23
24	class Geospatial_data:
25
26	def __init__(self,
27	data_points=None, # this can also be a points file name
28	attributes=None,
29	geo_reference=None,
30	default_attribute_name=None,
31	file_name=None,
32	delimiter=None,
33	latitudes=None,
34	longitudes=None,
35	points_are_lats_longs=False,
36	max_read_lines=None,
37	load_file_now=True,
38	verbose=False):
39
40
41	"""
42	Create instance from data points and associated attributes
43
44	data_points: x,y coordinates in meters. Type must be either a
45	sequence of 2-tuples or an Mx2 Numeric array of floats. A file name
46	can also be passed in here.
47
48	attributes: Associated values for each data point. The type
49	must be either a list or an array of length M or a dictionary
50	of lists (or arrays) of length M. In the latter case the keys
51	in the dictionary represent the attribute names, in the former
52	the attribute will get the default name "attribute".
53
54	geo_reference: Object representing the origin of the data
55	points. It contains UTM zone, easting and northing and data
56	points are assumed to be relative to this origin.
57	If geo_reference is None, the default geo ref object is used.
58
59	default_attribute_name: Name of default attribute to be used with
60	get_attribute_values. The idea is that the dataset can be
61	equipped with information about which attribute to return.
62	If None, the default is the "first"
63
64	latitudes, longitudes: Vectors of latitudes and longitudes,
65	used to specify location instead of points.
66
67	points_are_lats_longs: Set this as true if the points are actually
68	lats and longs, not UTM
69
70	max_read_lines: The number of rows read into memory when using
71	blocking to read a file.
72
73	load_file_now: If true the file is automatically loaded
74	into the geospatial instance. Used when blocking.
75
76	file_name: Name of input netCDF file or .txt file. netCDF file must
77	have dimensions "points" etc.
78	.txt file is a comma seperated file with x, y and attribute
79	data.
80
81	The first line has the titles of the columns. The first two
82	column titles are checked to see if they start with lat or
83	long (not case sensitive). If so the data is assumed to be
84	latitude and longitude, in decimal format and converted to
85	UTM. Otherwise the first two columns are assumed to be the x
86	and y, and the title names acually used are ignored.
87
88
89	The format for a .txt file is:
90	1st line: [column names]
91	other lines: x y [attributes]
92
93	for example:
94	x, y, elevation, friction
95	0.6, 0.7, 4.9, 0.3
96	1.9, 2.8, 5, 0.3
97	2.7, 2.4, 5.2, 0.3
98
99	The first two columns are always assumed to be x, y
100	coordinates.
101
102	An issue with the xya format is that the attribute column order
103	is not be controlled. The info is stored in a dictionary and it's
104	written in an order dependent on the hash order
105
106	The format for a Points dictionary is:
107
108	['pointlist'] a 2 column array describing points. 1st column x,
109	2nd column y.
110	['attributelist'], a dictionary of 1D arrays, representing
111	attribute values at the point. The dictionary key is the attribute
112	header.
113	['geo_reference'] a Geo_refernece object. Use if the point
114	information is relative. This is optional.
115	eg
116	dic['pointlist'] = [[1.0,2.0],[3.0,5.0]]
117	dic['attributelist']['elevation'] = [[7.0,5.0]
118
119	delimiter: is the file delimiter that will be used when
120	importing a .xya file, which is being phased out.
121
122	verbose:
123
124
125	"""
126
127	if isinstance(data_points, basestring):
128	# assume data point is really a file name
129	file_name = data_points
130
131	self.set_verbose(verbose)
132	self.geo_reference=None #create the attribute
133	self.file_name = file_name
134	self.max_read_lines = max_read_lines
135
136	if delimiter is not None:
137	msg = 'Specifying delimiters will be removed.'
138	msg = 'Text file format is moving to comma seperated .txt files.'
139	warn(msg, DeprecationWarning)
140	if file_name is None:
141	if delimiter is not None:
142	msg = 'No file specified yet a delimiter is provided!'
143	raise ValueError, msg
144	file_name = None
145	if latitudes is not None or longitudes is not None or \
146	points_are_lats_longs:
147	data_points, geo_reference = \
148	_set_using_lat_long(latitudes=latitudes,
149	longitudes=longitudes,
150	geo_reference=geo_reference,
151	data_points=data_points,
152	points_are_lats_longs=points_are_lats_longs)
153	self.check_data_points(data_points)
154	self.set_attributes(attributes)
155	self.set_geo_reference(geo_reference)
156	self.set_default_attribute_name(default_attribute_name)
157
158	elif load_file_now is True:
159	# watch for case where file name and points,
160	# attributes etc are provided!!
161	# if file name then all provided info will be removed!
162	self.import_points_file(file_name, delimiter, verbose)
163
164	self.check_data_points(self.data_points)
165	self.set_attributes(self.attributes)
166	self.set_geo_reference(self.geo_reference)
167	self.set_default_attribute_name(default_attribute_name)
168
169	#Why?
170	#assert self.attributes is None or isinstance(self.attributes, DictType)
171	#This is a hassle when blocking, so I've removed it.
172
173
174	def __len__(self):
175	return len(self.data_points)
176
177	def __repr__(self):
178	return str(self.get_data_points(absolute=True))
179
180
181	def check_data_points(self, data_points):
182	"""Checks data points
183	"""
184
185	if data_points is None:
186	self.data_points = None
187	msg = 'There is no data or file provided!'
188	raise ValueError, msg
189
190	else:
191	self.data_points = ensure_numeric(data_points)
192	#print "self.data_points.shape",self.data_points.shape
193	if not (0,) == self.data_points.shape:
194	assert len(self.data_points.shape) == 2
195	assert self.data_points.shape[1] == 2
196
197	def set_attributes(self, attributes):
198	"""Check and assign attributes dictionary
199	"""
200
201	if attributes is None:
202	self.attributes = None
203	return
204
205	if not isinstance(attributes, DictType):
206	#Convert single attribute into dictionary
207	attributes = {'attribute': attributes}
208
209	#Check input attributes
210	for key in attributes.keys():
211	try:
212	attributes[key] = ensure_numeric(attributes[key])
213	except:
214	msg = 'Attribute %s could not be converted' %key
215	msg += 'to a numeric vector'
216	raise msg
217
218	self.attributes = attributes
219
220
221	def set_geo_reference(self, geo_reference):
222
223	from anuga.coordinate_transforms.geo_reference import Geo_reference
224
225	if geo_reference is None:
226	geo_reference = Geo_reference() # Use default
227	if not isinstance(geo_reference, Geo_reference):
228	msg = 'Argument geo_reference must be a valid Geo_reference \n'
229	msg += 'object or None.'
230	raise msg
231
232	# if a geo ref already exists, change the point data to
233	# represent the new geo-ref
234	if self.geo_reference is not None:
235	#FIXME: Maybe put out a warning here...
236	self.data_points = self.get_data_points \
237	(geo_reference=geo_reference)
238
239	self.geo_reference = geo_reference
240
241
242	def set_default_attribute_name(self, default_attribute_name):
243	self.default_attribute_name = default_attribute_name
244
245	def set_verbose(self, verbose=False):
246	if verbose in [False, True]:
247	self.verbose = verbose
248	else:
249	msg = 'Illegal value: %s' %str(verbose)
250	raise Exception, msg
251
252	def clip(self, polygon, closed=True):
253	"""Clip geospatial data by a polygon
254
255	Input
256	polygon - Either a list of points, an Nx2 array or
257	a Geospatial data object.
258	closed - (optional) determine whether points on boundary should be
259	regarded as belonging to the polygon (closed = True)
260	or not (closed = False). Default is True.
261
262	Output
263	New geospatial data object representing points inside
264	specified polygon.
265
266	"""
267
268	from anuga.utilities.polygon import inside_polygon
269
270	if isinstance(polygon, Geospatial_data):
271	# Polygon is an object - extract points
272	polygon = polygon.get_data_points()
273
274	points = self.get_data_points()
275	inside_indices = inside_polygon(points, polygon, closed)
276
277	clipped_G = self.get_sample(inside_indices)
278	# clipped_points = take(points, inside_indices)
279
280	# Clip all attributes
281	# attributes = self.get_all_attributes()
282
283	# clipped_attributes = {}
284	# if attributes is not None:
285	# for key, att in attributes.items():
286	# clipped_attributes[key] = take(att, inside_indices)
287
288	# return Geospatial_data(clipped_points, clipped_attributes)
289	return clipped_G
290
291
292	def clip_outside(self, polygon, closed=True):
293	"""Clip geospatial date by a polygon, keeping data OUTSIDE of polygon
294
295	Input
296	polygon - Either a list of points, an Nx2 array or
297	a Geospatial data object.
298	closed - (optional) determine whether points on boundary should be
299	regarded as belonging to the polygon (closed = True)
300	or not (closed = False). Default is True.
301
302	Output
303	Geospatial data object representing point OUTSIDE specified polygon
304
305	"""
306
307	from anuga.utilities.polygon import outside_polygon
308
309	if isinstance(polygon, Geospatial_data):
310	# Polygon is an object - extract points
311	polygon = polygon.get_data_points()
312
313	points = self.get_data_points()
314	outside_indices = outside_polygon(points, polygon, closed)
315
316	clipped_G = self.get_sample(outside_indices)
317
318	# clipped_points = take(points, outside_indices)
319
320	# Clip all attributes
321	# attributes = self.get_all_attributes()
322
323	# clipped_attributes = {}
324	# if attributes is not None:
325	# for key, att in attributes.items():
326	# clipped_attributes[key] = take(att, outside_indices)
327
328	# return Geospatial_data(clipped_points, clipped_attributes)
329	return clipped_G
330
331
332	def get_geo_reference(self):
333	return self.geo_reference
334
335	def get_data_points(self, absolute=True, geo_reference=None,
336	as_lat_long=False):
337	"""Get coordinates for all data points as an Nx2 array
338
339	If absolute is False returned coordinates are relative to the
340	internal georeference's xll and yll corners, otherwise
341	absolute UTM coordinates are returned.
342
343	If a geo_reference is passed the points are returned relative
344	to that geo_reference.
345
346	Default: absolute is True.
347	"""
348	if as_lat_long is True:
349	msg = "Points need a zone to be converted into lats and longs"
350	assert self.geo_reference is not None, msg
351	zone = self.geo_reference.get_zone()
352	assert self.geo_reference.get_zone() is not DEFAULT_ZONE, msg
353	lats_longs = []
354	for point in self.get_data_points(True):
355	### UTMtoLL(northing, easting, zone,
356	lat_calced, long_calced = UTMtoLL(point[1],point[0], zone)
357	lats_longs.append((lat_calced, long_calced)) # to hash
358	return lats_longs
359
360	if absolute is True and geo_reference is None:
361	return self.geo_reference.get_absolute(self.data_points)
362	elif geo_reference is not None:
363	return geo_reference.change_points_geo_ref \
364	(self.data_points,
365	self.geo_reference)
366	else:
367	return self.data_points
368
369
370	def get_attributes(self, attribute_name=None):
371	"""Return values for one named attribute.
372
373	If attribute_name is None, default_attribute_name is used
374	"""
375
376	if attribute_name is None:
377	if self.default_attribute_name is not None:
378	attribute_name = self.default_attribute_name
379	else:
380	attribute_name = self.attributes.keys()[0]
381	# above line takes the first one from keys
382
383	if self.verbose is True:
384	print 'Using attribute %s' %attribute_name
385	print 'Available attributes: %s' %(self.attributes.keys())
386
387	msg = 'Attribute name %s does not exist in data set' %attribute_name
388	assert self.attributes.has_key(attribute_name), msg
389
390	return self.attributes[attribute_name]
391
392	def get_all_attributes(self):
393	"""
394	Return values for all attributes.
395	The return value is either None or a dictionary (possibly empty).
396	"""
397
398	return self.attributes
399
400	def __add__(self, other):
401	"""
402	Returns the addition of 2 geospatical objects,
403	objects are concatencated to the end of each other
404
405	NOTE: doesn't add if objects contain different
406	attributes
407
408	Always return relative points!
409	"""
410
411	# find objects zone and checks if the same
412	geo_ref1 = self.get_geo_reference()
413	zone1 = geo_ref1.get_zone()
414
415	geo_ref2 = other.get_geo_reference()
416	zone2 = geo_ref2.get_zone()
417
418	geo_ref1.reconcile_zones(geo_ref2)
419
420
421	# sets xll and yll as the smallest from self and other
422	# FIXME (Duncan and Ole): use lower left corner derived from
423	# absolute coordinates
424	if self.geo_reference.xllcorner <= other.geo_reference.xllcorner:
425	xll = self.geo_reference.xllcorner
426	else:
427	xll = other.geo_reference.xllcorner
428
429	if self.geo_reference.yllcorner <= other.geo_reference.yllcorner:
430	yll = self.geo_reference.yllcorner
431	else:
432	yll = other.geo_reference.yllcorner
433	new_geo_ref = Geo_reference(geo_ref1.get_zone(), xll, yll)
434
435	xll = yll = 0.
436
437	relative_points1 = self.get_data_points(absolute = False)
438	relative_points2 = other.get_data_points(absolute = False)
439
440
441	new_relative_points1 = new_geo_ref.\
442	change_points_geo_ref(relative_points1,
443	geo_ref1)
444	new_relative_points2 = new_geo_ref.\
445	change_points_geo_ref(relative_points2,
446	geo_ref2)
447
448	# Now both point sets are relative to new_geo_ref and
449	# zones have been reconciled
450
451	# Concatenate points
452	new_points = concatenate((new_relative_points1,
453	new_relative_points2),
454	axis = 0)
455
456	# Concatenate attributes if any
457	if self.attributes is None:
458	if other.attributes is not None:
459	msg = 'Both geospatial_data objects must have the same \n'
460	msg += 'attributes to allow addition.'
461	raise Exception, msg
462
463	new_attributes = None
464	else:
465	new_attributes = {}
466	for x in self.attributes.keys():
467	if other.attributes.has_key(x):
468
469	attrib1 = self.attributes[x]
470	attrib2 = other.attributes[x]
471	new_attributes[x] = concatenate((attrib1, attrib2))
472
473	else:
474	msg = 'Both geospatial_data objects must have the same \n'
475	msg += 'attributes to allow addition.'
476	raise Exception, msg
477
478	# Instantiate new data object and return
479	return Geospatial_data(new_points,
480	new_attributes,
481	new_geo_ref)
482
483	###
484	# IMPORT/EXPORT POINTS FILES
485	###
486
487	def import_points_file(self, file_name, delimiter=None, verbose=False):
488	""" load an .xya or .pts file
489	Note: will throw an IOError if it can't load the file.
490	Catch these!
491
492	Post condition: self.attributes dictionary has been set
493	"""
494
495	if access(file_name, F_OK) == 0 :
496	msg = 'File %s does not exist or is not accessible' %file_name
497	raise IOError, msg
498
499	attributes = {}
500	if file_name[-4:]== ".xya":
501	msg = 'Text file format is moving to comma seperated .txt files.'
502	warn(msg, DeprecationWarning)
503	try:
504	if delimiter == None:
505	try:
506	fd = open(file_name)
507	data_points, attributes, geo_reference =\
508	_read_xya_file(fd, ',')
509	except TitleError:
510	fd.close()
511	fd = open(file_name)
512	data_points, attributes, geo_reference =\
513	_read_xya_file(fd, ' ')
514	else:
515	fd = open(file_name)
516	data_points, attributes, geo_reference =\
517	_read_xya_file(fd, delimiter)
518	fd.close()
519	except (IndexError,ValueError,SyntaxError):
520	fd.close()
521	msg = 'Could not open file %s ' %file_name
522	raise IOError, msg
523	except IOError, e:
524	fd.close()
525	# Catch this to add an error message
526	msg = 'Could not open file or incorrect file format %s:%s'\
527	%(file_name, e)
528	raise IOError, msg
529
530	elif file_name[-4:]== ".pts":
531	try:
532	data_points, attributes, geo_reference =\
533	_read_pts_file(file_name, verbose)
534	except IOError, e:
535	msg = 'Could not open file %s ' %file_name
536	raise IOError, msg
537
538	elif file_name[-4:]== ".txt" or file_name[-4:]== ".csv":
539	#let's do ticket#116 stuff
540	#
541	try:
542	data_points, attributes, geo_reference =\
543	_read_csv_file(file_name, verbose)
544	except IOError, e:
545	msg = 'Could not open file %s ' %file_name
546	raise IOError, msg
547	else:
548	msg = 'Extension %s is unknown' %file_name[-4:]
549	raise IOError, msg
550	# print'in import data_points', data_points
551	# print'in import attributes', attributes
552	# print'in import data_points', geo_reference
553	self.data_points = data_points
554	self.attributes = attributes
555	self.geo_reference = geo_reference
556
557	# return all_data
558
559	def export_points_file(self, file_name, absolute=True, as_lat_long=False):
560
561	"""
562	write a points file, file_name, as a text (.xya) or binary (.pts) file
563	file_name is the file name, including the extension
564	The point_dict is defined at the top of this file.
565
566	If absolute is True data points at returned added to the xll and yll
567	and geo_reference as None
568
569	If absolute is False data points at returned as relative to the xll
570	and yll and geo_reference remains uneffected
571	"""
572
573	if absolute is False and file_name[-4:] == ".xya":
574	msg = 'The text file values must be absolute. '
575	msg += 'Text file format is moving to comma seperated .txt files.'
576	warn(msg, DeprecationWarning)
577
578	if (file_name[-4:] == ".xya"):
579	msg = '.xya format is deprecated. Please use .txt.'
580	warn(msg, DeprecationWarning)
581	if absolute is True:
582	_write_xya_file(file_name,
583	self.get_data_points(absolute=True),
584	self.get_all_attributes())
585	else:
586	_write_xya_file(file_name,
587	self.get_data_points(absolute=False),
588	self.get_all_attributes(),
589	self.get_geo_reference())
590
591	elif (file_name[-4:] == ".pts"):
592	if absolute is True:
593	_write_pts_file(file_name,
594	self.get_data_points(absolute),
595	self.get_all_attributes())
596	else:
597	_write_pts_file(file_name,
598	self.get_data_points(absolute),
599	self.get_all_attributes(),
600	self.get_geo_reference())
601
602	elif file_name[-4:] == ".txt" or file_name[-4:] == ".csv":
603	msg = "ERROR: trying to write a .txt file with relative data."
604	assert absolute, msg
605	_write_csv_file(file_name,
606	self.get_data_points(absolute=True,
607	as_lat_long=as_lat_long),
608	self.get_all_attributes(),
609	as_lat_long=as_lat_long)
610
611	elif file_name[-4:] == ".urs" :
612	msg = "ERROR: Can not write a .urs file as a relative file."
613	assert absolute, msg
614	_write_urs_file(file_name,
615	self.get_data_points(as_lat_long=True))
616
617	else:
618	msg = 'Unknown file type %s ' %file_name
619	raise IOError, msg
620
621	def get_sample(self, indices):
622	""" Returns a object which is a subset of the original
623	and the data points and attributes in this new object refer to
624	the indices provided
625
626	Input
627	indices- a list of integers that represent the new object
628	Output
629	New geospatial data object representing points specified by
630	the indices
631	"""
632	#FIXME: add the geo_reference to this
633
634	points = self.get_data_points()
635	sampled_points = take(points, indices)
636
637	attributes = self.get_all_attributes()
638
639	sampled_attributes = {}
640	if attributes is not None:
641	for key, att in attributes.items():
642	sampled_attributes[key] = take(att, indices)
643
644	return Geospatial_data(sampled_points, sampled_attributes)
645
646
647	def split(self, factor=0.5, verbose=False):
648	"""Returns two geospatial_data object, first is size of the 'factor'
649	smaller the original and the second is the remainer. The two new
650	object are disjoin set of each other.
651
652	Points of the two new object have selected RANDOMLY.
653	AND if factor is a decimal it will round (2.25 to 2 and 2.5 to 3)
654
655
656	Input - the factor which to split the object, if 0.1 then 10% of the
657	object will be returned
658
659	Output - two geospatial_data objects that are disjoint sets of the
660	original
661	"""
662
663	i=0
664	self_size = len(self)
665	random_list = []
666	remainder_list = []
667	new_size = round(factor*self_size)
668	# print'Split original %s by %s' %(self_size, factor)
669	# print'New samples are %s and %s in size' %(int(round(factor*self_size)),int(self_size-new_size))
670
671	#find unique random numbers
672	if verbose: print "make unique random number list and get indices"
673	while i < new_size:
674	random_num = randint(0,self_size-1)
675	if random_num not in random_list:
676	random_list.append(random_num)
677	i=i+1
678
679	#Make list of opposite to random_list
680	if verbose: print "make list of opposite to random list"
681	for i in range(0,self_size,1):
682	remainder_list.append(i)
683
684	#remove random list from remainder_list to get correct remainder_list
685	#need to sort and reverse so the pop() works correctly
686	random_list.sort()
687	random_list.reverse()
688	if verbose: print "get indices of opposite to random list"
689	for i in random_list:
690	remainder_list.pop(i)
691	if verbose:
692	if ((i/100)==(float(i)/100)): print "reached: ",i
693
694	#get new samples
695	if verbose: print "get values of indices for random list"
696	G1 = self.get_sample(random_list)
697	if verbose: print "get values of indices for opposite of random list"
698	G2 = self.get_sample(remainder_list)
699
700	return G1, G2
701
702	def __iter__(self):
703	"""
704	read in the header and save the file pointer position
705	"""
706
707	from Scientific.IO.NetCDF import NetCDFFile
708
709	#FIXME - what to do if the file isn't there
710
711	if self.file_name[-4:]== ".xya":
712	#let's just read it all
713	pass
714	elif self.file_name[-4:]== ".pts":
715
716	# see if the file is there. Throw a QUIET IO error if it isn't
717	fd = open(self.file_name,'r')
718	fd.close()
719
720	#throws prints to screen if file not present
721	self.fid = NetCDFFile(self.file_name, 'r')
722
723	self.blocking_georef, self.blocking_keys, self.last_row = \
724	_read_pts_file_header(self.fid, self.verbose)
725	self.start_row=0
726	self.show_verbose = 0
727	self.verbose_block_size = (self.last_row +10) /10
728	else:
729	file_pointer = open(self.file_name)
730	self.header, self.file_pointer = \
731	_read_csv_file_header(file_pointer)
732	self.blocking_georef = None # Used for reconciling zones
733	if self.max_read_lines is None:
734	self.max_read_lines = MAX_READ_LINES
735	return self
736
737	def next(self):
738	# read a block, instanciate a new geospatial and return it
739	if self.file_name[-4:]== ".xya" :
740	if not hasattr(self,'finished_reading') or \
741	self.finished_reading is False:
742	#let's just read it all
743	geo = Geospatial_data(self.file_name)
744	self.finished_reading = True
745	else:
746	raise StopIteration
747	self.finished_reading = False
748
749	elif self.file_name[-4:]== ".pts":
750	if self.start_row == self.last_row:
751	# read the end of the file last iteration
752	# remove blocking attributes
753	self.fid.close()
754	del self.max_read_lines
755	del self.blocking_georef
756	del self.last_row
757	del self.start_row
758	del self.blocking_keys
759	del self.fid
760	raise StopIteration
761	fin_row = self.start_row + self.max_read_lines
762	if fin_row > self.last_row:
763	fin_row = self.last_row
764
765
766
767	if self.verbose is True:
768	if self.show_verbose >= self.start_row and \
769	self.show_verbose < fin_row:
770	print 'Doing %d of %d' %(self.start_row, self.last_row+10)
771	self.show_verbose += max(self.max_read_lines,
772	self.verbose_block_size)
773	#call stuff
774	pointlist, att_dict, = \
775	_read_pts_file_blocking( self.fid,
776	self.start_row,
777	fin_row,
778	self.blocking_keys
779	)
780	geo = Geospatial_data(pointlist, att_dict, self.blocking_georef)
781	self.start_row = fin_row
782
783	else:
784	try:
785	pointlist, att_dict, geo_ref, self.file_pointer = \
786	_read_csv_file_blocking( self.file_pointer,
787	self.header[:],
788	max_read_lines=self.max_read_lines,
789	verbose=self.verbose)
790
791	# Check that the zones haven't changed.
792	if geo_ref is not None:
793	geo_ref.reconcile_zones(self.blocking_georef)
794	self.blocking_georef = geo_ref
795	elif self.blocking_georef is not None:
796
797	msg = 'Geo reference given, then not given.'
798	msg += ' This should not happen.'
799	raise ValueError, msg
800	geo = Geospatial_data(pointlist, att_dict, geo_ref)
801	except StopIteration:
802	self.file_pointer.close()
803	del self.header
804	del self.file_pointer
805	raise StopIteration
806	except ANUGAError:
807	self.file_pointer.close()
808	del self.header
809	del self.file_pointer
810	raise
811	return geo
812
813	def _set_using_lat_long(latitudes,
814	longitudes,
815	geo_reference,
816	data_points,
817	points_are_lats_longs):
818	"""
819	if the points has lat long info, assume it is in (lat, long) order.
820	"""
821
822	if geo_reference is not None:
823	msg = """A georeference is specified yet latitude and longitude
824	are also specified!"""
825	raise ValueError, msg
826
827	if data_points is not None and not points_are_lats_longs:
828	msg = """Data points are specified yet latitude and
829	longitude are also specified!"""
830	raise ValueError, msg
831
832	if points_are_lats_longs:
833	if data_points is None:
834	msg = """Data points are not specified !"""
835	raise ValueError, msg
836	lats_longs = ensure_numeric(data_points)
837	latitudes = ravel(lats_longs[:,0:1])
838	longitudes = ravel(lats_longs[:,1:])
839
840	if latitudes is None and longitudes is None:
841	msg = """Latitudes and Longitudes are not."""
842	raise ValueError, msg
843
844	if latitudes is None:
845	msg = """Longitudes are specified yet latitudes aren't!"""
846	raise ValueError, msg
847
848	if longitudes is None:
849	msg = """Latitudes are specified yet longitudes aren't!"""
850	raise ValueError, msg
851
852	data_points, zone = convert_from_latlon_to_utm(latitudes=latitudes,
853	longitudes=longitudes)
854	return data_points, Geo_reference(zone=zone)
855
856	def _read_pts_file(file_name, verbose=False):
857	"""Read .pts NetCDF file
858
859	Return a dic of array of points, and dic of array of attribute
860	eg
861	dic['points'] = [[1.0,2.0],[3.0,5.0]]
862	dic['attributelist']['elevation'] = [[7.0,5.0]
863	"""
864
865	from Scientific.IO.NetCDF import NetCDFFile
866
867	if verbose: print 'Reading ', file_name
868
869
870	# see if the file is there. Throw a QUIET IO error if it isn't
871	fd = open(file_name,'r')
872	fd.close()
873
874	#throws prints to screen if file not present
875	fid = NetCDFFile(file_name, 'r')
876
877	pointlist = array(fid.variables['points'])
878	keys = fid.variables.keys()
879	if verbose: print 'Got %d variables: %s' %(len(keys), keys)
880	try:
881	keys.remove('points')
882	except IOError, e:
883	fid.close()
884	msg = 'Expected keyword "points" but could not find it'
885	raise IOError, msg
886
887	attributes = {}
888	for key in keys:
889	if verbose: print "reading attribute '%s'" %key
890
891	attributes[key] = array(fid.variables[key])
892
893
894	try:
895	geo_reference = Geo_reference(NetCDFObject=fid)
896	except AttributeError, e:
897	geo_reference = None
898
899	fid.close()
900
901	return pointlist, attributes, geo_reference
902
903
904	def _read_csv_file(file_name, verbose=False):
905	"""Read .csv file
906
907	Return a dic of array of points, and dic of array of attribute
908	eg
909	dic['points'] = [[1.0,2.0],[3.0,5.0]]
910	dic['attributelist']['elevation'] = [[7.0,5.0]
911	"""
912
913	#from anuga.shallow_water.data_manager import Exposure_csv
914	#csv =Exposure_csv(file_name)
915
916	file_pointer = open(file_name)
917	header, file_pointer = _read_csv_file_header(file_pointer)
918	try:
919	pointlist, att_dict, geo_ref, file_pointer = \
920	_read_csv_file_blocking( \
921	file_pointer,
922	header,
923	max_read_lines=1e30) #If the file is bigger that this, block..
924	except ANUGAError:
925	file_pointer.close()
926	raise
927	file_pointer.close()
928	return pointlist, att_dict, geo_ref
929
930	CSV_DELIMITER = ','
931	def _read_csv_file_header(file_pointer,
932	delimiter=CSV_DELIMITER,
933	verbose=False):
934
935	"""Read the header of a .csv file
936	Return a list of the header names
937	"""
938	line = file_pointer.readline()
939	header = clean_line(line, delimiter)
940	return header, file_pointer
941
942	def _read_csv_file_blocking(file_pointer, header,
943	delimiter=CSV_DELIMITER,
944	max_read_lines=MAX_READ_LINES,
945	verbose=False):
946
947
948	"""
949	Read the body of a .csv file.
950	header: The list header of the csv file, with the x and y labels.
951	"""
952	points = []
953	pointattributes = []
954	att_dict = {}
955
956	#This is to remove the x and y headers.
957	header = header[:]
958	x_header = header.pop(0)
959	y_header = header.pop(0)
960
961	read_lines = 0
962	while read_lines<max_read_lines:
963	line = file_pointer.readline()
964	#print "line",line
965	numbers = clean_line(line,delimiter)
966	if len(numbers) <= 1:
967	break
968	if line[0] == '#':
969	continue
970	read_lines += 1
971	if True: # remove.. #if numbers != []:
972	try:
973	x = float(numbers[0])
974	y = float(numbers[1])
975	points.append([x,y])
976	numbers.pop(0)
977	numbers.pop(0)
978	if len(header) != len(numbers):
979
980	file_pointer.close()
981	# It might not be a problem with the header
982	#raise TitleAmountError
983	msg = "File load error. There might be a problem with the file header"
984	raise IOError, msg
985	for i,num in enumerate(numbers):
986	num.strip()
987	if num != '\n' and num != '':
988	#attributes.append(float(num))
989	att_dict.setdefault(header[i],[]).append(float(num))
990	#except IOError:
991	except ValueError:
992	raise SyntaxError
993	if points == []:
994	raise StopIteration
995
996
997	pointlist = array(points).astype(Float)
998	for key in att_dict.keys():
999	att_dict[key] = array(att_dict[key]).astype(Float)
1000
1001	#Do stuff here so the info is in lat's and longs
1002	geo_ref = None
1003	x_header = lower(x_header[:3])
1004	y_header = lower(y_header[:3])
1005	if (x_header == 'lon' or x_header == 'lat') and \
1006	(y_header == 'lon' or y_header == 'lat'):
1007	if x_header == 'lon':
1008	longitudes = ravel(pointlist[:,0:1])
1009	latitudes = ravel(pointlist[:,1:])
1010	else:
1011	latitudes = ravel(pointlist[:,0:1])
1012	longitudes = ravel(pointlist[:,1:])
1013
1014	pointlist, geo_ref = _set_using_lat_long(latitudes,
1015	longitudes,
1016	geo_reference=None,
1017	data_points=None,
1018	points_are_lats_longs=False)
1019	return pointlist, att_dict, geo_ref, file_pointer
1020
1021	def _read_pts_file_header(fid, verbose=False):
1022
1023	"""
1024	Read the geo_reference of a .pts file
1025	"""
1026
1027	keys = fid.variables.keys()
1028	try:
1029	keys.remove('points')
1030	except IOError, e:
1031	fid.close()
1032	msg = 'Expected keyword "points" but could not find it'
1033	raise IOError, msg
1034	if verbose: print 'Got %d variables: %s' %(len(keys), keys)
1035
1036	try:
1037	geo_reference = Geo_reference(NetCDFObject=fid)
1038	except AttributeError, e:
1039	geo_reference = None
1040
1041	return geo_reference, keys, fid.dimensions['number_of_points']
1042
1043	def _read_pts_file_blocking(fid, start_row, fin_row, keys):
1044	#verbose=False):
1045
1046
1047	"""
1048	Read the body of a .csv file.
1049	header: The list header of the csv file, with the x and y labels.
1050	"""
1051
1052	pointlist = array(fid.variables['points'][start_row:fin_row])
1053
1054	attributes = {}
1055	for key in keys:
1056	attributes[key] = array(fid.variables[key][start_row:fin_row])
1057
1058	return pointlist, attributes
1059
1060
1061	def _read_xya_file(fd, delimiter):
1062	points = []
1063	pointattributes = []
1064	title = fd.readline()
1065	att_names = clean_line(title,delimiter)
1066	att_dict = {}
1067	line = fd.readline()
1068	numbers = clean_line(line,delimiter)
1069
1070	while len(numbers) > 1 and line[0] <> '#':
1071	if numbers != []:
1072	try:
1073	x = float(numbers[0])
1074	y = float(numbers[1])
1075	points.append([x,y])
1076	numbers.pop(0)
1077	numbers.pop(0)
1078	if len(att_names) != len(numbers):
1079	fd.close()
1080	# It might not be a problem with the title
1081	#raise TitleAmountError
1082	raise IOError
1083	for i,num in enumerate(numbers):
1084	num.strip()
1085	if num != '\n' and num != '':
1086	#attributes.append(float(num))
1087	att_dict.setdefault(att_names[i],[]).append(float(num))
1088	except ValueError:
1089	raise SyntaxError
1090	line = fd.readline()
1091	numbers = clean_line(line,delimiter)
1092
1093	if line == '':
1094	geo_reference = None
1095	else:
1096	geo_reference = Geo_reference(ASCIIFile=fd,read_title=line)
1097
1098
1099	pointlist = array(points).astype(Float)
1100	for key in att_dict.keys():
1101	att_dict[key] = array(att_dict[key]).astype(Float)
1102
1103	return pointlist, att_dict, geo_reference
1104
1105	def _write_pts_file(file_name,
1106	write_data_points,
1107	write_attributes=None,
1108	write_geo_reference=None):
1109	"""
1110	Write .pts NetCDF file
1111
1112	NOTE: Below might not be valid ask Duncan : NB 5/2006
1113
1114	WARNING: This function mangles the point_atts data structure
1115	#F??ME: (DSG)This format has issues.
1116	# There can't be an attribute called points
1117	# consider format change
1118	# method changed by NB not sure if above statement is correct
1119
1120	should create new test for this
1121	legal_keys = ['pointlist', 'attributelist', 'geo_reference']
1122	for key in point_atts.keys():
1123	msg = 'Key %s is illegal. Valid keys are %s' %(key, legal_keys)
1124	assert key in legal_keys, msg
1125	"""
1126	from Scientific.IO.NetCDF import NetCDFFile
1127	# NetCDF file definition
1128	outfile = NetCDFFile(file_name, 'w')
1129
1130	#Create new file
1131	outfile.institution = 'Geoscience Australia'
1132	outfile.description = 'NetCDF format for compact and portable storage ' +\
1133	'of spatial point data'
1134
1135	# dimension definitions
1136	shape = write_data_points.shape[0]
1137	outfile.createDimension('number_of_points', shape)
1138	outfile.createDimension('number_of_dimensions', 2) #This is 2d data
1139
1140	# variable definition
1141	outfile.createVariable('points', Float, ('number_of_points',
1142	'number_of_dimensions'))
1143
1144	#create variables
1145	outfile.variables['points'][:] = write_data_points #.astype(Float32)
1146
1147	if write_attributes is not None:
1148	for key in write_attributes.keys():
1149	outfile.createVariable(key, Float, ('number_of_points',))
1150	outfile.variables[key][:] = write_attributes[key] #.astype(Float32)
1151
1152	if write_geo_reference is not None:
1153	write_geo_reference.write_NetCDF(outfile)
1154
1155	outfile.close()
1156
1157
1158
1159	def _write_xya_file(file_name,
1160	write_data_points,
1161	write_attributes=None,
1162	write_geo_reference=None,
1163	delimiter=','):
1164	"""
1165	export a file, file_name, with the xya format
1166
1167	"""
1168	points = write_data_points
1169	pointattributes = write_attributes
1170
1171	fd = open(file_name,'w')
1172	titlelist = ""
1173	if pointattributes is not None:
1174	for title in pointattributes.keys():
1175	titlelist = titlelist + title + delimiter
1176	titlelist = titlelist[0:-len(delimiter)] # remove the last delimiter
1177	fd.write(titlelist+"\n")
1178
1179	#<vertex #> <x> <y> [attributes]
1180	for i, vert in enumerate( points):
1181
1182
1183	if pointattributes is not None:
1184	attlist = ","
1185	for att in pointattributes.keys():
1186	attlist = attlist + str(pointattributes[att][i])+ delimiter
1187	attlist = attlist[0:-len(delimiter)] # remove the last delimiter
1188	attlist.strip()
1189	else:
1190	attlist = ''
1191
1192	fd.write(str(vert[0]) + delimiter +
1193	str(vert[1]) + attlist + "\n")
1194
1195	if write_geo_reference is not None:
1196	write_geo_reference.write_ASCII(fd)
1197	fd.close()
1198
1199
1200	def _write_csv_file(file_name,
1201	write_data_points,
1202	write_attributes=None,
1203	as_lat_long=False,
1204	delimiter=','):
1205	"""
1206	export a file, file_name, with the xya format
1207
1208	"""
1209	points = write_data_points
1210	pointattributes = write_attributes
1211
1212	fd = open(file_name,'w')
1213	if as_lat_long:
1214	titlelist = "latitude" + delimiter + "longitude" + delimiter
1215	else:
1216	titlelist = "x" + delimiter + "y" + delimiter
1217	if pointattributes is not None:
1218	for title in pointattributes.keys():
1219	titlelist = titlelist + title + delimiter
1220	titlelist = titlelist[0:-len(delimiter)] # remove the last delimiter
1221	fd.write(titlelist+"\n")
1222
1223	# <x/lat> <y/long> [attributes]
1224	for i, vert in enumerate( points):
1225
1226
1227	if pointattributes is not None:
1228	attlist = ","
1229	for att in pointattributes.keys():
1230	attlist = attlist + str(pointattributes[att][i])+ delimiter
1231	attlist = attlist[0:-len(delimiter)] # remove the last delimiter
1232	attlist.strip()
1233	else:
1234	attlist = ''
1235
1236	fd.write(str(vert[0]) + delimiter +
1237	str(vert[1]) + attlist + "\n")
1238
1239	fd.close()
1240
1241	def _write_urs_file(file_name,
1242	points,
1243	delimiter=' '):
1244	"""
1245	export a file, file_name, with the urs format
1246	the data points are in lats and longs
1247
1248	"""
1249	fd = open(file_name,'w')
1250	fd.write(str(len(points))+"\n")
1251	# <lat> <long> <id#>
1252	for i, vert in enumerate( points):
1253	fd.write(str(round(vert[0],7)) + delimiter + \
1254	str(round(vert[1],7)) + delimiter +str(i+1)+ "\n")
1255	fd.close()
1256
1257	def _point_atts2array(point_atts):
1258	point_atts['pointlist'] = array(point_atts['pointlist']).astype(Float)
1259
1260	for key in point_atts['attributelist'].keys():
1261	point_atts['attributelist'][key]=\
1262	array(point_atts['attributelist'][key]).astype(Float)
1263	return point_atts
1264
1265
1266
1267
1268	def geospatial_data2points_dictionary(geospatial_data):
1269	"""Convert geospatial data to points_dictionary
1270	"""
1271
1272	points_dictionary = {}
1273	points_dictionary['pointlist'] = geospatial_data.data_points
1274
1275	points_dictionary['attributelist'] = {}
1276
1277	for attribute_name in geospatial_data.attributes.keys():
1278	val = geospatial_data.attributes[attribute_name]
1279	points_dictionary['attributelist'][attribute_name] = val
1280
1281	points_dictionary['geo_reference'] = geospatial_data.geo_reference
1282
1283	return points_dictionary
1284
1285
1286	def points_dictionary2geospatial_data(points_dictionary):
1287	"""Convert points_dictionary to geospatial data object
1288	"""
1289
1290	msg = 'Points dictionary must have key pointlist'
1291	assert points_dictionary.has_key('pointlist'), msg
1292
1293	msg = 'Points dictionary must have key attributelist'
1294	assert points_dictionary.has_key('attributelist'), msg
1295
1296	if points_dictionary.has_key('geo_reference'):
1297	geo = points_dictionary['geo_reference']
1298	else:
1299	geo = None
1300
1301	return Geospatial_data(points_dictionary['pointlist'],
1302	points_dictionary['attributelist'],
1303	geo_reference = geo)
1304
1305	def clean_line(line,delimiter):
1306	"""Remove whitespace
1307	"""
1308	#print ">%s" %line
1309	line = line.strip()
1310	#print "stripped>%s" %line
1311	numbers = line.split(delimiter)
1312	i = len(numbers) - 1
1313	while i >= 0:
1314	if numbers[i] == '':
1315	numbers.pop(i)
1316	else:
1317	numbers[i] = numbers[i].strip()
1318
1319	i += -1
1320	#for num in numbers:
1321	# print "num>%s<" %num
1322	return numbers
1323
1324	def ensure_absolute(points, geo_reference=None):
1325	"""
1326	This function inputs several formats and
1327	outputs one format. - a numeric array of absolute points.
1328
1329	Inputed formats are;
1330	points: List or numeric array of coordinate pairs [xi, eta] of
1331	points or geospatial object or points file name
1332
1333	mesh_origin: A geo_reference object or 3-tuples consisting of
1334	UTM zone, easting and northing.
1335	If specified vertex coordinates are assumed to be
1336	relative to their respective origins.
1337	"""
1338	if isinstance(points,type('')):
1339	#It's a string
1340	#assume it is a point file
1341	points = Geospatial_data(file_name = points)
1342
1343	if isinstance(points,Geospatial_data):
1344	points = points.get_data_points( \
1345	absolute = True)
1346	msg = "Use a Geospatial_data object or a mesh origin. Not both."
1347	assert geo_reference == None, msg
1348
1349	else:
1350	points = ensure_numeric(points, Float)
1351	if geo_reference is None:
1352	geo = None #Geo_reference()
1353	else:
1354	if isinstance(geo_reference, Geo_reference):
1355	geo = geo_reference
1356	else:
1357	geo = Geo_reference(geo_reference[0],
1358	geo_reference[1],
1359	geo_reference[2])
1360	points = geo.get_absolute(points)
1361	return points
1362
1363
1364	def ensure_geospatial(points, geo_reference=None):
1365	"""
1366	This function inputs several formats and
1367	outputs one format. - a geospatial_data instance.
1368
1369	Inputed formats are;
1370	points: List or numeric array of coordinate pairs [xi, eta] of
1371	points or geospatial object
1372
1373	mesh_origin: A geo_reference object or 3-tuples consisting of
1374	UTM zone, easting and northing.
1375	If specified vertex coordinates are assumed to be
1376	relative to their respective origins.
1377	"""
1378	if isinstance(points,Geospatial_data):
1379	msg = "Use a Geospatial_data object or a mesh origin. Not both."
1380	assert geo_reference == None, msg
1381	return points
1382	else:
1383	points = ensure_numeric(points, Float)
1384	if geo_reference is None:
1385	geo = None
1386	else:
1387	if isinstance(geo_reference, Geo_reference):
1388	geo = geo_reference
1389	else:
1390	geo = Geo_reference(geo_reference[0],
1391	geo_reference[1],
1392	geo_reference[2])
1393	points = Geospatial_data(data_points=points, geo_reference=geo)
1394	return points
1395
1396	#def file2xya(filename):
1397
1398	# G = Geospatial_data(filename)
1399	# G.export_points_file()
1400
1401
1402
1403
1404	if __name__ == "__main__":
1405	pass
1406

Note: See TracBrowser for help on using the repository browser.

Download in other formats: