Context Navigation

source: anuga_core/source/anuga/geospatial_data/geospatial_data.py @ 4203

Last change on this file since 4203 was 4202, checked in by duncan, 18 years ago
sigh. the 'bug' that I fixed in 4199 wasn't a bug. It was correct. Reverting by doing svn merge -r4199:4198 https://datamining.anu.edu.au/svn/ga/ in inundation dir
File size: 47.1 KB

Line
1	"""Class Geospatial_data - Manipulation of locations on the planet and
2	associated attributes.
3
4	"""
5	from sys import maxint
6	from os import access, F_OK, R_OK
7	from types import DictType
8	from warnings import warn
9	from string import lower
10	from Numeric import concatenate, array, Float, shape, reshape, ravel, take, \
11	size, shape
12	from random import randint
13	#from MA import tolist
14
15	from Scientific.IO.NetCDF import NetCDFFile
16
17	from anuga.utilities.numerical_tools import ensure_numeric
18	from anuga.coordinate_transforms.geo_reference import Geo_reference, TitleError
19	from anuga.coordinate_transforms.redfearn import convert_from_latlon_to_utm
20	from anuga.utilities.anuga_exceptions import ANUGAError
21
22	MAX_READ_LINES = 500
23	class Geospatial_data:
24
25	def __init__(self,
26	data_points=None, # this can also be a points file name
27	attributes=None,
28	geo_reference=None,
29	default_attribute_name=None,
30	file_name=None,
31	delimiter=None,
32	latitudes=None,
33	longitudes=None,
34	points_are_lats_longs=False,
35	max_read_lines=None,
36	load_file_now=True,
37	verbose=False):
38
39
40	"""
41	Create instance from data points and associated attributes
42
43	data_points: x,y coordinates in meters. Type must be either a
44	sequence of 2-tuples or an Mx2 Numeric array of floats. A file name
45	can also be passed in here.
46
47	attributes: Associated values for each data point. The type
48	must be either a list or an array of length M or a dictionary
49	of lists (or arrays) of length M. In the latter case the keys
50	in the dictionary represent the attribute names, in the former
51	the attribute will get the default name "attribute".
52
53	geo_reference: Object representing the origin of the data
54	points. It contains UTM zone, easting and northing and data
55	points are assumed to be relative to this origin.
56	If geo_reference is None, the default geo ref object is used.
57
58	default_attribute_name: Name of default attribute to be used with
59	get_attribute_values. The idea is that the dataset can be
60	equipped with information about which attribute to return.
61	If None, the default is the "first"
62
63	latitudes, longitudes: Vectors of latitudes and longitudes,
64	used to specify location instead of points.
65
66	points_are_lats_longs: Set this as true if the points are actually
67	lats and longs, not UTM
68
69	max_read_lines: The number of rows read into memory when using
70	blocking to read a file.
71
72	load_file_now: If true the file is automatically loaded
73	into the geospatial instance. Used when blocking.
74
75	file_name: Name of input netCDF file or .txt file. netCDF file must
76	have dimensions "points" etc.
77	.txt file is a comma seperated file with x, y and attribute
78	data.
79
80	The first line has the titles of the columns. The first two
81	column titles are checked to see if they start with lat or
82	long (not case sensitive). If so the data is assumed to be
83	latitude and longitude, in decimal format and converted to
84	UTM. Otherwise the first two columns are assumed to be the x
85	and y, and the title names acually used are ignored.
86
87
88	The format for a .txt file is:
89	1st line: [column names]
90	other lines: x y [attributes]
91
92	for example:
93	x, y, elevation, friction
94	0.6, 0.7, 4.9, 0.3
95	1.9, 2.8, 5, 0.3
96	2.7, 2.4, 5.2, 0.3
97
98	The first two columns are always assumed to be x, y
99	coordinates.
100
101	An issue with the xya format is that the attribute column order
102	is not be controlled. The info is stored in a dictionary and it's
103	written in an order dependent on the hash order
104
105	The format for a Points dictionary is:
106
107	['pointlist'] a 2 column array describing points. 1st column x,
108	2nd column y.
109	['attributelist'], a dictionary of 1D arrays, representing
110	attribute values at the point. The dictionary key is the attribute
111	header.
112	['geo_reference'] a Geo_refernece object. Use if the point
113	information is relative. This is optional.
114	eg
115	dic['pointlist'] = [[1.0,2.0],[3.0,5.0]]
116	dic['attributelist']['elevation'] = [[7.0,5.0]
117
118	delimiter: is the file delimiter that will be used when
119	importing a .xya file, which is being phased out.
120
121	verbose:
122
123
124	"""
125
126	if isinstance(data_points, basestring):
127	# assume data point is really a file name
128	file_name = data_points
129
130	self.set_verbose(verbose)
131	self.geo_reference=None #create the attribute
132	self.file_name = file_name
133	self.max_read_lines = max_read_lines
134
135	if delimiter is not None:
136	msg = 'Specifying delimiters will be removed.'
137	msg = 'Text file format is moving to comma seperated .txt files.'
138	warn(msg, DeprecationWarning)
139	if file_name is None:
140	if delimiter is not None:
141	msg = 'No file specified yet a delimiter is provided!'
142	raise ValueError, msg
143	file_name = None
144	if latitudes is not None or longitudes is not None or \
145	points_are_lats_longs:
146	data_points, geo_reference = \
147	_set_using_lat_long(latitudes=latitudes,
148	longitudes=longitudes,
149	geo_reference=geo_reference,
150	data_points=data_points,
151	points_are_lats_longs=points_are_lats_longs)
152	self.check_data_points(data_points)
153	self.set_attributes(attributes)
154	self.set_geo_reference(geo_reference)
155	self.set_default_attribute_name(default_attribute_name)
156
157	elif load_file_now is True:
158	# watch for case where file name and points,
159	# attributes etc are provided!!
160	# if file name then all provided info will be removed!
161	self.import_points_file(file_name, delimiter, verbose)
162
163	self.check_data_points(self.data_points)
164	self.set_attributes(self.attributes)
165	self.set_geo_reference(self.geo_reference)
166	self.set_default_attribute_name(default_attribute_name)
167
168	#Why?
169	#assert self.attributes is None or isinstance(self.attributes, DictType)
170	#This is a hassle when blocking, so I've removed it.
171
172
173	def __len__(self):
174	return len(self.data_points)
175
176	def __repr__(self):
177	return str(self.get_data_points(absolute=True))
178
179
180	def check_data_points(self, data_points):
181	"""Checks data points
182	"""
183
184	if data_points is None:
185	self.data_points = None
186	msg = 'There is no data or file provided!'
187	raise ValueError, msg
188
189	else:
190	self.data_points = ensure_numeric(data_points)
191	#print "self.data_points.shape",self.data_points.shape
192	if not (0,) == self.data_points.shape:
193	assert len(self.data_points.shape) == 2
194	assert self.data_points.shape[1] == 2
195
196	def set_attributes(self, attributes):
197	"""Check and assign attributes dictionary
198	"""
199
200	if attributes is None:
201	self.attributes = None
202	return
203
204	if not isinstance(attributes, DictType):
205	#Convert single attribute into dictionary
206	attributes = {'attribute': attributes}
207
208	#Check input attributes
209	for key in attributes.keys():
210	try:
211	attributes[key] = ensure_numeric(attributes[key])
212	except:
213	msg = 'Attribute %s could not be converted' %key
214	msg += 'to a numeric vector'
215	raise msg
216
217	self.attributes = attributes
218
219
220	def set_geo_reference(self, geo_reference):
221
222	from anuga.coordinate_transforms.geo_reference import Geo_reference
223
224	if geo_reference is None:
225	geo_reference = Geo_reference() # Use default
226	if not isinstance(geo_reference, Geo_reference):
227	msg = 'Argument geo_reference must be a valid Geo_reference \n'
228	msg += 'object or None.'
229	raise msg
230
231	# if a geo ref already exists, change the point data to
232	# represent the new geo-ref
233	if self.geo_reference is not None:
234	#FIXME: Maybe put out a warning here...
235	self.data_points = self.get_data_points \
236	(geo_reference=geo_reference)
237
238	self.geo_reference = geo_reference
239
240
241	def set_default_attribute_name(self, default_attribute_name):
242	self.default_attribute_name = default_attribute_name
243
244	def set_verbose(self, verbose=False):
245	if verbose in [False, True]:
246	self.verbose = verbose
247	else:
248	msg = 'Illegal value: %s' %str(verbose)
249	raise Exception, msg
250
251	def clip(self, polygon, closed=True):
252	"""Clip geospatial data by a polygon
253
254	Input
255	polygon - Either a list of points, an Nx2 array or
256	a Geospatial data object.
257	closed - (optional) determine whether points on boundary should be
258	regarded as belonging to the polygon (closed = True)
259	or not (closed = False). Default is True.
260
261	Output
262	New geospatial data object representing points inside
263	specified polygon.
264
265	"""
266
267	from anuga.utilities.polygon import inside_polygon
268
269	if isinstance(polygon, Geospatial_data):
270	# Polygon is an object - extract points
271	polygon = polygon.get_data_points()
272
273	points = self.get_data_points()
274	inside_indices = inside_polygon(points, polygon, closed)
275
276	clipped_G = self.get_sample(inside_indices)
277	# clipped_points = take(points, inside_indices)
278
279	# Clip all attributes
280	# attributes = self.get_all_attributes()
281
282	# clipped_attributes = {}
283	# if attributes is not None:
284	# for key, att in attributes.items():
285	# clipped_attributes[key] = take(att, inside_indices)
286
287	# return Geospatial_data(clipped_points, clipped_attributes)
288	return clipped_G
289
290
291	def clip_outside(self, polygon, closed=True):
292	"""Clip geospatial date by a polygon, keeping data OUTSIDE of polygon
293
294	Input
295	polygon - Either a list of points, an Nx2 array or
296	a Geospatial data object.
297	closed - (optional) determine whether points on boundary should be
298	regarded as belonging to the polygon (closed = True)
299	or not (closed = False). Default is True.
300
301	Output
302	Geospatial data object representing point OUTSIDE specified polygon
303
304	"""
305
306	from anuga.utilities.polygon import outside_polygon
307
308	if isinstance(polygon, Geospatial_data):
309	# Polygon is an object - extract points
310	polygon = polygon.get_data_points()
311
312	points = self.get_data_points()
313	outside_indices = outside_polygon(points, polygon, closed)
314
315	clipped_G = self.get_sample(outside_indices)
316
317	# clipped_points = take(points, outside_indices)
318
319	# Clip all attributes
320	# attributes = self.get_all_attributes()
321
322	# clipped_attributes = {}
323	# if attributes is not None:
324	# for key, att in attributes.items():
325	# clipped_attributes[key] = take(att, outside_indices)
326
327	# return Geospatial_data(clipped_points, clipped_attributes)
328	return clipped_G
329
330
331	def get_geo_reference(self):
332	return self.geo_reference
333
334	def get_data_points(self, absolute=True, geo_reference=None):
335	"""Get coordinates for all data points as an Nx2 array
336
337	If absolute is False returned coordinates are relative to the
338	internal georeference's xll and yll corners, otherwise
339	absolute UTM coordinates are returned.
340
341	If a geo_reference is passed the points are returned relative
342	to that geo_reference.
343
344	Default: absolute is True.
345	"""
346
347	if absolute is True and geo_reference is None:
348	return self.geo_reference.get_absolute(self.data_points)
349	elif geo_reference is not None:
350	return geo_reference.change_points_geo_ref \
351	(self.data_points,
352	self.geo_reference)
353	else:
354	return self.data_points
355
356
357	def get_attributes(self, attribute_name=None):
358	"""Return values for one named attribute.
359
360	If attribute_name is None, default_attribute_name is used
361	"""
362
363	if attribute_name is None:
364	if self.default_attribute_name is not None:
365	attribute_name = self.default_attribute_name
366	else:
367	attribute_name = self.attributes.keys()[0]
368	# above line takes the first one from keys
369
370	if self.verbose is True:
371	print 'Using attribute %s' %attribute_name
372	print 'Available attributes: %s' %(self.attributes.keys())
373
374	msg = 'Attribute name %s does not exist in data set' %attribute_name
375	assert self.attributes.has_key(attribute_name), msg
376
377	return self.attributes[attribute_name]
378
379	def get_all_attributes(self):
380	"""
381	Return values for all attributes.
382	The return value is either None or a dictionary (possibly empty).
383	"""
384
385	return self.attributes
386
387	def __add__(self, other):
388	"""
389	Returns the addition of 2 geospatical objects,
390	objects are concatencated to the end of each other
391
392	NOTE: doesn't add if objects contain different
393	attributes
394
395	Always return relative points!
396	"""
397
398	# find objects zone and checks if the same
399	geo_ref1 = self.get_geo_reference()
400	zone1 = geo_ref1.get_zone()
401
402	geo_ref2 = other.get_geo_reference()
403	zone2 = geo_ref2.get_zone()
404
405	geo_ref1.reconcile_zones(geo_ref2)
406
407
408	# sets xll and yll as the smallest from self and other
409	# FIXME (Duncan and Ole): use lower left corner derived from
410	# absolute coordinates
411	if self.geo_reference.xllcorner <= other.geo_reference.xllcorner:
412	xll = self.geo_reference.xllcorner
413	else:
414	xll = other.geo_reference.xllcorner
415
416	if self.geo_reference.yllcorner <= other.geo_reference.yllcorner:
417	yll = self.geo_reference.yllcorner
418	else:
419	yll = other.geo_reference.yllcorner
420	new_geo_ref = Geo_reference(geo_ref1.get_zone(), xll, yll)
421
422	xll = yll = 0.
423
424	relative_points1 = self.get_data_points(absolute = False)
425	relative_points2 = other.get_data_points(absolute = False)
426
427
428	new_relative_points1 = new_geo_ref.\
429	change_points_geo_ref(relative_points1,
430	geo_ref1)
431	new_relative_points2 = new_geo_ref.\
432	change_points_geo_ref(relative_points2,
433	geo_ref2)
434
435	# Now both point sets are relative to new_geo_ref and
436	# zones have been reconciled
437
438	# Concatenate points
439	new_points = concatenate((new_relative_points1,
440	new_relative_points2),
441	axis = 0)
442
443	# Concatenate attributes if any
444	if self.attributes is None:
445	if other.attributes is not None:
446	msg = 'Both geospatial_data objects must have the same \n'
447	msg += 'attributes to allow addition.'
448	raise Exception, msg
449
450	new_attributes = None
451	else:
452	new_attributes = {}
453	for x in self.attributes.keys():
454	if other.attributes.has_key(x):
455
456	attrib1 = self.attributes[x]
457	attrib2 = other.attributes[x]
458	new_attributes[x] = concatenate((attrib1, attrib2))
459
460	else:
461	msg = 'Both geospatial_data objects must have the same \n'
462	msg += 'attributes to allow addition.'
463	raise Exception, msg
464
465	# Instantiate new data object and return
466	return Geospatial_data(new_points,
467	new_attributes,
468	new_geo_ref)
469
470	###
471	# IMPORT/EXPORT POINTS FILES
472	###
473
474	def import_points_file(self, file_name, delimiter=None, verbose=False):
475	""" load an .xya or .pts file
476	Note: will throw an IOError if it can't load the file.
477	Catch these!
478
479	Post condition: self.attributes dictionary has been set
480	"""
481
482	if access(file_name, F_OK) == 0 :
483	msg = 'File %s does not exist or is not accessible' %file_name
484	raise IOError, msg
485
486	attributes = {}
487	if file_name[-4:]== ".xya":
488	msg = 'Text file format is moving to comma seperated .txt files.'
489	warn(msg, DeprecationWarning)
490	try:
491	if delimiter == None:
492	try:
493	fd = open(file_name)
494	data_points, attributes, geo_reference =\
495	_read_xya_file(fd, ',')
496	except TitleError:
497	fd.close()
498	fd = open(file_name)
499	data_points, attributes, geo_reference =\
500	_read_xya_file(fd, ' ')
501	else:
502	fd = open(file_name)
503	data_points, attributes, geo_reference =\
504	_read_xya_file(fd, delimiter)
505	fd.close()
506	except (IndexError,ValueError,SyntaxError):
507	fd.close()
508	msg = 'Could not open file %s ' %file_name
509	raise IOError, msg
510	except IOError, e:
511	fd.close()
512	# Catch this to add an error message
513	msg = 'Could not open file or incorrect file format %s:%s'\
514	%(file_name, e)
515	raise IOError, msg
516
517	elif file_name[-4:]== ".pts":
518	try:
519	data_points, attributes, geo_reference =\
520	_read_pts_file(file_name, verbose)
521	except IOError, e:
522	msg = 'Could not open file %s ' %file_name
523	raise IOError, msg
524
525	elif file_name[-4:]== ".txt" or file_name[-4:]== ".csv":
526	#let's do ticket#116 stuff
527	#
528	try:
529	data_points, attributes, geo_reference =\
530	_read_csv_file(file_name, verbose)
531	except IOError, e:
532	msg = 'Could not open file %s ' %file_name
533	raise IOError, msg
534	else:
535	msg = 'Extension %s is unknown' %file_name[-4:]
536	raise IOError, msg
537	# print'in import data_points', data_points
538	# print'in import attributes', attributes
539	# print'in import data_points', geo_reference
540	self.data_points = data_points
541	self.attributes = attributes
542	self.geo_reference = geo_reference
543
544	# return all_data
545
546	def export_points_file(self, file_name, absolute=True):
547
548	"""
549	write a points file, file_name, as a text (.xya) or binary (.pts) file
550	file_name is the file name, including the extension
551	The point_dict is defined at the top of this file.
552
553	If absolute is True data points at returned added to the xll and yll
554	and geo_reference as None
555
556	If absolute is False data points at returned as relative to the xll
557	and yll and geo_reference remains uneffected
558	"""
559
560	if absolute is False and file_name[-4:] == ".xya":
561	msg = 'The text file values must be absolute. '
562	msg += 'Text file format is moving to comma seperated .txt files.'
563	warn(msg, DeprecationWarning)
564
565	if (file_name[-4:] == ".xya"):
566	msg = '.xya format is deprecated. Please use .txt.'
567	warn(msg, DeprecationWarning)
568	if absolute is True:
569	_write_xya_file(file_name,
570	self.get_data_points(absolute=True),
571	self.get_all_attributes())
572	else:
573	_write_xya_file(file_name,
574	self.get_data_points(absolute=False),
575	self.get_all_attributes(),
576	self.get_geo_reference())
577
578	elif (file_name[-4:] == ".pts"):
579	if absolute is True:
580	_write_pts_file(file_name,
581	self.get_data_points(absolute),
582	self.get_all_attributes())
583	else:
584	_write_pts_file(file_name,
585	self.get_data_points(absolute),
586	self.get_all_attributes(),
587	self.get_geo_reference())
588
589	elif file_name[-4:] == ".txt" or file_name[-4:] == ".csv":
590	msg = "ERROR: trying to write a .txt file with relative data."
591	assert absolute, msg
592	_write_csv_file(file_name,
593	self.get_data_points(absolute=True),
594	self.get_all_attributes())
595
596	else:
597	msg = 'Unknown file type %s ' %file_name
598	raise IOError, msg
599
600	def get_sample(self, indices):
601	""" Returns a object which is a subset of the original
602	and the data points and attributes in this new object refer to
603	the indices provided
604
605	Input
606	indices- a list of integers that represent the new object
607	Output
608	New geospatial data object representing points specified by
609	the indices
610	"""
611	#FIXME: add the geo_reference to this
612
613	points = self.get_data_points()
614	sampled_points = take(points, indices)
615
616	attributes = self.get_all_attributes()
617
618	sampled_attributes = {}
619	if attributes is not None:
620	for key, att in attributes.items():
621	sampled_attributes[key] = take(att, indices)
622
623	return Geospatial_data(sampled_points, sampled_attributes)
624
625
626	def split(self, factor=0.5, verbose=False):
627	"""Returns two geospatial_data object, first is size of the 'factor'
628	smaller the original and the second is the remainer. The two new
629	object are disjoin set of each other.
630
631	Points of the two new object have selected RANDOMLY.
632	AND if factor is a decimal it will round (2.25 to 2 and 2.5 to 3)
633
634
635	Input - the factor which to split the object, if 0.1 then 10% of the
636	object will be returned
637
638	Output - two geospatial_data objects that are disjoint sets of the
639	original
640	"""
641
642	i=0
643	self_size = len(self)
644	random_list = []
645	remainder_list = []
646	new_size = round(factor*self_size)
647	# print'Split original %s by %s' %(self_size, factor)
648	# print'New samples are %s and %s in size' %(int(round(factor*self_size)),int(self_size-new_size))
649
650	#find unique random numbers
651	if verbose: print "make unique random number list and get indices"
652	while i < new_size:
653	random_num = randint(0,self_size-1)
654	if random_num not in random_list:
655	random_list.append(random_num)
656	i=i+1
657
658	#Make list of opposite to random_list
659	if verbose: print "make list of opposite to random list"
660	for i in range(0,self_size,1):
661	remainder_list.append(i)
662
663	#remove random list from remainder_list to get correct remainder_list
664	#need to sort and reverse so the pop() works correctly
665	random_list.sort()
666	random_list.reverse()
667	if verbose: print "get indices of oppisite to random list"
668	for i in random_list:
669	remainder_list.pop(i)
670
671	#get new samples
672	if verbose: print "get values of indices for random list"
673	G1 = self.get_sample(random_list)
674	if verbose: print "get values of indices for opposite of random list"
675	G2 = self.get_sample(remainder_list)
676
677	return G1, G2
678
679	def __iter__(self):
680	"""
681	read in the header and save the file pointer position
682	"""
683
684	from Scientific.IO.NetCDF import NetCDFFile
685
686	#FIXME - what to do if the file isn't there
687
688	if self.file_name[-4:]== ".xya":
689	#let's just read it all
690	pass
691	elif self.file_name[-4:]== ".pts":
692
693	# see if the file is there. Throw a QUIET IO error if it isn't
694	fd = open(self.file_name,'r')
695	fd.close()
696
697	#throws prints to screen if file not present
698	self.fid = NetCDFFile(self.file_name, 'r')
699
700	self.blocking_georef, self.blocking_keys, self.last_row = \
701	_read_pts_file_header(self.fid, self.verbose)
702	self.start_row=0
703	else:
704	file_pointer = open(self.file_name)
705	self.header, self.file_pointer = \
706	_read_csv_file_header(file_pointer)
707	self.blocking_georef = None # Used for reconciling zones
708	if self.max_read_lines is None:
709	self.max_read_lines = MAX_READ_LINES
710	return self
711
712	def next(self):
713	# read a block, instanciate a new geospatial and return it
714	if self.file_name[-4:]== ".xya" :
715	if not hasattr(self,'finished_reading') or \
716	self.finished_reading is False:
717	#let's just read it all
718	geo = Geospatial_data(self.file_name)
719	self.finished_reading = True
720	else:
721	raise StopIteration
722	self.finished_reading = False
723
724	elif self.file_name[-4:]== ".pts":
725	if self.start_row == self.last_row:
726	# read the end of the file last iteration
727	# remove blocking attributes
728	self.fid.close()
729	del self.max_read_lines
730	del self.blocking_georef
731	del self.last_row
732	del self.start_row
733	del self.blocking_keys
734	del self.fid
735	raise StopIteration
736	fin_row = self.start_row + self.max_read_lines
737	if fin_row > self.last_row:
738	fin_row = self.last_row
739
740	if self.verbose is True and \
741	0==self.start_row%(( self.last_row+10)/10)==0:
742	print 'Doing %d of %d' %(self.start_row, self.last_row+10)
743	#call stuff
744	pointlist, att_dict, = \
745	_read_pts_file_blocking( self.fid,
746	self.start_row,
747	fin_row,
748	self.blocking_keys
749	)
750	geo = Geospatial_data(pointlist, att_dict, self.blocking_georef)
751	self.start_row = fin_row
752
753	else:
754	try:
755	pointlist, att_dict, geo_ref, self.file_pointer = \
756	_read_csv_file_blocking( self.file_pointer,
757	self.header[:],
758	max_read_lines=self.max_read_lines,
759	verbose=self.verbose)
760
761	# Check that the zones haven't changed.
762	if geo_ref is not None:
763	geo_ref.reconcile_zones(self.blocking_georef)
764	self.blocking_georef = geo_ref
765	elif self.blocking_georef is not None:
766
767	msg = 'Geo reference given, then not given.'
768	msg += ' This should not happen.'
769	raise ValueError, msg
770	geo = Geospatial_data(pointlist, att_dict, geo_ref)
771	except StopIteration:
772	self.file_pointer.close()
773	del self.header
774	del self.file_pointer
775	raise StopIteration
776	except ANUGAError:
777	self.file_pointer.close()
778	del self.header
779	del self.file_pointer
780	raise
781	return geo
782
783	def _set_using_lat_long(latitudes,
784	longitudes,
785	geo_reference,
786	data_points,
787	points_are_lats_longs):
788	"""
789	if the points has lat long info, assume it is in (lat, long) order.
790	"""
791
792	if geo_reference is not None:
793	msg = """A georeference is specified yet latitude and longitude
794	are also specified!"""
795	raise ValueError, msg
796
797	if data_points is not None and not points_are_lats_longs:
798	msg = """Data points are specified yet latitude and
799	longitude are also specified!"""
800	raise ValueError, msg
801
802	if points_are_lats_longs:
803	if data_points is None:
804	msg = """Data points are not specified !"""
805	raise ValueError, msg
806	lats_longs = ensure_numeric(data_points)
807	latitudes = ravel(lats_longs[:,0:1])
808	longitudes = ravel(lats_longs[:,1:])
809
810	if latitudes is None and longitudes is None:
811	msg = """Latitudes and Longitudes are not."""
812	raise ValueError, msg
813
814	if latitudes is None:
815	msg = """Longitudes are specified yet latitudes aren't!"""
816	raise ValueError, msg
817
818	if longitudes is None:
819	msg = """Latitudes are specified yet longitudes aren't!"""
820	raise ValueError, msg
821
822	data_points, zone = convert_from_latlon_to_utm(latitudes=latitudes,
823	longitudes=longitudes)
824	return data_points, Geo_reference(zone=zone)
825
826	def _read_pts_file(file_name, verbose=False):
827	"""Read .pts NetCDF file
828
829	Return a dic of array of points, and dic of array of attribute
830	eg
831	dic['points'] = [[1.0,2.0],[3.0,5.0]]
832	dic['attributelist']['elevation'] = [[7.0,5.0]
833	"""
834
835	from Scientific.IO.NetCDF import NetCDFFile
836
837	if verbose: print 'Reading ', file_name
838
839
840	# see if the file is there. Throw a QUIET IO error if it isn't
841	fd = open(file_name,'r')
842	fd.close()
843
844	#throws prints to screen if file not present
845	fid = NetCDFFile(file_name, 'r')
846
847	pointlist = array(fid.variables['points'])
848	keys = fid.variables.keys()
849	if verbose: print 'Got %d variables: %s' %(len(keys), keys)
850	try:
851	keys.remove('points')
852	except IOError, e:
853	fid.close()
854	msg = 'Expected keyword "points" but could not find it'
855	raise IOError, msg
856
857	attributes = {}
858	for key in keys:
859	if verbose: print "reading attribute '%s'" %key
860
861	attributes[key] = array(fid.variables[key])
862
863
864	try:
865	geo_reference = Geo_reference(NetCDFObject=fid)
866	except AttributeError, e:
867	geo_reference = None
868
869	fid.close()
870
871	return pointlist, attributes, geo_reference
872
873
874	def _read_csv_file(file_name, verbose=False):
875	"""Read .csv file
876
877	Return a dic of array of points, and dic of array of attribute
878	eg
879	dic['points'] = [[1.0,2.0],[3.0,5.0]]
880	dic['attributelist']['elevation'] = [[7.0,5.0]
881	"""
882
883	#from anuga.shallow_water.data_manager import Exposure_csv
884	#csv =Exposure_csv(file_name)
885
886	file_pointer = open(file_name)
887	header, file_pointer = _read_csv_file_header(file_pointer)
888	try:
889	pointlist, att_dict, geo_ref, file_pointer = \
890	_read_csv_file_blocking( \
891	file_pointer,
892	header,
893	max_read_lines=1e30) #If the file is bigger that this, block..
894	except ANUGAError:
895	file_pointer.close()
896	raise
897	file_pointer.close()
898	return pointlist, att_dict, geo_ref
899
900	CSV_DELIMITER = ','
901	def _read_csv_file_header(file_pointer,
902	delimiter=CSV_DELIMITER,
903	verbose=False):
904
905	"""Read the header of a .csv file
906	Return a list of the header names
907	"""
908	line = file_pointer.readline()
909	header = clean_line(line, delimiter)
910	return header, file_pointer
911
912	def _read_csv_file_blocking(file_pointer, header,
913	delimiter=CSV_DELIMITER,
914	max_read_lines=MAX_READ_LINES,
915	verbose=False):
916
917
918	"""
919	Read the body of a .csv file.
920	header: The list header of the csv file, with the x and y labels.
921	"""
922	points = []
923	pointattributes = []
924	att_dict = {}
925
926	#This is to remove the x and y headers.
927	header = header[:]
928	x_header = header.pop(0)
929	y_header = header.pop(0)
930
931	read_lines = 0
932	while read_lines<max_read_lines:
933	line = file_pointer.readline()
934	#print "line",line
935	numbers = clean_line(line,delimiter)
936	if len(numbers) <= 1:
937	break
938	if line[0] == '#':
939	continue
940	read_lines += 1
941	if True: # remove.. #if numbers != []:
942	try:
943	x = float(numbers[0])
944	y = float(numbers[1])
945	points.append([x,y])
946	numbers.pop(0)
947	numbers.pop(0)
948	if len(header) != len(numbers):
949
950	file_pointer.close()
951	# It might not be a problem with the header
952	#raise TitleAmountError
953	msg = "File load error. There might be a problem with the file header"
954	raise IOError, msg
955	for i,num in enumerate(numbers):
956	num.strip()
957	if num != '\n' and num != '':
958	#attributes.append(float(num))
959	att_dict.setdefault(header[i],[]).append(float(num))
960	#except IOError:
961	except ValueError:
962	raise SyntaxError
963	if points == []:
964	raise StopIteration
965
966
967	pointlist = array(points).astype(Float)
968	for key in att_dict.keys():
969	att_dict[key] = array(att_dict[key]).astype(Float)
970
971	#Do stuff here so the info is in lat's and longs
972	geo_ref = None
973	x_header = lower(x_header[:3])
974	y_header = lower(y_header[:3])
975	if (x_header == 'lon' or x_header == 'lat') and \
976	(y_header == 'lon' or y_header == 'lat'):
977	if x_header == 'lon':
978	longitudes = ravel(pointlist[:,0:1])
979	latitudes = ravel(pointlist[:,1:])
980	else:
981	latitudes = ravel(pointlist[:,0:1])
982	longitudes = ravel(pointlist[:,1:])
983
984	pointlist, geo_ref = _set_using_lat_long(latitudes,
985	longitudes,
986	geo_reference=None,
987	data_points=None,
988	points_are_lats_longs=False)
989	return pointlist, att_dict, geo_ref, file_pointer
990
991	def _read_pts_file_header(fid, verbose=False):
992
993	"""
994	Read the geo_reference of a .pts file
995	"""
996
997	keys = fid.variables.keys()
998	try:
999	keys.remove('points')
1000	except IOError, e:
1001	fid.close()
1002	msg = 'Expected keyword "points" but could not find it'
1003	raise IOError, msg
1004	if verbose: print 'Got %d variables: %s' %(len(keys), keys)
1005
1006	try:
1007	geo_reference = Geo_reference(NetCDFObject=fid)
1008	except AttributeError, e:
1009	geo_reference = None
1010
1011	return geo_reference, keys, fid.dimensions['number_of_points']
1012
1013	def _read_pts_file_blocking(fid, start_row, fin_row, keys):
1014	#verbose=False):
1015
1016
1017	"""
1018	Read the body of a .csv file.
1019	header: The list header of the csv file, with the x and y labels.
1020	"""
1021
1022	pointlist = array(fid.variables['points'][start_row:fin_row])
1023
1024	attributes = {}
1025	for key in keys:
1026	attributes[key] = array(fid.variables[key][start_row:fin_row])
1027
1028	return pointlist, attributes
1029
1030
1031	def _read_xya_file(fd, delimiter):
1032	points = []
1033	pointattributes = []
1034	title = fd.readline()
1035	att_names = clean_line(title,delimiter)
1036	att_dict = {}
1037	line = fd.readline()
1038	numbers = clean_line(line,delimiter)
1039
1040	while len(numbers) > 1 and line[0] <> '#':
1041	if numbers != []:
1042	try:
1043	x = float(numbers[0])
1044	y = float(numbers[1])
1045	points.append([x,y])
1046	numbers.pop(0)
1047	numbers.pop(0)
1048	if len(att_names) != len(numbers):
1049	fd.close()
1050	# It might not be a problem with the title
1051	#raise TitleAmountError
1052	raise IOError
1053	for i,num in enumerate(numbers):
1054	num.strip()
1055	if num != '\n' and num != '':
1056	#attributes.append(float(num))
1057	att_dict.setdefault(att_names[i],[]).append(float(num))
1058	except ValueError:
1059	raise SyntaxError
1060	line = fd.readline()
1061	numbers = clean_line(line,delimiter)
1062
1063	if line == '':
1064	geo_reference = None
1065	else:
1066	geo_reference = Geo_reference(ASCIIFile=fd,read_title=line)
1067
1068
1069	pointlist = array(points).astype(Float)
1070	for key in att_dict.keys():
1071	att_dict[key] = array(att_dict[key]).astype(Float)
1072
1073	return pointlist, att_dict, geo_reference
1074
1075	def _write_pts_file(file_name,
1076	write_data_points,
1077	write_attributes=None,
1078	write_geo_reference=None):
1079	"""
1080	Write .pts NetCDF file
1081
1082	NOTE: Below might not be valid ask Duncan : NB 5/2006
1083
1084	WARNING: This function mangles the point_atts data structure
1085	#F??ME: (DSG)This format has issues.
1086	# There can't be an attribute called points
1087	# consider format change
1088	# method changed by NB not sure if above statement is correct
1089
1090	should create new test for this
1091	legal_keys = ['pointlist', 'attributelist', 'geo_reference']
1092	for key in point_atts.keys():
1093	msg = 'Key %s is illegal. Valid keys are %s' %(key, legal_keys)
1094	assert key in legal_keys, msg
1095	"""
1096	from Scientific.IO.NetCDF import NetCDFFile
1097	# NetCDF file definition
1098	outfile = NetCDFFile(file_name, 'w')
1099
1100	#Create new file
1101	outfile.institution = 'Geoscience Australia'
1102	outfile.description = 'NetCDF format for compact and portable storage ' +\
1103	'of spatial point data'
1104
1105	# dimension definitions
1106	shape = write_data_points.shape[0]
1107	outfile.createDimension('number_of_points', shape)
1108	outfile.createDimension('number_of_dimensions', 2) #This is 2d data
1109
1110	# variable definition
1111	outfile.createVariable('points', Float, ('number_of_points',
1112	'number_of_dimensions'))
1113
1114	#create variables
1115	outfile.variables['points'][:] = write_data_points #.astype(Float32)
1116
1117	if write_attributes is not None:
1118	for key in write_attributes.keys():
1119	outfile.createVariable(key, Float, ('number_of_points',))
1120	outfile.variables[key][:] = write_attributes[key] #.astype(Float32)
1121
1122	if write_geo_reference is not None:
1123	write_geo_reference.write_NetCDF(outfile)
1124
1125	outfile.close()
1126
1127
1128
1129	def _write_xya_file(file_name,
1130	write_data_points,
1131	write_attributes=None,
1132	write_geo_reference=None,
1133	delimiter=','):
1134	"""
1135	export a file, file_name, with the xya format
1136
1137	"""
1138	points = write_data_points
1139	pointattributes = write_attributes
1140
1141	fd = open(file_name,'w')
1142	titlelist = ""
1143	if pointattributes is not None:
1144	for title in pointattributes.keys():
1145	titlelist = titlelist + title + delimiter
1146	titlelist = titlelist[0:-len(delimiter)] # remove the last delimiter
1147	fd.write(titlelist+"\n")
1148
1149	#<vertex #> <x> <y> [attributes]
1150	for i, vert in enumerate( points):
1151
1152
1153	if pointattributes is not None:
1154	attlist = ","
1155	for att in pointattributes.keys():
1156	attlist = attlist + str(pointattributes[att][i])+ delimiter
1157	attlist = attlist[0:-len(delimiter)] # remove the last delimiter
1158	attlist.strip()
1159	else:
1160	attlist = ''
1161
1162	fd.write(str(vert[0]) + delimiter +
1163	str(vert[1]) + attlist + "\n")
1164
1165	if write_geo_reference is not None:
1166	write_geo_reference.write_ASCII(fd)
1167	fd.close()
1168
1169
1170	def _write_csv_file(file_name,
1171	write_data_points,
1172	write_attributes=None,
1173	delimiter=','):
1174	"""
1175	export a file, file_name, with the xya format
1176
1177	"""
1178	points = write_data_points
1179	pointattributes = write_attributes
1180
1181	fd = open(file_name,'w')
1182	titlelist = "x" + delimiter + "y" + delimiter
1183	if pointattributes is not None:
1184	for title in pointattributes.keys():
1185	titlelist = titlelist + title + delimiter
1186	titlelist = titlelist[0:-len(delimiter)] # remove the last delimiter
1187	fd.write(titlelist+"\n")
1188
1189	#<vertex #> <x> <y> [attributes]
1190	for i, vert in enumerate( points):
1191
1192
1193	if pointattributes is not None:
1194	attlist = ","
1195	for att in pointattributes.keys():
1196	attlist = attlist + str(pointattributes[att][i])+ delimiter
1197	attlist = attlist[0:-len(delimiter)] # remove the last delimiter
1198	attlist.strip()
1199	else:
1200	attlist = ''
1201
1202	fd.write(str(vert[0]) + delimiter +
1203	str(vert[1]) + attlist + "\n")
1204
1205	fd.close()
1206
1207	def _point_atts2array(point_atts):
1208	point_atts['pointlist'] = array(point_atts['pointlist']).astype(Float)
1209
1210	for key in point_atts['attributelist'].keys():
1211	point_atts['attributelist'][key]=\
1212	array(point_atts['attributelist'][key]).astype(Float)
1213	return point_atts
1214
1215
1216
1217
1218	def geospatial_data2points_dictionary(geospatial_data):
1219	"""Convert geospatial data to points_dictionary
1220	"""
1221
1222	points_dictionary = {}
1223	points_dictionary['pointlist'] = geospatial_data.data_points
1224
1225	points_dictionary['attributelist'] = {}
1226
1227	for attribute_name in geospatial_data.attributes.keys():
1228	val = geospatial_data.attributes[attribute_name]
1229	points_dictionary['attributelist'][attribute_name] = val
1230
1231	points_dictionary['geo_reference'] = geospatial_data.geo_reference
1232
1233	return points_dictionary
1234
1235
1236	def points_dictionary2geospatial_data(points_dictionary):
1237	"""Convert points_dictionary to geospatial data object
1238	"""
1239
1240	msg = 'Points dictionary must have key pointlist'
1241	assert points_dictionary.has_key('pointlist'), msg
1242
1243	msg = 'Points dictionary must have key attributelist'
1244	assert points_dictionary.has_key('attributelist'), msg
1245
1246	if points_dictionary.has_key('geo_reference'):
1247	geo = points_dictionary['geo_reference']
1248	else:
1249	geo = None
1250
1251	return Geospatial_data(points_dictionary['pointlist'],
1252	points_dictionary['attributelist'],
1253	geo_reference = geo)
1254
1255	def clean_line(line,delimiter):
1256	"""Remove whitespace
1257	"""
1258	#print ">%s" %line
1259	line = line.strip()
1260	#print "stripped>%s" %line
1261	numbers = line.split(delimiter)
1262	i = len(numbers) - 1
1263	while i >= 0:
1264	if numbers[i] == '':
1265	numbers.pop(i)
1266	else:
1267	numbers[i] = numbers[i].strip()
1268
1269	i += -1
1270	#for num in numbers:
1271	# print "num>%s<" %num
1272	return numbers
1273
1274	def ensure_absolute(points, geo_reference=None):
1275	"""
1276	This function inputs several formats and
1277	outputs one format. - a numeric array of absolute points.
1278
1279	Inputed formats are;
1280	points: List or numeric array of coordinate pairs [xi, eta] of
1281	points or geospatial object or points file name
1282
1283	mesh_origin: A geo_reference object or 3-tuples consisting of
1284	UTM zone, easting and northing.
1285	If specified vertex coordinates are assumed to be
1286	relative to their respective origins.
1287	"""
1288	if isinstance(points,type('')):
1289	#It's a string
1290	#assume it is a point file
1291	points = Geospatial_data(file_name = points)
1292
1293	if isinstance(points,Geospatial_data):
1294	points = points.get_data_points( \
1295	absolute = True)
1296	msg = "Use a Geospatial_data object or a mesh origin. Not both."
1297	assert geo_reference == None, msg
1298
1299	else:
1300	points = ensure_numeric(points, Float)
1301	if geo_reference is None:
1302	geo = None #Geo_reference()
1303	else:
1304	if isinstance(geo_reference, Geo_reference):
1305	geo = geo_reference
1306	else:
1307	geo = Geo_reference(geo_reference[0],
1308	geo_reference[1],
1309	geo_reference[2])
1310	points = geo.get_absolute(points)
1311	return points
1312
1313
1314	def ensure_geospatial(points, geo_reference=None):
1315	"""
1316	This function inputs several formats and
1317	outputs one format. - a geospatial_data instance.
1318
1319	Inputed formats are;
1320	points: List or numeric array of coordinate pairs [xi, eta] of
1321	points or geospatial object
1322
1323	mesh_origin: A geo_reference object or 3-tuples consisting of
1324	UTM zone, easting and northing.
1325	If specified vertex coordinates are assumed to be
1326	relative to their respective origins.
1327	"""
1328	if isinstance(points,Geospatial_data):
1329	msg = "Use a Geospatial_data object or a mesh origin. Not both."
1330	assert geo_reference == None, msg
1331	return points
1332	else:
1333	points = ensure_numeric(points, Float)
1334	if geo_reference is None:
1335	geo = None
1336	else:
1337	if isinstance(geo_reference, Geo_reference):
1338	geo = geo_reference
1339	else:
1340	geo = Geo_reference(geo_reference[0],
1341	geo_reference[1],
1342	geo_reference[2])
1343	points = Geospatial_data(data_points=points, geo_reference=geo)
1344	return points
1345
1346	#def file2xya(filename):
1347
1348	# G = Geospatial_data(filename)
1349	# G.export_points_file()
1350
1351
1352
1353
1354	if __name__ == "__main__":
1355	pass
1356

Note: See TracBrowser for help on using the repository browser.

Download in other formats: