Context Navigation

source: branches/numpy/anuga/geospatial_data/geospatial_data.py @ 6306

Last change on this file since 6306 was 6304, checked in by rwilson, 16 years ago
Initial commit of numpy changes. Still a long way to go.
File size: 70.3 KB

Line
1	"""Class Geospatial_data - Manipulation of locations on the planet and
2	associated attributes.
3
4	"""
5
6	from sys import maxint
7	from os import access, F_OK, R_OK,remove
8	from types import DictType
9	from warnings import warn
10	from string import lower
11	from RandomArray import randint, seed, get_seed
12	from copy import deepcopy
13	from Scientific.IO.NetCDF import NetCDFFile
14
15	import numpy as num
16
17	from anuga.coordinate_transforms.lat_long_UTM_conversion import UTMtoLL
18	from anuga.utilities.numerical_tools import ensure_numeric
19	from anuga.coordinate_transforms.geo_reference import Geo_reference, \
20	TitleError, DEFAULT_ZONE, ensure_geo_reference, write_NetCDF_georeference
21	from anuga.coordinate_transforms.redfearn import convert_from_latlon_to_utm
22	from anuga.utilities.anuga_exceptions import ANUGAError
23	from anuga.config import points_file_block_line_size as MAX_READ_LINES
24	from anuga.config import netcdf_mode_r, netcdf_mode_w, netcdf_mode_a
25	from anuga.config import netcdf_float
26
# Name under which attribute values are stored when they are supplied as a
# bare list/array rather than as a dictionary (see set_attributes).
DEFAULT_ATTRIBUTE = 'elevation'
28
29
30	##
31	# @brief ??
32	class Geospatial_data:
33
34	##
35	# @brief
36	# @param data_points Mx2 iterable of tuples or array of x,y coordinates.
37	# @param attributes Associated values for each data point.
38	# @param geo_reference ??
39	# @param default_attribute_name ??
40	# @param file_name
41	# @param latitudes ??
42	# @param longitudes ??
43	# @param points_are_lats_longs True if points are lat/long, not UTM.
44	# @param max_read_lines Size of block to read, if blocking.
45	# @param load_file_now True if blocking but we want to read file now.
46	# @param verbose True if this class instance is verbose.
def __init__(self,
             data_points=None,    # this can also be a points file name
             attributes=None,
             geo_reference=None,
             default_attribute_name=None,
             file_name=None,
             latitudes=None,
             longitudes=None,
             points_are_lats_longs=False,
             max_read_lines=None,
             load_file_now=True,
             verbose=False):
    """Create an instance from data points (or a points file) and attributes.

    data_points: x,y coordinates in meters, either a sequence of 2-tuples
        or an Mx2 numeric array of floats. A file name with extension
        .txt, .csv or .pts may be passed here instead; a string is always
        treated as a file name.

    attributes: Values associated with each data point. Either a list or
        array of length M, or a dictionary of lists (or arrays) of
        length M. Dictionary keys are the attribute names; a bare list or
        array gets the default name "elevation".

    geo_reference: Object representing the origin of the data points. It
        contains UTM zone, easting and northing, and the data points are
        taken to be relative to this origin. If None, the default geo
        reference object is used.

    default_attribute_name: Name of the attribute returned by
        get_attributes() when no name is given. If None, the "first"
        attribute is used.

    file_name: Name of an input netCDF (.pts) file or .txt/.csv file.
        A netCDF file must have dimensions "points" etc. A .txt file is
        a comma separated file with x, y and attribute data.

    latitudes, longitudes: Vectors of latitudes and longitudes, used to
        specify locations instead of data_points.

    points_are_lats_longs: Set to True if data_points actually holds
        lats and longs rather than UTM coordinates.

    max_read_lines: Number of rows read into memory at a time when a file
        is read blockwise. Defaults to the configured block size.

    load_file_now: If True the file is loaded into the instance straight
        away. If False nothing is read here; used when blocking.

    verbose: True if this instance should print progress messages.

    Text file format
    ----------------
    The first line holds the column titles. If the first two titles
    start with lat or long (not case sensitive) the data is taken to be
    latitude and longitude in decimal format and is converted to UTM.
    Otherwise the first two columns are taken to be x and y, and the
    title names actually used are ignored.

        1st line:     [column names]
        other lines:  x y [attributes]

    for example:
        x, y, elevation, friction
        0.6, 0.7, 4.9, 0.3
        1.9, 2.8, 5, 0.3
        2.7, 2.4, 5.2, 0.3

    Points dictionary format
    ------------------------
    ['pointlist']      a 2 column array of points: 1st column x,
                       2nd column y.
    ['attributelist']  a dictionary of 1D arrays holding the attribute
                       values at each point, keyed by attribute header.
    ['geo_reference']  a Geo_reference object; optional, use it if the
                       point information is relative.
    eg
        dic['pointlist'] = [[1.0,2.0],[3.0,5.0]]
        dic['attributelist']['elevation'] = [7.0,5.0]
    """

    # A string in the data_points slot is really a file name.
    if isinstance(data_points, basestring):
        file_name = data_points

    self.set_verbose(verbose)
    self.geo_reference = None    # create the attribute
    self.file_name = file_name

    self.max_read_lines = max_read_lines
    if self.max_read_lines is None:
        self.max_read_lines = MAX_READ_LINES

    if file_name is None:
        # In-memory data, possibly given as latitudes/longitudes.
        if (latitudes is not None
                or longitudes is not None
                or points_are_lats_longs):
            data_points, geo_reference = _set_using_lat_long(
                latitudes=latitudes,
                longitudes=longitudes,
                geo_reference=geo_reference,
                data_points=data_points,
                points_are_lats_longs=points_are_lats_longs)

        self.check_data_points(data_points)
        self.set_attributes(attributes)
        self.set_geo_reference(geo_reference)
        self.set_default_attribute_name(default_attribute_name)
    elif load_file_now is True:
        # Watch for the case where a file name AND points, attributes etc
        # are provided: everything comes from the file and any supplied
        # points/attributes/geo reference are discarded.
        if verbose is True:
            print('Loading Geospatial data from file: %s' % file_name)

        self.import_points_file(file_name, verbose=verbose)

        self.check_data_points(self.data_points)
        self.set_attributes(self.attributes)
        self.set_geo_reference(self.geo_reference)
        self.set_default_attribute_name(default_attribute_name)

    if verbose is True and file_name is not None:
        print('Geospatial data created from file: %s' % file_name)
        if load_file_now is False:
            print('Data will be loaded blockwise on demand')

            if file_name.endswith('csv') or file_name.endswith('txt'):
                # Intentionally silent: the warning that used to be
                # printed here about ASCII formats was misleading.
                # FIXME (Ole): Are we blocking here or not?
                pass
192
193	##
194	# @brief Return length of the points set.
195	def __len__(self):
196	return len(self.data_points)
197
198	##
199	# @brief Return a string representation of the points set.
200	def __repr__(self):
201	return str(self.get_data_points(absolute=True))
202
203	##
204	# @brief Check data points.
205	# @param data_points Points data to check and store in instance.
206	# @note Throws ValueError exception if no data.
def check_data_points(self, data_points):
    """Validate data points and store them in self.data_points.

    data_points: The points to check; converted with ensure_numeric()
        before being stored.

    Raises ValueError if data_points is None (self.data_points is set to
    None first), or if the converted array is neither empty (shape (0,))
    nor of shape Mx2.
    """

    if data_points is None:
        self.data_points = None
        msg = 'There is no data or file provided!'
        raise ValueError(msg)

    self.data_points = ensure_numeric(data_points)

    # An empty point set, shape (0,), is accepted as is; anything else
    # must be a two column array of x,y coordinates.
    shape = self.data_points.shape
    if shape != (0,):
        if len(shape) != 2 or shape[1] != 2:
            msg = ('Data points must be an Mx2 array of x,y coordinates, '
                   'got an array of shape %s' % str(shape))
            raise ValueError(msg)
224
225	##
226	# @brief Check and assign attributes data.
227	# @param attributes Dictionary or scalar to save as .attributes.
228	# @note Throws exception if unable to convert dict keys to numeric.
def set_attributes(self, attributes):
    """Check attribute values and store them in self.attributes.

    attributes: None, a dictionary mapping attribute names to value
        sequences, or a single sequence of values. A non-dictionary value
        is stored under the name DEFAULT_ATTRIBUTE.

    Every value is converted with ensure_numeric(). The converted values
    go into a new dictionary, so a dictionary passed in by the caller is
    left unmodified.

    Raises Exception if a value cannot be converted to a numeric vector.
    """

    if attributes is None:
        self.attributes = None
        return

    if not isinstance(attributes, dict):
        # Convert single attribute into dictionary
        attributes = {DEFAULT_ATTRIBUTE: attributes}

    # Check input attributes
    checked = {}
    for key, values in attributes.items():
        try:
            checked[key] = ensure_numeric(values)
        except Exception:
            msg = ("Attribute '%s' (%s) could not be converted to a "
                   "numeric vector" % (str(key), str(values)))
            raise Exception(msg)

    self.attributes = checked
250
251	##
252	# @brief Set the georeference of geospatial data.
253	# @param geo_reference The georeference data to set.
254	# @note Will raise exception if param not instance of Geo_reference.
def set_geo_reference(self, geo_reference):
    """Set the georeference of geospatial data.

    It can also be used to change the georeference and will ensure that
    the absolute coordinate values are unchanged.

    geo_reference: A Geo_reference object, anything ensure_geo_reference()
        accepts (e.g. a tuple (zone, xllcorner, yllcorner)), or None, in
        which case a default Geo_reference() is used.

    Raises Exception if the argument does not end up as a Geo_reference.
    """

    from anuga.coordinate_transforms.geo_reference import Geo_reference

    if geo_reference is None:
        # Use default - points are in absolute coordinates
        geo_reference = Geo_reference()

    # Allow for tuple (zone, xllcorner, yllcorner)
    geo_reference = ensure_geo_reference(geo_reference)

    if not isinstance(geo_reference, Geo_reference):
        msg = 'Argument geo_reference must be a valid Geo_reference '
        msg += 'object or None.'
        raise Exception(msg)

    # If a geo_reference already exists, change the point data according to
    # the new geo reference
    if self.geo_reference is not None:
        self.data_points = self.get_data_points(geo_reference=geo_reference)

    self.geo_reference = geo_reference
284
285	##
286	# @brief Set default attribute name.
287	# @param default_attribute_name The default to save.
def set_default_attribute_name(self, default_attribute_name):
    """Store the attribute name that get_attributes() uses by default."""
    name = default_attribute_name
    self.default_attribute_name = name
290
291	##
292	# @brief Set the instance verbose flag.
293	# @param verbose The value to save.
294	# @note Will raise exception if param is not True or False.
def set_verbose(self, verbose=False):
    """Store the verbose flag on the instance.

    Raises Exception if verbose does not compare equal to True or False.
    """
    if verbose not in [False, True]:
        msg = 'Illegal value: %s' % str(verbose)
        raise Exception(msg)

    self.verbose = verbose
301
302	##
303	# @brief Clip geospatial data by a given polygon.
304	# @param polygon The polygon to clip with.
305	# @param closed True if points on clip boundary are included in result.
306	# @param verbose True if this function is verbose.
def clip(self, polygon, closed=True, verbose=False):
    """Return the points of this object that lie inside a polygon.

    Input
      polygon - Either a list of points, an Nx2 array or
                a Geospatial data object.
      closed  - (optional) whether points on the boundary are regarded
                as belonging to the polygon (closed = True) or not
                (closed = False). Default is True.
      verbose - passed on to inside_polygon().

    Output
      New geospatial data object representing the points inside the
      specified polygon.

    Note - this method is non-destructive and leaves the data in 'self'
    unchanged.
    """

    from anuga.utilities.polygon import inside_polygon

    # A polygon given as a Geospatial_data object is reduced to its points.
    if isinstance(polygon, Geospatial_data):
        polygon = polygon.get_data_points()

    own_points = self.get_data_points()
    kept = inside_polygon(own_points, polygon, closed, verbose)

    return self.get_sample(kept)
338
339	##
340	# @brief Clip points data by polygon, return points outside polygon.
341	# @param polygon The polygon to clip with.
342	# @param closed True if points on clip boundary are not included in result.
343	# @param verbose True if this function is verbose.
def clip_outside(self, polygon, closed=True, verbose=False):
    """Return the points of this object that lie OUTSIDE a polygon.

    Input
      polygon - Either a list of points, an Nx2 array or
                a Geospatial data object.
      closed  - (optional) whether points on the boundary are regarded
                as belonging to the polygon (closed = True) or not
                (closed = False). Default is True.
      verbose - passed on to outside_polygon().

    Output
      Geospatial data object representing the points OUTSIDE the
      specified polygon.
    """

    from anuga.utilities.polygon import outside_polygon

    # A polygon given as a Geospatial_data object is reduced to its points.
    if isinstance(polygon, Geospatial_data):
        polygon = polygon.get_data_points()

    own_points = self.get_data_points()
    kept = outside_polygon(own_points, polygon, closed, verbose)

    return self.get_sample(kept)
370
371	##
372	# @brief Get instance geo_reference data.
def get_geo_reference(self):
    """Return the geo reference object stored on this instance."""
    geo_ref = self.geo_reference
    return geo_ref
375
376	##
377	# @brief Get coordinates for all data points as an Nx2 array.
378	# @param absolute If True, return UTM, else relative to xll/yll corners.
379	# @param geo_reference If supplied, points are relative to it.
380	# @param as_lat_long If True, return points as lat/lon.
381	# @param isSouthHemisphere If True, return lat/lon points in S.Hemi.
382	# @return A set of data points, in appropriate form.
def get_data_points(self, absolute=True, geo_reference=None,
                    as_lat_long=False, isSouthHemisphere=True):
    """Get coordinates for all data points.

    absolute: If True (the default) and no geo_reference is passed,
        absolute UTM coordinates are returned. If False, the stored
        coordinates, relative to the internal georeference's xll and yll
        corners, are returned.

    geo_reference: If given, the points are returned relative to that
        geo_reference, regardless of 'absolute'.

    as_lat_long: If True, the points are converted to latitude/longitude
        and returned as a list of (lat, long) tuples. This requires a
        geo reference with a zone other than DEFAULT_ZONE.

    isSouthHemisphere: Only used when as_lat_long is True; if False the
        lats and longs returned are valid for the Northern Hemisphere.
    """

    if as_lat_long is True:
        msg = "Points need a zone to be converted into lats and longs"
        assert self.geo_reference is not None, msg
        zone = self.geo_reference.get_zone()
        # Compare by value: an identity test only works when both sides
        # happen to be the very same integer object.
        assert zone != DEFAULT_ZONE, msg
        lats_longs = []
        for point in self.get_data_points(True):
            # UTMtoLL takes northing (y) first, then easting (x)
            lat_calced, long_calced = UTMtoLL(point[1], point[0],
                                              zone, isSouthHemisphere)
            lats_longs.append((lat_calced, long_calced))    # to hash
        return lats_longs

    if absolute is True and geo_reference is None:
        return self.geo_reference.get_absolute(self.data_points)
    elif geo_reference is not None:
        return geo_reference.change_points_geo_ref(self.data_points,
                                                   self.geo_reference)
    else:
        # If absolute is False
        return self.data_points
422
423	##
424	# @brief Get value for attribute name.
425	# @param attribute_name Name to get value for.
426	# @note If name passed is None, return default attribute value.
def get_attributes(self, attribute_name=None):
    """Return the values stored for one named attribute.

    attribute_name: Name of the attribute wanted. If None, the instance's
        default_attribute_name is used; if that is None as well, the first
        key of the attributes dictionary is used.

    Fails an assertion if the name is not in the attributes dictionary.
    """

    if attribute_name is None:
        attribute_name = self.default_attribute_name
        if attribute_name is None:
            # No default configured: take the first one from keys
            attribute_name = list(self.attributes.keys())[0]

    if self.verbose is True:
        print('Using attribute %s' % attribute_name)
        print('Available attributes: %s' % (self.attributes.keys()))

    msg = 'Attribute name %s does not exist in data set' % attribute_name
    assert attribute_name in self.attributes, msg

    return self.attributes[attribute_name]
448
449	##
450	# @brief Get all instance attributes.
451	# @return The instance attribute dictionary, or None if no attributes.
def get_all_attributes(self):
    """Return the whole attributes store of this instance.

    The return value is either None or a dictionary (possibly empty).
    """
    all_attributes = self.attributes
    return all_attributes
458
459	##
460	# @brief Override __add__() to allow addition of geospatial objects.
461	# @param self This object.
462	# @param other The second object.
463	# @return The new geospatial object.
464	def __add__(self, other):
465	"""Returns the addition of 2 geospatical objects,
466	objects are concatencated to the end of each other
467
468	NOTE: doesn't add if objects contain different
469	attributes
470
471	Always return absolute points!
472	This also means, that if you add None to the object,
473	it will be turned into absolute coordinates
474
475	other can be None in which case nothing is added to self.
476	"""
477
478	# find objects zone and checks if the same
479	geo_ref1 = self.get_geo_reference()
480	zone1 = geo_ref1.get_zone()
481
482	if other is not None:
483	geo_ref2 = other.get_geo_reference()
484	zone2 = geo_ref2.get_zone()
485	geo_ref1.reconcile_zones(geo_ref2)
486	new_points = num.concatenate((self.get_data_points(absolute=True),
487	other.get_data_points(absolute=True)),
488	axis = 0)
489
490	# Concatenate attributes if any
491	if self.attributes is None:
492	if other.attributes is not None:
493	msg = 'Geospatial data must have the same '
494	msg += 'attributes to allow addition.'
495	raise Exception, msg
496
497	new_attributes = None
498	else:
499	new_attributes = {}
500	for x in self.attributes.keys():
501	if other.attributes.has_key(x):
502	attrib1 = self.attributes[x]
503	attrib2 = other.attributes[x]
504	new_attributes[x] = num.concatenate((attrib1, attrib2))
505	else:
506	msg = 'Geospatial data must have the same \n'
507	msg += 'attributes to allow addition.'
508	raise Exception, msg
509	else:
510	# other is None:
511	new_points = self.get_data_points(absolute=True)
512	new_attributes = self.attributes
513
514	# Instantiate new data object and return absolute coordinates
515	new_geo_ref = Geo_reference(geo_ref1.get_zone(), 0.0, 0.0)
516	return Geospatial_data(new_points, new_attributes, new_geo_ref)
517
518	##
519	# @brief Override the addition case where LHS isn't geospatial object.
520	# @param self This object.
521	# @param other The second object.
522	# @return The new geospatial object.
def __radd__(self, other):
    """Handle cases like None + Geospatial_data(...)

    The operands are simply swapped and added with the normal addition.
    """
    combined = self + other
    return combined
527
528	###
529	# IMPORT/EXPORT POINTS FILES
530	###
531
532	##
533	# @brief Import a .txt, .csv or .pts points data file.
534	# @param file_name
535	# @param delimiter
536	# @param verbose True if this function is to be verbose.
537	# @note Will throw IOError or SyntaxError if there is a problem.
def import_points_file(self, file_name, delimiter=None, verbose=False):
    """Load a .txt, .csv or .pts file into this instance.

    file_name: Path of the file to read; the reader is chosen from the
        last four characters of the name.
    delimiter: Not used by this method.
    verbose: Passed on to the file reader.

    Note: will throw an IOError/SyntaxError if it can't load the file.
    Catch these!

    Post condition: self.data_points, self.attributes and
    self.geo_reference have been set from the file.
    """

    if not access(file_name, F_OK):
        msg = 'File %s does not exist or is not accessible' % file_name
        raise IOError(msg)

    extension = file_name[-4:]

    if extension == ".pts":
        try:
            data_points, attributes, geo_reference = \
                _read_pts_file(file_name, verbose)
        except IOError:
            msg = 'Could not open file %s ' % file_name
            raise IOError(msg)
    elif extension in (".txt", ".csv"):
        try:
            data_points, attributes, geo_reference = \
                _read_csv_file(file_name, verbose)
        except IOError:
            # This should only be if a file is not found
            msg = 'Could not open file %s. ' % file_name
            msg += 'Check the file location.'
            raise IOError(msg)
        except SyntaxError:
            # This should only be if there is a format error
            msg = 'Problem with format of file %s. \n' % file_name
            msg += Error_message['IOError']
            raise SyntaxError(msg)
    else:
        msg = 'Extension %s is unknown' % extension
        raise IOError(msg)

    self.data_points = data_points
    self.attributes = attributes
    self.geo_reference = geo_reference
580
581	##
582	# @brief Write points data to a file (.csv or .pts).
583	# @param file_name Path to file to write.
584	# @param absolute ??
585	# @param as_lat_long ??
586	# @param isSouthHemisphere ??
def export_points_file(self, file_name, absolute=True,
                       as_lat_long=False, isSouthHemisphere=True):
    """Write the points to a text (.txt/.csv), binary (.pts) or .urs file.

    file_name: Name of the file to write, including the extension; the
        writer is chosen from the last four characters.

    absolute: If True the xll and yll are added to the point values and,
        for a .pts file, the xll and yll of the written geo_reference are
        set to 0. If False the points are written relative to the xll and
        yll and the geo_reference is left unaffected. Must be true for
        .txt, .csv and .urs files.

    as_lat_long: For .txt/.csv files, write the points as lats and longs.

    isSouthHemisphere: Only used when points are converted to lats and
        longs; if False the values are valid for the Northern Hemisphere.

    Raises IOError for an unknown file type.
    """

    suffix = file_name[-4:]

    if suffix == ".pts":
        if absolute is True:
            # Work on a copy so the instance's own geo reference keeps
            # its corner values.
            geo_ref = deepcopy(self.geo_reference)
            geo_ref.xllcorner = 0
            geo_ref.yllcorner = 0
        else:
            geo_ref = self.get_geo_reference()

        _write_pts_file(file_name,
                        self.get_data_points(absolute),
                        self.get_all_attributes(),
                        geo_ref)

    elif suffix == ".txt" or suffix == ".csv":
        msg = "ERROR: trying to write a .txt file with relative data."
        assert absolute, msg
        points = self.get_data_points(absolute=True,
                                      as_lat_long=as_lat_long,
                                      isSouthHemisphere=isSouthHemisphere)
        _write_csv_file(file_name,
                        points,
                        self.get_all_attributes(),
                        as_lat_long=as_lat_long)

    elif suffix == ".urs":
        msg = "ERROR: Can not write a .urs file as a relative file."
        assert absolute, msg
        points = self.get_data_points(as_lat_long=True,
                                      isSouthHemisphere=isSouthHemisphere)
        _write_urs_file(file_name, points)

    else:
        msg = 'Unknown file type %s ' % file_name
        raise IOError(msg)
641
642	##
643	# @brief Get a subset of data that is referred to by 'indices'.
644	# @param indices A list of indices to select data subset with.
645	# @return A geospatial object containing data subset.
def get_sample(self, indices):
    """Return a new object holding the subset selected by 'indices'.

    Input
      indices - a list of integers selecting the points (and their
                attribute values) that make up the new object
    Output
      New geospatial data object representing the points specified by
      the indices. If this object has no attributes the new object gets
      an empty attribute dictionary.
    """

    # FIXME: add the geo_reference to this
    points = self.get_data_points()

    # axis=0 selects whole rows; without it numpy.take works on the
    # flattened array and would return single coordinates, not points.
    sampled_points = num.take(points, indices, axis=0)

    attributes = self.get_all_attributes()

    sampled_attributes = {}
    if attributes is not None:
        for key, att in attributes.items():
            sampled_attributes[key] = num.take(att, indices, axis=0)

    return Geospatial_data(sampled_points, sampled_attributes)
670
671	##
672	# @brief Split one geospatial object into two.
673	# @param factor Relative size to make first result object.
674	# @param seed_num Random 'seed' - used only for unit test.
675	# @param verbose True if this function is to be verbose.
676	# @note Points in each result object are selected randomly.
def split(self, factor=0.5, seed_num=None, verbose=False):
    """Randomly split this object into two disjoint geospatial objects.

    factor:   Fraction of the points to put in the first object; the
              number of points selected is round(factor*len(self)).
              E.g. with 0.1, 10% of the points go to the first object.
    seed_num: If not None it is passed (twice) to seed() before the
              random indices are drawn - mainly important for unit
              tests. If None, seed() is called without arguments.
    verbose:  If true, progress messages are printed.

    Returns (G1, G2), both built with get_sample(): G1 from the randomly
    selected indices and G2 from the indices that remain.

    Method: a list of random positions is drawn with randint(); the
    positions may repeat. They are sorted into descending order and each
    one is popped out of a copy of the full index list, so a repeated
    position removes a different index the second time and the two
    resulting index lists are disjoint, e.g. from indices 0..9:
    random_list(1,3,6,7,9) and remainder_list(0,2,4,5,8)
    """

    i=0    # overwritten by the loop variable below
    self_size = len(self)
    random_list = []
    remainder_list = []
    new_size = round(factor*self_size)

    # Find unique random numbers
    if verbose: print "make unique random number list and get indices"

    total=num.array(range(self_size))
    total_list = total.tolist()

    if verbose: print "total list len", len(total_list)

    # There will be repeated random numbers however will not be a
    # problem as they are being 'pop'ed out of array so if there
    # are two numbers the same they will pop different indices,
    # still basically random
    ## create list of non-unique random numbers
    if verbose: print "create random numbers list %s long" %new_size

    # Set seed if provided, mainly important for unit test!
    # plus recalculate seed when no seed provided.
    if seed_num != None:
        seed(seed_num, seed_num)
    else:
        seed()

    if verbose: print "seed:", get_seed()

    # NOTE(review): whether the upper bound self_size-1 is itself a
    # possible value depends on RandomArray.randint - confirm.
    random_num = randint(0, self_size-1, (int(new_size),))
    random_num = random_num.tolist()

    # need to sort and reverse so the pop() works correctly
    random_num.sort()
    random_num.reverse()

    if verbose: print "make random number list and get indices"

    j=0    # number of indices popped so far
    k=1    # index of the next progress report
    remainder_list = total_list[:]

    # pops array index (random_num) from remainder_list
    # (which starts as the total_list) and appends it to random_list
    random_num_len = len(random_num)
    for i in random_num:
        random_list.append(remainder_list.pop(i))
        j += 1
        # prints progress
        if verbose and round(random_num_len/10*k) == j:
            print '(%s/%s)' % (j, random_num_len)
            k += 1

    # FIXME: move to tests, it might take a long time
    # then create an array of random length between 500 and 1000,
    # and use a random factor between 0 and 1
    # setup for assertion: the two lists together must give back exactly
    # the original list of indices
    test_total = random_list[:]
    test_total.extend(remainder_list)
    test_total.sort()
    msg = 'The two random lists made from the original list when added ' \
          'together DO NOT equal the original list'
    assert total_list == test_total, msg

    # Get new samples
    if verbose: print "get values of indices for random list"
    G1 = self.get_sample(random_list)
    if verbose: print "get values of indices for opposite of random list"
    G2 = self.get_sample(remainder_list)

    return G1, G2
769
770	##
771	# @brief Allow iteration over this object.
def __iter__(self):
    """Prepare this object for blockwise reading of its file.

    For a .pts file the NetCDF file is opened and its header read; for
    any other file (assumed to be csv) the file is opened and the header
    line read. The open file and header information are stored on the
    instance for next() to use.

    Returns self, so the object is its own iterator.
    """
    from Scientific.IO.NetCDF import NetCDFFile
    # FIXME - what to do if the file isn't there

    # FIXME (Ole): Shouldn't this go into the constructor?
    # This method acts like the constructor when blocking.
    # ... and shouldn't it be called block_size?
    if self.max_read_lines is None:
        self.max_read_lines = MAX_READ_LINES

    if self.file_name[-4:] != ".pts":
        # Assume the file is a csv file
        csv_file = open(self.file_name)
        self.header, self.file_pointer = _read_csv_file_header(csv_file)
        self.blocking_georef = None    # Used for reconciling zones
        return self

    # See if the file is there. Throw a QUIET IO error if it isn't
    probe = open(self.file_name, 'r')
    probe.close()

    # Throws prints to screen if file not present
    self.fid = NetCDFFile(self.file_name, netcdf_mode_r)

    (self.blocking_georef,
     self.blocking_keys,
     self.number_of_points) = _read_pts_file_header(self.fid, self.verbose)

    self.start_row = 0
    self.last_row = self.number_of_points
    self.show_verbose = 0
    self.verbose_block_size = (self.last_row + 10)/10
    self.block_number = 0

    # This computes the number of full blocks. The last block may be
    # smaller and won't be included in this estimate.
    self.number_of_blocks = self.number_of_points/self.max_read_lines

    if self.verbose is True:
        summary = 'Reading %d points (in ~%d blocks) from file %s. ' \
                  % (self.number_of_points,
                     self.number_of_blocks,
                     self.file_name)
        block_info = 'Each block consists of %d data points' \
                     % self.max_read_lines
        # Both parts go on one output line, separated by a space.
        print(summary + ' ' + block_info)

    return self
822
823	##
824	# @brief Read another block into the instance.
def next(self):
    """Read the next block and return it as a new Geospatial_data object.

    Uses the file state set up by __iter__(). When the file is
    exhausted the file is closed, the blocking state is removed from the
    instance and StopIteration is raised.

    self.max_read_lines is NOT removed at the end of the iteration: it is
    set by the constructor and read by __iter__(), so it has to survive
    for the object to be iterated over again.

    Raises SyntaxError if a csv block has a format error, and ValueError
    if a csv block has no geo reference after an earlier block had one.
    """

    if self.file_name[-4:] == ".pts":
        if self.start_row == self.last_row:
            # Read the end of the file last iteration
            # Remove blocking attributes
            self.fid.close()
            del self.blocking_georef
            del self.last_row
            del self.start_row
            del self.blocking_keys
            del self.fid
            raise StopIteration

        fin_row = min(self.start_row + self.max_read_lines, self.last_row)

        if self.verbose is True:
            if self.show_verbose >= self.start_row \
               and self.show_verbose < fin_row:
                print('Reading block %d (points %d to %d) out of %d'
                      % (self.block_number,
                         self.start_row,
                         fin_row,
                         self.number_of_blocks))

                self.show_verbose += max(self.max_read_lines,
                                         self.verbose_block_size)

        # Read next block
        pointlist, att_dict, = _read_pts_file_blocking(self.fid,
                                                       self.start_row,
                                                       fin_row,
                                                       self.blocking_keys)

        geo = Geospatial_data(pointlist, att_dict, self.blocking_georef)
        self.start_row = fin_row

        self.block_number += 1

    else:
        # Assume the file is a csv file

        def close_csv():
            # Close the csv file and drop the csv blocking state.
            self.file_pointer.close()
            del self.header
            del self.file_pointer

        try:
            (pointlist,
             att_dict,
             geo_ref,
             self.file_pointer) = \
                _read_csv_file_blocking(self.file_pointer,
                                        self.header[:],
                                        max_read_lines=self.max_read_lines,
                                        verbose=self.verbose)

            # Check that the zones haven't changed.
            if geo_ref is not None:
                geo_ref.reconcile_zones(self.blocking_georef)
                self.blocking_georef = geo_ref
            elif self.blocking_georef is not None:
                msg = 'Geo reference given, then not given.'
                msg += ' This should not happen.'
                raise ValueError(msg)
            geo = Geospatial_data(pointlist, att_dict, geo_ref)
        except StopIteration:
            close_csv()
            raise StopIteration
        except ANUGAError:
            close_csv()
            raise
        except SyntaxError:
            close_csv()
            # This should only be if there is a format error
            msg = 'Could not open file %s. \n' % self.file_name
            msg += Error_message['IOError']
            raise SyntaxError(msg)

    return geo
909
910
911	##################### Error messages ###########
# Help texts appended to error messages, keyed by error kind.
# 'Em' is a short alias for the same dictionary.
Error_message = {}
Em = Error_message
Em['IOError'] = (
    "NOTE: The format for a comma separated .txt/.csv file is:\n"
    " 1st line: [column names]\n"
    " other lines: [x value], [y value], [attributes]\n"
    "\n"
    " for example:\n"
    " x, y, elevation, friction\n"
    " 0.6, 0.7, 4.9, 0.3\n"
    " 1.9, 2.8, 5, 0.3\n"
    " 2.7, 2.4, 5.2, 0.3\n"
    "\n"
    "The first two columns are assumed to be x, y coordinates.\n"
    "The attribute values must be numeric.\n"
)
926
927	##
928	# @brief ??
929	# @param latitudes ??
930	# @param longitudes ??
931	# @param geo_reference ??
932	# @param data_points ??
933	# @param points_are_lats_longs ??
def _set_using_lat_long(latitudes,
                        longitudes,
                        geo_reference,
                        data_points,
                        points_are_lats_longs):
    """Convert lat/long input to UTM points plus a matching geo reference.

    If the points have lat long info, it is taken to be in (lat, long)
    order: the first column of data_points gives the latitudes and the
    remaining column(s) the longitudes.

    Returns (data_points, Geo_reference(zone=zone)) as produced by
    convert_from_latlon_to_utm().

    Raises ValueError if a geo_reference is given, if data_points is
    given without points_are_lats_longs, if points_are_lats_longs is set
    without data_points, or if latitudes and/or longitudes are missing.
    """

    if geo_reference is not None:
        raise ValueError("A georeference is specified yet latitude and "
                         "longitude are also specified!")

    if points_are_lats_longs:
        if data_points is None:
            raise ValueError("Data points are not specified.")
        # Pull the two coordinate vectors out of the points array.
        lat_long_array = ensure_numeric(data_points)
        latitudes = num.ravel(lat_long_array[:,0:1])
        longitudes = num.ravel(lat_long_array[:,1:])
    elif data_points is not None:
        raise ValueError("Data points are specified yet latitude and "
                         "longitude are also specified.")

    if latitudes is None:
        if longitudes is None:
            raise ValueError("Latitudes and Longitudes are not specified.")
        raise ValueError("Longitudes are specified yet latitudes aren't.")

    if longitudes is None:
        raise ValueError("Latitudes are specified yet longitudes aren't.")

    utm_points, zone = convert_from_latlon_to_utm(latitudes=latitudes,
                                                  longitudes=longitudes)
    return utm_points, Geo_reference(zone=zone)
974
975
976	##
977	# @brief Read a .pts data file.
978	# @param file_name Path to file to read.
979	# @param verbose True if this function is to be verbose.
980	# @return (pointlist, attributes, geo_reference)
def _read_pts_file(file_name, verbose=False):
    """Read a .pts NetCDF file.

    file_name: path of the .pts file to read.
    verbose:   if true, print progress messages.

    Returns the tuple (pointlist, attributes, geo_reference) where
    pointlist is an array built from the file's 'points' variable,
    attributes is a dictionary mapping every other variable name to an
    array of its values, and geo_reference is a Geo_reference built from
    the file, or None if constructing it raises AttributeError.

    Raises IOError if the file cannot be opened or has no 'points'
    variable.
    """

    from Scientific.IO.NetCDF import NetCDFFile

    if verbose: print('%s %s' % ('Reading ', file_name))

    # See if the file is there.  Throw a QUIET IO error if it isn't
    fd = open(file_name, 'r')
    fd.close()

    # Throws prints to screen if file not present
    fid = NetCDFFile(file_name, netcdf_mode_r)

    # The finally clause guarantees the NetCDF handle is released on every
    # exit path, including a missing 'points' variable.
    try:
        keys = list(fid.variables.keys())

        if verbose: print('Got %d variables: %s' % (len(keys), keys))

        # list.remove() signals a missing item with ValueError, never
        # IOError, so test for the variable explicitly before using it.
        if 'points' not in keys:
            msg = "Expected keyword 'points' but could not find it"
            raise IOError(msg)
        keys.remove('points')

        pointlist = num.array(fid.variables['points'])

        attributes = {}
        for key in keys:
            if verbose: print("reading attribute '%s'" % key)

            attributes[key] = num.array(fid.variables[key])

        try:
            geo_reference = Geo_reference(NetCDFObject=fid)
        except AttributeError:
            geo_reference = None
    finally:
        fid.close()

    return pointlist, attributes, geo_reference
1027
1028
1029	##
1030	# @brief Read a .csv data file.
1031	# @param file_name Path to the .csv file to read.
1032	# @param verbose True if this function is to be verbose.
def _read_csv_file(file_name, verbose=False):
    """Read a whole .csv file in one go.

    file_name: path of the .csv file to read.
    verbose:   accepted for interface compatibility; not used here.

    Returns the tuple (pointlist, att_dict, geo_ref) produced by
    _read_csv_file_blocking for the complete file.

    Exceptions raised while reading the header or the body propagate to
    the caller; the file is closed in every case.
    """

    file_pointer = open(file_name)

    # try/finally closes the file on success and on every exception, not
    # only on ANUGAError.  Closing an already closed file is harmless.
    try:
        header, file_pointer = _read_csv_file_header(file_pointer)

        # A line limit this large means the blocking reader is never
        # expected to stop before the end of the file.
        (pointlist,
         att_dict,
         geo_ref,
         file_pointer) = _read_csv_file_blocking(file_pointer,
                                                 header,
                                                 max_read_lines=1e30)
    finally:
        file_pointer.close()

    return pointlist, att_dict, geo_ref
1060
1061
1062	##
1063	# @brief Read a .csv file header.
1064	# @param file_pointer Open descriptor of the file to read.
1065	# @param delimiter Header line delimiter string, split on this string.
1066	# @param verbose True if this function is to be verbose.
1067	# @return A tuple of (<cleaned header string>, <input file_pointer>)
1068
# Default field separator for .csv points files.
CSV_DELIMITER = ','

def _read_csv_file_header(file_pointer,
                          delimiter=CSV_DELIMITER,
                          verbose=False):
    """Read the header line of a .csv file.

    file_pointer: open file positioned at the header line.
    delimiter:    string the header line is split on.
    verbose:      accepted for interface compatibility; not used here.

    Returns the tuple (header, file_pointer): the list of cleaned header
    names and the same file object, now positioned after the header line.
    """

    first_line = file_pointer.readline()
    column_names = clean_line(first_line.strip(), delimiter)

    return column_names, file_pointer
1082
1083	##
1084	# @brief Read a .csv file, with blocking.
1085	# @param file_pointer Open descriptor of the file to read.
1086	# @param header List of already read .csv header fields.
1087	# @param delimiter Delimiter string header was split on.
1088	# @param max_read_lines The max number of lines to read before blocking.
1089	# @param verbose True if this function is to be verbose.
1090	# @note Will throw IndexError, SyntaxError exceptions.
def _read_csv_file_blocking(file_pointer,
                            header,
                            delimiter=CSV_DELIMITER,
                            max_read_lines=MAX_READ_LINES,
                            verbose=False):
    """Read (a block of) the body of a .csv file.

    file_pointer:   open file positioned after the header line.
    header:         the list header of the csv file, with the x and y
                    labels.  The caller's list is not modified.
    delimiter:      string each line is split on.
    max_read_lines: maximum number of data lines to read in this call.
    verbose:        accepted for interface compatibility; not used here.

    Reading stops at the end of the file, at max_read_lines data lines,
    or at the first non-comment line with fewer than two fields.  Lines
    starting with '#' are skipped and do not count as data lines.

    Returns the tuple (pointlist, att_dict, geo_ref, file_pointer).  If
    the first two header names start with 'lat'/'lon' the points are
    converted with _set_using_lat_long and geo_ref is the georeference it
    returns; otherwise geo_ref is None.

    Raises SyntaxError if the header has fewer than two columns, if a
    line has a different number of attribute fields than the header, or
    if a field is not numeric.  Raises StopIteration if no points were
    read.
    """

    points = []
    att_dict = {}

    # Work on a copy so that removing the x and y labels does not alter
    # the caller's header list.
    header = header[:]
    try:
        x_header = header.pop(0)
        y_header = header.pop(0)
    except IndexError:
        # if there are not two columns this will occur.
        # eg if it is a space separated file
        raise SyntaxError('The file header must have at least two columns.')

    read_lines = 0
    while read_lines < max_read_lines:
        line = file_pointer.readline()

        # Comment lines are skipped before the field count is tested so
        # that a comment without any delimiter does not end the read.
        if line.startswith('#'):
            continue

        numbers = clean_line(line, delimiter)
        if len(numbers) <= 1:
            # End of file, blank line or a line without a delimiter.
            break

        read_lines += 1

        try:
            x = float(numbers[0])
            y = float(numbers[1])
            points.append([x, y])

            values = numbers[2:]
            if len(header) != len(values):
                file_pointer.close()
                msg = "File load error. " \
                      "There might be a problem with the file header."
                raise SyntaxError(msg)

            # clean_line has already stripped every field.
            # NOTE(review): an empty field is skipped without a
            # placeholder, so that attribute ends up with fewer values
            # than there are points - confirm this is intended.
            for name, value in zip(header, values):
                if value != '':
                    att_dict.setdefault(name, []).append(float(value))
        except ValueError:
            raise SyntaxError('Could not convert a field to a number: %s'
                              % line.strip())

    if not points:
        raise StopIteration

    pointlist = num.array(points, float)
    for key in list(att_dict.keys()):
        att_dict[key] = num.array(att_dict[key], float)

    # Do stuff here so the info is in lat's and longs
    geo_ref = None
    x_header = x_header[:3].lower()
    y_header = y_header[:3].lower()
    if x_header in ('lon', 'lat') and y_header in ('lon', 'lat'):
        if x_header == 'lon':
            longitudes = num.ravel(pointlist[:,0:1])
            latitudes = num.ravel(pointlist[:,1:])
        else:
            latitudes = num.ravel(pointlist[:,0:1])
            longitudes = num.ravel(pointlist[:,1:])

        pointlist, geo_ref = _set_using_lat_long(latitudes,
                                                 longitudes,
                                                 geo_reference=None,
                                                 data_points=None,
                                                 points_are_lats_longs=False)

    return pointlist, att_dict, geo_ref, file_pointer
1171
1172
1173	##
1174	# @brief Read a .pts file header.
1175	# @param fid Handle to the open .pts file.
1176	# @param verbose True if the function is to be verbose.
1177	# @return (geo_reference, keys, fid.dimensions['number_of_points'])
1178	# @note Will throw IOError and AttributeError exceptions.
1179	def _read_pts_file_header(fid, verbose=False):
1180	"""Read the geo_reference and number_of_points from a .pts file"""
1181
1182	keys = fid.variables.keys()
1183	try:
1184	keys.remove('points')
1185	except IOError, e:
1186	fid.close()
1187	msg = "Expected keyword 'points' but could not find it."
1188	raise IOError, msg
1189
1190	if verbose: print 'Got %d variables: %s' % (len(keys), keys)
1191
1192	try:
1193	geo_reference = Geo_reference(NetCDFObject=fid)
1194	except AttributeError, e:
1195	geo_reference = None
1196
1197	return geo_reference, keys, fid.dimensions['number_of_points']
1198
1199
1200	##
1201	# @brief Read the body of a .pts file, with blocking.
1202	# @param fid Handle to already open file.
1203	# @param start_row Start row index of points to return.
1204	# @param fin_row End row index of points to return.
1205	# @param keys Iterable of keys to return.
1206	# @return Tuple of (pointlist, attributes).
1207	def _read_pts_file_blocking(fid, start_row, fin_row, keys):
1208	"""Read the body of a .csv file."""
1209
1210	pointlist = num.array(fid.variables['points'][start_row:fin_row])
1211
1212	attributes = {}
1213	for key in keys:
1214	attributes[key] = num.array(fid.variables[key][start_row:fin_row])
1215
1216	return pointlist, attributes
1217
1218
1219	##
1220	# @brief Write a .pts data file.
1221	# @param file_name Path to the file to write.
1222	# @param write_data_points Data points to write.
1223	# @param write_attributes Attributes to write.
1224	# @param write_geo_reference Georef to write.
def _write_pts_file(file_name,
                    write_data_points,
                    write_attributes=None,
                    write_geo_reference=None):
    """Write a .pts NetCDF file.

    file_name:           path of the file to write.
    write_data_points:   array of points; its shape[0] gives the number
                         of points and two values are stored per point.
    write_attributes:    optional dictionary mapping attribute names to
                         one value per point.
    write_geo_reference: optional georeference, stored with
                         write_NetCDF_georeference.

    The points are stored in a variable called 'points' and every
    attribute in a variable named after its key.
    NOTE(review): an attribute called 'points' would therefore clash with
    the points variable - confirm how callers guard against this.
    """

    from Scientific.IO.NetCDF import NetCDFFile

    # NetCDF file definition
    outfile = NetCDFFile(file_name, netcdf_mode_w)

    # The finally clause closes the file even if writing fails part way.
    try:
        # Create new file
        outfile.institution = 'Geoscience Australia'
        outfile.description = 'NetCDF format for compact and portable ' \
                              'storage of spatial point data'

        # Dimension definitions
        number_of_points = write_data_points.shape[0]
        outfile.createDimension('number_of_points', number_of_points)
        outfile.createDimension('number_of_dimensions', 2) # This is 2d data

        # Variable definition
        outfile.createVariable('points', netcdf_float,
                               ('number_of_points', 'number_of_dimensions'))

        # Store the point coordinates
        outfile.variables['points'][:] = write_data_points

        if write_attributes is not None:
            for key in write_attributes.keys():
                outfile.createVariable(key, netcdf_float,
                                       ('number_of_points',))
                outfile.variables[key][:] = write_attributes[key]

        if write_geo_reference is not None:
            write_NetCDF_georeference(write_geo_reference, outfile)
    finally:
        outfile.close()
1277
1278
1279	##
1280	# @brief Write a .csv data file.
1281	# @param file_name Path to the file to write.
1282	# @param write_data_points Data points to write.
1283	# @param write_attributes Attributes to write.
1284	# @param as_lat_long True if points are lat/lon, else x/y.
1285	# @param delimiter The CSV delimiter to use.
1286	def _write_csv_file(file_name,
1287	write_data_points,
1288	write_attributes=None,
1289	as_lat_long=False,
1290	delimiter=','):
1291	"""Write a .csv file."""
1292
1293	points = write_data_points
1294	pointattributes = write_attributes
1295
1296	fd = open(file_name, 'w')
1297
1298	if as_lat_long:
1299	titlelist = "latitude" + delimiter + "longitude" + delimiter
1300	else:
1301	titlelist = "x" + delimiter + "y" + delimiter
1302
1303	if pointattributes is not None:
1304	for title in pointattributes.keys():
1305	titlelist = titlelist + title + delimiter
1306	titlelist = titlelist[0:-len(delimiter)] # remove the last delimiter
1307
1308	fd.write(titlelist + "\n")
1309
1310	# <x/lat> <y/long> [attributes]
1311	for i, vert in enumerate( points):
1312	if pointattributes is not None:
1313	attlist = ","
1314	for att in pointattributes.keys():
1315	attlist = attlist + str(pointattributes[att][i]) + delimiter
1316	attlist = attlist[0:-len(delimiter)] # remove the last delimiter
1317	attlist.strip()
1318	else:
1319	attlist = ''
1320
1321	fd.write(str(vert[0]) + delimiter + str(vert[1]) + attlist + "\n")
1322
1323	fd.close()
1324
1325
1326	##
1327	# @brief Write a URS file.
1328	# @param file_name The path of the file to write.
1329	# @param points
1330	# @param delimiter
1331	def _write_urs_file(file_name, points, delimiter=' '):
1332	"""Write a URS format file.
1333	export a file, file_name, with the urs format
1334	the data points are in lats and longs
1335	"""
1336
1337	fd = open(file_name, 'w')
1338
1339	# first line is # points
1340	fd.write(str(len(points)) + "\n")
1341
1342	# <lat> <long> <id#>
1343	for i, vert in enumerate( points):
1344	fd.write(str(round(vert[0],7)) + delimiter +
1345	str(round(vert[1],7)) + delimiter + str(i) + "\n")
1346
1347	fd.close()
1348
1349
1350	##
1351	# @brief ??
1352	# @param point_atts ??
1353	# @return ??
1354	def _point_atts2array(point_atts):
1355	point_atts['pointlist'] = num.array(point_atts['pointlist'], num.float)
1356
1357	for key in point_atts['attributelist'].keys():
1358	point_atts['attributelist'][key] = \
1359	num.array(point_atts['attributelist'][key], num.float)
1360
1361	return point_atts
1362
1363
1364	##
1365	# @brief Convert geospatial object to a points dictionary.
1366	# @param geospatial_data The geospatial object to convert.
1367	# @return A points dictionary.
def geospatial_data2points_dictionary(geospatial_data):
    """Convert geospatial data to a points dictionary.

    geospatial_data: object providing data_points, attributes and
                     geo_reference.

    Returns a dictionary with the keys 'pointlist' (the data points),
    'attributelist' (a new dictionary holding the same attribute names
    and values) and 'geo_reference'.
    """

    source_attributes = geospatial_data.attributes
    attribute_copy = dict((name, source_attributes[name])
                          for name in source_attributes.keys())

    return {'pointlist': geospatial_data.data_points,
            'attributelist': attribute_copy,
            'geo_reference': geospatial_data.geo_reference}
1383
1384
1385	##
1386	# @brief Convert a points dictionary to a geospatial object.
1387	# @param points_dictionary A points dictionary to convert.
def points_dictionary2geospatial_data(points_dictionary):
    """Convert a points dictionary to a geospatial data object.

    points_dictionary: dictionary with the keys 'pointlist' and
                       'attributelist' and, optionally, 'geo_reference'.

    Returns a Geospatial_data object built from those entries; a missing
    'geo_reference' entry is passed on as None.

    Raises AssertionError if 'pointlist' or 'attributelist' is missing.
    """

    for required_key in ('pointlist', 'attributelist'):
        msg = 'Points dictionary must have key %s' % required_key
        assert required_key in points_dictionary, msg

    geo = points_dictionary.get('geo_reference', None)

    return Geospatial_data(points_dictionary['pointlist'],
                           points_dictionary['attributelist'],
                           geo_reference=geo)
1405
1406
1407	##
1408	# @brief Split a string into 'clean' fields.
1409	# @param str The string to process.
1410	# @param delimiter The delimiter string to split 'line' with.
1411	# @return A list of 'cleaned' field strings.
1412	# @note Any fields that were initially zero length will be removed.
1413	# @note If a field contains '\n' it isn't zero length.
def clean_line(str, delimiter):
    """Split a string on the given delimiter and tidy up the fields.

    str:       the string to process.
    delimiter: the string to split on.

    Fields that are empty straight after splitting are dropped; every
    remaining field has its surrounding whitespace removed.  A field that
    only holds whitespace (eg '\\n') is therefore kept, as an empty string.

    Returns the list of cleaned fields.
    """

    cleaned_fields = []
    for raw_field in str.split(delimiter):
        if raw_field == '':
            continue
        cleaned_fields.append(raw_field.strip())

    return cleaned_fields
1418
1419
1420	##
1421	# @brief Ensure that points are in absolute coordinates.
1422	# @param points A list or array of points to check, or geospatial object.
1423	# @param geo_reference If supplied,
1424	# @return ??
def ensure_absolute(points, geo_reference=None):
    """Ensure that points are in absolute coordinates.

    This function inputs several formats and
    outputs one format. - a numeric array of absolute points.

    Input formats are;
      points: List or numeric array of coordinate pairs [xi, eta] of
              points or geospatial object or points file name

      geo_reference: A geo_reference object or 3-tuples consisting of
                     UTM zone, easting and northing.
                     If specified, the points are assumed to be relative
                     to it.  Must be None when points is a geospatial
                     object or a points file name.

    Raises AssertionError if both a geospatial object (or points file
    name) and a geo_reference are given.
    """

    # Input check
    if isinstance(points, basestring):
        # It's a string - assume it is a point file
        points = Geospatial_data(file_name=points)

    if isinstance(points, Geospatial_data):
        # Reject the conflicting input before doing any conversion work.
        msg = 'Use a Geospatial_data object or a mesh origin, not both.'
        assert geo_reference is None, msg
        points = points.get_data_points(absolute=True)
    else:
        points = ensure_numeric(points, float)

    # Sort out geo_reference and convert points
    if geo_reference is not None:
        if isinstance(geo_reference, Geo_reference):
            geo = geo_reference
        else:
            geo = Geo_reference(geo_reference[0],
                                geo_reference[1],
                                geo_reference[2])
        points = geo.get_absolute(points)

    return points
1466
1467
1468	##
1469	# @brief
1470	# @param points
1471	# @param geo_reference
1472	# @return A geospatial object.
def ensure_geospatial(points, geo_reference=None):
    """Convert various data formats to a geospatial_data instance.

    Input formats are;
      points: List or numeric array of coordinate pairs [xi, eta] of
              points or geospatial object

      geo_reference: A geo_reference object or 3-tuples consisting of
                     UTM zone, easting and northing.  It is passed on as
                     the georeference of the new geospatial object.  Must
                     be None when points already is a geospatial object.

    Returns points itself if it already is a Geospatial_data object,
    otherwise a new Geospatial_data object.

    Raises AssertionError if both a geospatial object and a
    geo_reference are given.
    """

    # Input check
    if isinstance(points, Geospatial_data):
        msg = "Use a Geospatial_data object or a mesh origin, not both."
        assert geo_reference is None, msg
        return points

    # List or numeric array of points
    points = ensure_numeric(points, float)

    # Sort out geo reference
    if geo_reference is None or isinstance(geo_reference, Geo_reference):
        geo = geo_reference
    else:
        geo = Geo_reference(geo_reference[0],
                            geo_reference[1],
                            geo_reference[2])

    # Create Geospatial_data object with appropriate geo reference and return
    return Geospatial_data(data_points=points, geo_reference=geo)
1510
1511
1512	##
1513	# @brief
1514	# @param data_file
1515	# @param alpha_list
1516	# @param mesh_file
1517	# @param boundary_poly
1518	# @param mesh_resolution
1519	# @param north_boundary
1520	# @param south_boundary
1521	# @param east_boundary
1522	# @param west_boundary
1523	# @param plot_name
1524	# @param split_factor
1525	# @param seed_num
1526	# @param cache
1527	# @param verbose
def find_optimal_smoothing_parameter(data_file,
                                     alpha_list=None,
                                     mesh_file=None,
                                     boundary_poly=None,
                                     mesh_resolution=100000,
                                     north_boundary=None,
                                     south_boundary=None,
                                     east_boundary=None,
                                     west_boundary=None,
                                     plot_name='all_alphas',
                                     split_factor=0.1,
                                     seed_num=None,
                                     cache=False,
                                     verbose=False):
    """Removes a small random sample of points from 'data_file'.
    Then creates models with different alpha values from 'alpha_list' and
    cross validates the predicted value to the previously removed point data.
    Returns the alpha value which has the smallest covariance.

    data_file: must not contain points outside the boundaries defined
               and is either a pts, txt or csv file.

    alpha_list: the alpha values to test in a single list.
                Defaults to [0.001, 0.01, 100].

    mesh_file: name of an existing mesh file to read.  If None, a
               temporary mesh 'temp.msh' is built from the four boundary
               values and removed again before returning.
               NOTE, if there is a mesh file mesh_resolution,
               north_boundary, south... etc will be ignored.

    boundary_poly: currently not used.

    mesh_resolution: the maximum area size for a triangle

    north_boundary... west_boundary: the value of the boundary

    plot_name: the name for the plot containing the results; no plot is
               written if it is None.

    split_factor, seed_num: passed on to the split of the data into the
               sample that is removed and the remainder.

    seed_num: the seed to the random number generator

    USAGE:
        value, alpha = find_optimal_smoothing_parameter(data_file=fileName,
                                             alpha_list=[0.0001, 0.01, 1],
                                             mesh_file=None,
                                             mesh_resolution=3,
                                             north_boundary=5,
                                             south_boundary=-5,
                                             east_boundary=5,
                                             west_boundary=-5,
                                             plot_name='all_alphas',
                                             seed_num=100000,
                                             verbose=False)

    OUTPUT: returns the minimum normalised covariance calculated AND the
            alpha that created it. PLUS writes a plot of the results

    Raises Exception if no mesh file is given and a boundary value is
    missing, and IOError if the given mesh file does not exist.

    NOTE: code will not work if the data_file extent is greater than the
          boundary_polygon or any of the boundaries, eg
          north_boundary...west_boundary
    """

    from anuga.shallow_water import Domain
    from anuga.geospatial_data.geospatial_data import Geospatial_data
    from anuga.pmesh.mesh_interface import create_mesh_from_regions
    from anuga.utilities.numerical_tools import cov

    attribute_smoothed = 'elevation'

    # Remember whether the mesh file is ours, so that a mesh file supplied
    # by the caller is never deleted, whatever its name.
    mesh_is_temporary = mesh_file is None

    if mesh_is_temporary:
        if verbose: print("building mesh")
        mesh_file = 'temp.msh'

        if north_boundary is None or south_boundary is None \
           or east_boundary is None or west_boundary is None:
            msg = 'All boundaries must be defined'
            raise Exception(msg)

        poly_topo = [[east_boundary, south_boundary],
                     [east_boundary, north_boundary],
                     [west_boundary, north_boundary],
                     [west_boundary, south_boundary]]

        create_mesh_from_regions(poly_topo,
                                 boundary_tags={'back': [2],
                                                'side': [1,3],
                                                'ocean': [0]},
                                 maximum_triangle_area=mesh_resolution,
                                 filename=mesh_file,
                                 use_cache=cache,
                                 verbose=verbose)
    else:
        # mesh file provided - test that it exists
        if verbose: print("reading from file: %s" % mesh_file)
        if not access(mesh_file, F_OK):
            msg = "file %s doesn't exist!" % mesh_file
            raise IOError(msg)

    # split topo data
    if verbose: print('Reading elevation file: %s' % data_file)
    G = Geospatial_data(file_name=data_file)
    if verbose: print('Start split')
    G_small, G_other = G.split(split_factor, seed_num, verbose=verbose)
    if verbose: print('Finish split')
    points = G_small.get_data_points()

    if verbose:
        print('Number of points in sample to compare:  %s' % len(points))

    if alpha_list is None:
        alphas = [0.001, 0.01, 100]
    else:
        alphas = alpha_list

    # creates array with columns 1 and 2 are x, y. column 3 is elevation
    # 4 onwards is the elevation_predicted using the alpha, which will
    # be compared later against the real removed data
    data = num.zeros((len(points), 3 + len(alphas)), float)

    # gets relative point from sample
    data[:,0] = points[:,0]
    data[:,1] = points[:,1]
    elevation_sample = G_small.get_attributes(attribute_name=attribute_smoothed)
    data[:,2] = elevation_sample

    # One row per alpha: [alpha, normalised covariance]
    normal_cov = num.zeros((len(alphas), 2), float)

    if verbose: print('Setup computational domains with different alphas')

    for i, alpha in enumerate(alphas):
        # add G_other data to domains with different alphas
        if verbose:
            print('\n Calculating domain and mesh for Alpha =  %s \n'
                  % (alpha,))
        domain = Domain(mesh_file, use_cache=cache, verbose=verbose)
        if verbose: print(domain.statistics())
        domain.set_quantity(attribute_smoothed,
                            geospatial_data=G_other,
                            use_cache=cache,
                            verbose=verbose,
                            alpha=alpha)

        # Convert points to geospatial data for use with get_values below
        # NOTE(review): the second positional parameter of Geospatial_data
        # is 'attributes', not 'geo_reference', so the domain georeference
        # is not used as a georeference here.  Left unchanged because the
        # points come from get_data_points() called with its defaults -
        # confirm the intent before changing this call.
        points_geo = Geospatial_data(points, domain.geo_reference)

        # returns the predicted elevation of the points that were "split" out
        # of the original data set for one particular alpha
        if verbose:
            print('Get predicted elevation for location to be compared')
        elevation_predicted = \
            domain.quantities[attribute_smoothed].\
                get_values(interpolation_points=points_geo)

        # add predicted elevation to array that starts with x, y, z...
        data[:,i+3] = elevation_predicted

        sample_cov = cov(elevation_sample)
        ele_cov = cov(elevation_sample - elevation_predicted)
        normal_cov[i,:] = [alpha, ele_cov / sample_cov]

        if verbose:
            print('Covariance for alpha  %s =  %s'
                  % (normal_cov[i][0], normal_cov[i][1]))
            print('-------------------------------------------- \n')

    # Sort the rows by alpha.  axis=0 is required: without it numpy's take
    # works on the flattened array and returns a 1D result.
    normal_cov_new = num.take(normal_cov, num.argsort(normal_cov[:,0]),
                              axis=0)

    if plot_name is not None:
        from pylab import savefig, semilogx, loglog

        semilogx(normal_cov_new[:,0], normal_cov_new[:,1])
        loglog(normal_cov_new[:,0], normal_cov_new[:,1])
        savefig(plot_name, dpi=300)

    if mesh_is_temporary:
        remove(mesh_file)

    # Row holding the smallest normalised covariance
    best_row = num.argmin(normal_cov_new, axis=0)[1]

    if verbose:
        print('Final results:')
        for i, alpha in enumerate(alphas):
            print('covariance for alpha %s = %s '
                  % (normal_cov[i][0], normal_cov[i][1]))
        print('\n Optimal alpha is: %s ' % normal_cov_new[best_row, 0])

    # covariance and optimal alpha
    return (min(normal_cov_new[:,1]), normal_cov_new[best_row, 0])
1721
1722
1723	##
1724	# @brief
1725	# @param data_file
1726	# @param alpha_list
1727	# @param mesh_file
1728	# @param boundary_poly
1729	# @param mesh_resolution
1730	# @param north_boundary
1731	# @param south_boundary
1732	# @param east_boundary
1733	# @param west_boundary
1734	# @param plot_name
1735	# @param split_factor
1736	# @param seed_num
1737	# @param cache
1738	# @param verbose
def old_find_optimal_smoothing_parameter(data_file,
                                         alpha_list=None,
                                         mesh_file=None,
                                         boundary_poly=None,
                                         mesh_resolution=100000,
                                         north_boundary=None,
                                         south_boundary=None,
                                         east_boundary=None,
                                         west_boundary=None,
                                         plot_name='all_alphas',
                                         split_factor=0.1,
                                         seed_num=None,
                                         cache=False,
                                         verbose=False):
    """Older variant of find_optimal_smoothing_parameter.

    All domains, one per alpha, are built first and kept in memory; the
    comparison with the removed sample is done in a second pass.

    data_file: must not contain points outside the boundaries defined
               and is either a pts, txt or csv file.

    alpha_list: the alpha values to test in a single list.
                Defaults to [0.001, 0.01, 100].

    mesh_file: name of an existing mesh file to read.  If None, a
               temporary mesh 'temp.msh' is built from the four boundary
               values and removed again before returning.
               NOTE, if there is a mesh file mesh_resolution,
               north_boundary, south... etc will be ignored.

    boundary_poly: currently not used.

    mesh_resolution: the maximum area size for a triangle

    north_boundary... west_boundary: the value of the boundary

    plot_name: the name for the plot containing the results; no plot is
               written if it is None.

    seed_num: the seed to the random number generator

    USAGE:
        value, alpha = find_optimal_smoothing_parameter(data_file=fileName,
                                             alpha_list=[0.0001, 0.01, 1],
                                             mesh_file=None,
                                             mesh_resolution=3,
                                             north_boundary=5,
                                             south_boundary=-5,
                                             east_boundary=5,
                                             west_boundary=-5,
                                             plot_name='all_alphas',
                                             seed_num=100000,
                                             verbose=False)

    OUTPUT: returns the minimum normalised covariance calculated AND the
            alpha that created it. PLUS writes a plot of the results

    Raises Exception if no mesh file is given and a boundary value is
    missing, and IOError if the given mesh file does not exist.

    NOTE: code will not work if the data_file extent is greater than the
          boundary_polygon or the north_boundary...west_boundary
    """

    from anuga.shallow_water import Domain
    from anuga.geospatial_data.geospatial_data import Geospatial_data
    from anuga.pmesh.mesh_interface import create_mesh_from_regions
    from anuga.utilities.numerical_tools import cov
    from anuga.fit_interpolate.benchmark_least_squares import mem_usage

    attribute_smoothed = 'elevation'

    # Remember whether the mesh file is ours, so that a mesh file supplied
    # by the caller is never deleted, whatever its name.
    mesh_is_temporary = mesh_file is None

    if mesh_is_temporary:
        mesh_file = 'temp.msh'

        if north_boundary is None or south_boundary is None \
           or east_boundary is None or west_boundary is None:
            msg = 'All boundaries must be defined'
            raise Exception(msg)

        poly_topo = [[east_boundary, south_boundary],
                     [east_boundary, north_boundary],
                     [west_boundary, north_boundary],
                     [west_boundary, south_boundary]]

        create_mesh_from_regions(poly_topo,
                                 boundary_tags={'back': [2],
                                                'side': [1,3],
                                                'ocean': [0]},
                                 maximum_triangle_area=mesh_resolution,
                                 filename=mesh_file,
                                 use_cache=cache,
                                 verbose=verbose)
    else:
        # mesh file provided - test that it exists
        if not access(mesh_file, F_OK):
            msg = "file %s doesn't exist!" % mesh_file
            raise IOError(msg)

    # split topo data
    G = Geospatial_data(file_name=data_file)
    if verbose: print('start split')
    G_small, G_other = G.split(split_factor, seed_num, verbose=verbose)
    if verbose: print('finish split')
    points = G_small.get_data_points()

    # FIXME: Remove points outside boundary polygon

    if verbose:
        print('Number of points in sample to compare:  %s' % len(points))

    if alpha_list is None:
        alphas = [0.001, 0.01, 100]
    else:
        alphas = alpha_list

    domains = {}

    if verbose: print('Setup computational domains with different alphas')

    for alpha in alphas:
        # add G_other data to domains with different alphas
        if verbose:
            print('\n Calculating domain and mesh for Alpha =  %s \n'
                  % (alpha,))
        domain = Domain(mesh_file, use_cache=cache, verbose=verbose)
        if verbose: print(domain.statistics())
        domain.set_quantity(attribute_smoothed,
                            geospatial_data=G_other,
                            use_cache=cache,
                            verbose=verbose,
                            alpha=alpha)
        domains[alpha] = domain

    # creates array with columns 1 and 2 are x, y. column 3 is elevation
    # 4 onwards is the elevation_predicted using the alpha, which will
    # be compared later against the real removed data
    data = num.zeros((len(points), 3 + len(alphas)), float)

    # gets relative point from sample
    data[:,0] = points[:,0]
    data[:,1] = points[:,1]
    elevation_sample = G_small.get_attributes(attribute_name=attribute_smoothed)
    data[:,2] = elevation_sample

    # One row per alpha: [alpha, normalised covariance]
    normal_cov = num.zeros((len(alphas), 2), float)

    if verbose:
        print('Determine difference between predicted results and actual data')

    # The domains are visited in dictionary order; the rows are sorted by
    # alpha afterwards.
    for i, alpha in enumerate(domains):
        if verbose: print('Alpha = %s' % (alpha,))

        points_geo = domains[alpha].geo_reference.change_points_geo_ref(points)

        # returns the predicted elevation of the points that were "split" out
        # of the original data set for one particular alpha
        elevation_predicted = \
            domains[alpha].quantities[attribute_smoothed].\
                get_values(interpolation_points=points_geo)

        # add predicted elevation to array that starts with x, y, z...
        data[:,i+3] = elevation_predicted

        sample_cov = cov(elevation_sample)
        ele_cov = cov(elevation_sample - elevation_predicted)
        normal_cov[i,:] = [alpha, ele_cov / sample_cov]
        print('memory usage during compare %s' % (mem_usage(),))
        if verbose:
            print('cov %s =  %s' % (normal_cov[i][0], normal_cov[i][1]))

    # Sort the rows by alpha.  axis=0 is required: without it numpy's take
    # works on the flattened array and returns a 1D result.
    normal_cov_new = num.take(normal_cov, num.argsort(normal_cov[:,0]),
                              axis=0)

    if plot_name is not None:
        from pylab import savefig, semilogx, loglog

        semilogx(normal_cov_new[:,0], normal_cov_new[:,1])
        loglog(normal_cov_new[:,0], normal_cov_new[:,1])
        savefig(plot_name, dpi=300)

    if mesh_is_temporary:
        remove(mesh_file)

    # Row holding the smallest normalised covariance
    best_row = num.argmin(normal_cov_new, axis=0)[1]

    return (min(normal_cov_new[:,1]), normal_cov_new[best_row, 0])
1930
1931
if __name__ == "__main__":
    # Running this module as a script does nothing.
    pass
1934

Note: See TracBrowser for help on using the repository browser.

Download in other formats: