source: branches/numpy/anuga/geospatial_data/geospatial_data.py @ 6360

Last change on this file since 6360 was 6360, checked in by rwilson, 15 years ago

Ongoing conversion changes.

File size: 69.9 KB
Line 
1"""Class Geospatial_data - Manipulation of locations on the planet and
2associated attributes.
3
4"""
5
6from sys import maxint
7from os import access, F_OK, R_OK,remove
8from types import DictType
9from warnings import warn
10from string import lower
11from RandomArray import randint, seed, get_seed
12from copy import deepcopy
13from Scientific.IO.NetCDF import NetCDFFile
14
15import numpy as num
16
17from anuga.coordinate_transforms.lat_long_UTM_conversion import UTMtoLL
18from anuga.utilities.numerical_tools import ensure_numeric
19from anuga.coordinate_transforms.geo_reference import Geo_reference, \
20     TitleError, DEFAULT_ZONE, ensure_geo_reference, write_NetCDF_georeference
21from anuga.coordinate_transforms.redfearn import convert_from_latlon_to_utm
22from anuga.utilities.system_tools import clean_line
23from anuga.utilities.anuga_exceptions import ANUGAError
24from anuga.config import points_file_block_line_size as MAX_READ_LINES
25from anuga.config import netcdf_mode_r, netcdf_mode_w, netcdf_mode_a
26from anuga.config import netcdf_float
27
28DEFAULT_ATTRIBUTE = 'elevation'
29
30
31##
32# @brief ??
33class Geospatial_data:
34
    ##
    # @brief Instantiate from data points, lat/longs, or a points file.
    # @param data_points Mx2 iterable of tuples or array of x,y coordinates
    #                    (may also be a points file name).
    # @param attributes Associated values for each data point.
    # @param geo_reference Origin of the data points (Geo_reference or None).
    # @param default_attribute_name Attribute returned by default from
    #                               get_attributes().
    # @param file_name Name of input .pts, .txt or .csv file.
    # @param latitudes Vector of latitudes (alternative to data_points).
    # @param longitudes Vector of longitudes (alternative to data_points).
    # @param points_are_lats_longs True if points are lat/long, not UTM.
    # @param max_read_lines Size of block to read, if blocking.
    # @param load_file_now True if blocking but we want to read file now.
    # @param verbose True if this class instance is verbose.
    def __init__(self,
                 data_points=None, # this can also be a points file name
                 attributes=None,
                 geo_reference=None,
                 default_attribute_name=None,
                 file_name=None,
                 latitudes=None,
                 longitudes=None,
                 points_are_lats_longs=False,
                 max_read_lines=None,
                 load_file_now=True,
                 verbose=False):
        """Create instance from data points and associated attributes

        data_points: x,y coordinates in meters. Type must be either a
        sequence of 2-tuples or an Mx2 numeric array of floats.  A file name
        with extension .txt, .csv or .pts can also be passed in here.

        attributes: Associated values for each data point. The type
        must be either a list or an array of length M or a dictionary
        of lists (or arrays) of length M. In the latter case the keys
        in the dictionary represent the attribute names, in the former
        the attribute will get the default name "elevation".

        geo_reference: Object representing the origin of the data
        points. It contains UTM zone, easting and northing and data
        points are assumed to be relative to this origin.
        If geo_reference is None, the default geo ref object is used.

        default_attribute_name: Name of default attribute to be used with
        get_attribute_values. The idea is that the dataset can be
        equipped with information about which attribute to return.
        If None, the default is the "first"

        latitudes, longitudes: Vectors of latitudes and longitudes,
        used to specify location instead of points.

        points_are_lats_longs: Set this as true if the points are actually
        lats and longs, not UTM

        max_read_lines: The number of rows read into memory when using
        blocking to read a file.

        load_file_now:  If true the file is automatically loaded
        into the geospatial instance. Used when blocking.

        file_name: Name of input netCDF file or .txt file. netCDF file must
        have dimensions "points" etc.
        .txt file is a comma separated file with x, y and attribute
        data.

        The first line has the titles of the columns.  The first two
        column titles are checked to see if they start with lat or
        long (not case sensitive).  If so the data is assumed to be
        latitude and longitude, in decimal format and converted to
        UTM.  Otherwise the first two columns are assumed to be the x
        and y, and the title names actually used are ignored.


        The format for a .txt file is:
            1st line:     [column names]
            other lines:  x y [attributes]

            for example:
            x, y, elevation, friction
            0.6, 0.7, 4.9, 0.3
            1.9, 2.8, 5, 0.3
            2.7, 2.4, 5.2, 0.3

        The first two columns have to be x, y or lat, long
        coordinates.


        The format for a Points dictionary is:
          ['pointlist'] a 2 column array describing points. 1st column x,
          2nd column y.
          ['attributelist'], a dictionary of 1D arrays, representing
          attribute values at the point.  The dictionary key is the attribute
          header.
          ['geo_reference'] a Geo_reference object. Use if the point
          information is relative. This is optional.
            eg
            dic['pointlist'] = [[1.0,2.0],[3.0,5.0]]
            dic['attributelist']['elevation'] = [7.0,5.0]

        verbose:
        """

        if isinstance(data_points, basestring):
            # assume data_points is really a file name
            file_name = data_points

        self.set_verbose(verbose)
        self.geo_reference = None # create the attribute
        self.file_name = file_name

        # Fall back to the configured block size when none was given
        if max_read_lines is None:
            self.max_read_lines = MAX_READ_LINES
        else:
            self.max_read_lines = max_read_lines

        if file_name is None:
            # Data supplied directly, possibly as lat/longs which must be
            # converted to UTM points first
            if latitudes is not None \
               or longitudes is not None \
               or points_are_lats_longs:
                data_points, geo_reference = \
                    _set_using_lat_long(latitudes=latitudes,
                                        longitudes=longitudes,
                                        geo_reference=geo_reference,
                                        data_points=data_points,
                                        points_are_lats_longs=\
                                        points_are_lats_longs)
            self.check_data_points(data_points)
            self.set_attributes(attributes)
            self.set_geo_reference(geo_reference)
            self.set_default_attribute_name(default_attribute_name)
        elif load_file_now is True:
            # watch for case where file name and points,
            # attributes etc are provided!!
            # if file name then all provided info will be removed!

            if verbose is True:
                if file_name is not None:
                    print 'Loading Geospatial data from file: %s' % file_name

            self.import_points_file(file_name, verbose=verbose)

            # Re-run the checks/setters on the freshly imported data
            self.check_data_points(self.data_points)
            self.set_attributes(self.attributes)
            self.set_geo_reference(self.geo_reference)
            self.set_default_attribute_name(default_attribute_name)

        if verbose is True:
            if file_name is not None:
                print 'Geospatial data created from file: %s' % file_name
                if load_file_now is False:
                    print 'Data will be loaded blockwise on demand'

                    if file_name.endswith('csv') or file_name.endswith('txt'):
                        pass
                        # This message was misleading.
                        # FIXME (Ole): Are we blocking here or not?
                        # print 'ASCII formats are not that great for '
                        # print 'blockwise reading. Consider storing this'
                        # print 'data as a pts NetCDF format'
193
194    ##
195    # @brief Return length of the points set.
196    def __len__(self):
197        return len(self.data_points)
198
199    ##
200    # @brief Return a string representation of the points set.
201    def __repr__(self):
202        return str(self.get_data_points(absolute=True))
203
204    ##
205    # @brief Check data points.
206    # @param data_points Points data to check and store in instance.
207    # @note Throws ValueError exception if no data.
208    def check_data_points(self, data_points):
209        """Checks data points"""
210
211        if data_points is None:
212            self.data_points = None
213            msg = 'There is no data or file provided!'
214            raise ValueError, msg
215
216        else:
217            self.data_points = ensure_numeric(data_points)
218            return
219
220            print 'self.data_points=%s' % str(self.data_points)
221            print 'self.data_points.shape=%s' % str(self.data_points.shape)
222            if not (0,) == self.data_points.shape:
223                assert len(self.data_points.shape) == 2
224                assert self.data_points.shape[1] == 2
225
226    ##
227    # @brief Check and assign attributes data.
228    # @param attributes Dictionary or scalar to save as .attributes.
229    # @note Throws exception if unable to convert dict keys to numeric.
230    def set_attributes(self, attributes):
231        """Check and assign attributes dictionary"""
232
233        if attributes is None:
234            self.attributes = None
235            return
236
237        if not isinstance(attributes, DictType):
238            # Convert single attribute into dictionary
239            attributes = {DEFAULT_ATTRIBUTE: attributes}
240
241        # Check input attributes
242        for key in attributes.keys():
243            try:
244                attributes[key] = ensure_numeric(attributes[key])
245            except:
246                msg = ("Attribute '%s' (%s) could not be converted to a"
247                       "numeric vector" % (str(key), str(attributes[key])))
248                raise Exception, msg
249
250        self.attributes = attributes
251
252    ##
253    # @brief Set the georeference of geospatial data.
254    # @param geo_reference The georeference data to set.
255    # @note Will raise exception if param not instance of Geo_reference.
256    def set_geo_reference(self, geo_reference):
257        """Set the georeference of geospatial data.
258
259        It can also be used to change the georeference and will ensure that
260        the absolute coordinate values are unchanged.
261        """
262
263        from anuga.coordinate_transforms.geo_reference import Geo_reference
264
265        if geo_reference is None:
266            # Use default - points are in absolute coordinates
267            geo_reference = Geo_reference()
268
269        # Allow for tuple (zone, xllcorner, yllcorner)
270        geo_reference = ensure_geo_reference(geo_reference)
271
272        if not isinstance(geo_reference, Geo_reference):
273            # FIXME (Ole): This exception will be raised even
274            # if geo_reference is None. Is that the intent Duncan?
275            msg = 'Argument geo_reference must be a valid Geo_reference '
276            msg += 'object or None.'
277            raise Expection, msg
278
279        # If a geo_reference already exists, change the point data according to
280        # the new geo reference
281        if  self.geo_reference is not None:
282            self.data_points = self.get_data_points(geo_reference=geo_reference)
283
284        self.geo_reference = geo_reference
285
    ##
    # @brief Set the default attribute name used by get_attributes().
    # @param default_attribute_name The name to use as default; may be None,
    #        in which case get_attributes() falls back to the first key in
    #        the attributes dictionary.
    def set_default_attribute_name(self, default_attribute_name):
        self.default_attribute_name = default_attribute_name
291
292    ##
293    # @brief Set the instance verbose flag.
294    # @param verbose The value to save.
295    # @note Will raise exception if param is not True or False.
296    def set_verbose(self, verbose=False):
297        if verbose in [False, True]:
298            self.verbose = verbose
299        else:
300            msg = 'Illegal value: %s' % str(verbose)
301            raise Exception, msg
302
303    ##
304    # @brief Clip geospatial data by a given polygon.
305    # @param polygon The polygon to clip with.
306    # @param closed True if points on clip boundary are not included in result.
307    # @param verbose True if this function is verbose.
308    def clip(self, polygon, closed=True, verbose=False):
309        """Clip geospatial data by a polygon
310
311        Input
312          polygon - Either a list of points, an Nx2 array or
313                    a Geospatial data object.
314          closed - (optional) determine whether points on boundary should be
315          regarded as belonging to the polygon (closed = True)
316          or not (closed = False). Default is True.
317
318        Output
319          New geospatial data object representing points inside
320          specified polygon.
321
322
323        Note - this method is non-destructive and leaves the data in 'self'
324        unchanged
325        """
326
327        from anuga.utilities.polygon import inside_polygon
328
329        if isinstance(polygon, Geospatial_data):
330            # Polygon is an object - extract points
331            polygon = polygon.get_data_points()
332
333        points = self.get_data_points()
334        inside_indices = inside_polygon(points, polygon, closed, verbose)
335
336        clipped_G = self.get_sample(inside_indices)
337
338        return clipped_G
339
340    ##
341    # @brief Clip points data by polygon, return points outside polygon.
342    # @param polygon The polygon to clip with.
343    # @param closed True if points on clip boundary are not included in result.
344    # @param verbose True if this function is verbose.
345    def clip_outside(self, polygon, closed=True, verbose=False):
346        """Clip geospatial date by a polygon, keeping data OUTSIDE of polygon
347
348        Input
349          polygon - Either a list of points, an Nx2 array or
350                    a Geospatial data object.
351          closed - (optional) determine whether points on boundary should be
352          regarded as belonging to the polygon (closed = True)
353          or not (closed = False). Default is True.
354
355        Output
356          Geospatial data object representing point OUTSIDE specified polygon
357        """
358
359        from anuga.utilities.polygon import outside_polygon
360
361        if isinstance(polygon, Geospatial_data):
362            # Polygon is an object - extract points
363            polygon = polygon.get_data_points()
364
365        points = self.get_data_points()
366        outside_indices = outside_polygon(points, polygon, closed,verbose)
367
368        clipped_G = self.get_sample(outside_indices)
369
370        return clipped_G
371
372    ##
373    # @brief Get instance geo_reference data.
374    def get_geo_reference(self):
375        return self.geo_reference
376
377    ##
378    # @brief Get coordinates for all data points as an Nx2 array.
379    # @param absolute If True, return UTM, else relative to xll/yll corners.
380    # @param geo_reference If supplied, points are relative to it.
381    # @param as_lat_long If True, return points as lat/lon.
382    # @param isSouthHemisphere If True, return lat/lon points in S.Hemi.
383    # @return A set of data points, in appropriate form.
384    def get_data_points(self, absolute=True, geo_reference=None,
385                        as_lat_long=False, isSouthHemisphere=True):
386        """Get coordinates for all data points as an Nx2 array
387
388        If absolute is False returned coordinates are relative to the
389        internal georeference's xll and yll corners, otherwise
390        absolute UTM coordinates are returned.
391
392        If a geo_reference is passed the points are returned relative
393        to that geo_reference.
394
395        isSH (isSouthHemisphere) is only used when getting data
396        points "as_lat_long" is True and if FALSE will return lats and
397        longs valid for the Northern Hemisphere.
398
399        Default: absolute is True.
400        """
401
402        if as_lat_long is True:
403            msg = "Points need a zone to be converted into lats and longs"
404            assert self.geo_reference is not None, msg
405            zone = self.geo_reference.get_zone()
406            assert self.geo_reference.get_zone() is not DEFAULT_ZONE, msg
407            lats_longs = []
408            for point in self.get_data_points(True):
409                # UTMtoLL(northing, easting, zone,
410                lat_calced, long_calced = UTMtoLL(point[1], point[0],
411                                                  zone, isSouthHemisphere)
412                lats_longs.append((lat_calced, long_calced)) # to hash
413            return lats_longs
414
415        if absolute is True and geo_reference is None:
416            return self.geo_reference.get_absolute(self.data_points)
417        elif geo_reference is not None:
418            return geo_reference.change_points_geo_ref(self.data_points,
419                                                       self.geo_reference)
420        else:
421            # If absolute is False
422            return self.data_points
423
    ##
    # @brief Get values of one named attribute.
    # @param attribute_name Name of the attribute to get values for.
    # @note If name passed is None, the default attribute name is used
    #       (or, failing that, the first key in the attributes dictionary).
    def get_attributes(self, attribute_name=None):
        """Return values for one named attribute.

        If attribute_name is None, default_attribute_name is used;
        if that is also None, the first attribute key found is used.

        Raises AssertionError if the name is not in the attribute dict.
        """

        if attribute_name is None:
            if self.default_attribute_name is not None:
                attribute_name = self.default_attribute_name
            else:
                # Fall back to the first key in the dictionary
                attribute_name = self.attributes.keys()[0]
                # above line takes the first one from keys

        if self.verbose is True:
            print 'Using attribute %s' %attribute_name
            print 'Available attributes: %s' %(self.attributes.keys())

        msg = 'Attribute name %s does not exist in data set' % attribute_name
        assert self.attributes.has_key(attribute_name), msg

        return self.attributes[attribute_name]
449
450    ##
451    # @brief Get all instance attributes.
452    # @return The instance attribute dictionary, or None if no attributes.
453    def get_all_attributes(self):
454        """Return values for all attributes.
455        The return value is either None or a dictionary (possibly empty).
456        """
457
458        return self.attributes
459
    ##
    # @brief Override __add__() to allow addition of geospatial objects.
    # @param self This object.
    # @param other The second object (may be None).
    # @return A new Geospatial_data object holding the points of both.
    # @note Raises Exception if the two objects' attribute keys differ.
    def __add__(self, other):
        """Returns the addition of 2 geospatial objects,
        objects are concatenated to the end of each other

        NOTE: doesn't add if objects contain different
        attributes

        Always return absolute points!
        This also means, that if you add None to the object,
        it will be turned into absolute coordinates

        other can be None in which case nothing is added to self.
        """

        # find objects zone and checks if the same
        geo_ref1 = self.get_geo_reference()
        zone1 = geo_ref1.get_zone()

        if other is not None:
            geo_ref2 = other.get_geo_reference()
            zone2 = geo_ref2.get_zone()
            # reconcile_zones is expected to reject incompatible zones
            geo_ref1.reconcile_zones(geo_ref2)
            # Concatenate in absolute coordinates so differing origins
            # cannot corrupt the result
            new_points = num.concatenate((self.get_data_points(absolute=True),
                                          other.get_data_points(absolute=True)),
                                         axis = 0)

            # Concatenate attributes if any
            if self.attributes is None:
                if other.attributes is not None:
                    msg = 'Geospatial data must have the same '
                    msg += 'attributes to allow addition.'
                    raise Exception, msg

                new_attributes = None
            else:
                new_attributes = {}
                for x in self.attributes.keys():
                    if other.attributes.has_key(x):
                        attrib1 = self.attributes[x]
                        attrib2 = other.attributes[x]
                        new_attributes[x] = num.concatenate((attrib1, attrib2))
                    else:
                        msg = 'Geospatial data must have the same \n'
                        msg += 'attributes to allow addition.'
                        raise Exception, msg
        else:
            # other is None: nothing to concatenate
            new_points = self.get_data_points(absolute=True)
            new_attributes = self.attributes

        # Instantiate new data object and return absolute coordinates
        # (origin zeroed; zone preserved)
        new_geo_ref = Geo_reference(geo_ref1.get_zone(), 0.0, 0.0)
        return Geospatial_data(new_points, new_attributes, new_geo_ref)
518
519    ##
520    # @brief Override the addition case where LHS isn't geospatial object.
521    # @param self This object.
522    # @param other The second object.
523    # @return The new geospatial object.
524    def __radd__(self, other):
525        """Handle cases like None + Geospatial_data(...)"""
526
527        return self + other
528
    ###
    #  IMPORT/EXPORT POINTS FILES
    ###

    ##
    # @brief Import a .txt, .csv or .pts points data file.
    # @param file_name Name of the file to import.
    # @param delimiter Ignored by this implementation (not referenced below);
    #                  presumably retained for backward compatibility.
    # @param verbose True if this function is to be verbose.
    # @note Will throw IOError or SyntaxError if there is a problem.
    def import_points_file(self, file_name, delimiter=None, verbose=False):
        """Load a .txt, .csv or .pts file.

        Note: will throw an IOError/SyntaxError if it can't load the file.
        Catch these!

        Post condition: self.data_points, self.attributes and
        self.geo_reference have been set from the file contents.
        """

        if access(file_name, F_OK) == 0 :
            msg = 'File %s does not exist or is not accessible' % file_name
            raise IOError, msg

        attributes = {}
        if file_name[-4:]== ".pts":
            # Binary NetCDF points file
            try:
                data_points, attributes, geo_reference = \
                             _read_pts_file(file_name, verbose)
            except IOError, e:
                msg = 'Could not open file %s ' % file_name
                raise IOError, msg
        elif file_name[-4:]== ".txt" or file_name[-4:]== ".csv":
            # Text file: first line titles, then x, y, attribute rows
            try:
                data_points, attributes, geo_reference = \
                             _read_csv_file(file_name, verbose)
            except IOError, e:
                # This should only be if a file is not found
                msg = 'Could not open file %s. ' % file_name
                msg += 'Check the file location.'
                raise IOError, msg
            except SyntaxError, e:
                # This should only be if there is a format error
                # NOTE(review): Error_message is assumed to be defined
                # elsewhere in this module - verify.
                msg = 'Problem with format of file %s. \n' %file_name
                msg += Error_message['IOError']
                raise SyntaxError, msg
        else:
            msg = 'Extension %s is unknown' %file_name[-4:]
            raise IOError, msg

        self.data_points = data_points
        self.attributes = attributes
        self.geo_reference = geo_reference
581
    ##
    # @brief Write points data to a file (.pts, .txt/.csv or .urs).
    # @param file_name Path of the file to write; extension selects format.
    # @param absolute If True, write absolute point values and zero the
    #                 georeference offsets; if False (.pts only), write
    #                 points relative to xll/yll.
    # @param as_lat_long If True, write points as lat/long (.txt/.csv only).
    # @param isSouthHemisphere If True, lat/longs are southern-hemisphere.
    # @note Raises IOError for an unknown file extension.
    def export_points_file(self, file_name, absolute=True,
                           as_lat_long=False, isSouthHemisphere=True):

        """Write a points file as a text (.csv) or binary (.pts) file

        file_name is the file name, including the extension
        The point_dict is defined at the top of this file.

        If absolute is True data the xll and yll are added to the points value
        and the xll and yll of the geo_reference are set to 0.

        If absolute is False data points are returned as relative to the xll
        and yll and geo_reference remains unaffected

        isSouthHemisphere: is only used when getting data
        points "as_lat_long" is True and if FALSE will return lats and
        longs valid for the Northern Hemisphere.
        """

        if (file_name[-4:] == ".pts"):
            if absolute is True:
                # Zero the offsets on a copy so stored points are absolute
                # without mutating this instance's georeference
                geo_ref = deepcopy(self.geo_reference)
                geo_ref.xllcorner = 0
                geo_ref.yllcorner = 0
                _write_pts_file(file_name,
                                self.get_data_points(absolute),
                                self.get_all_attributes(),
                                geo_ref)
            else:
                _write_pts_file(file_name,
                                self.get_data_points(absolute),
                                self.get_all_attributes(),
                                self.get_geo_reference())

        elif file_name[-4:] == ".txt" or file_name[-4:] == ".csv":
            # Text formats can only hold absolute coordinates
            msg = "ERROR: trying to write a .txt file with relative data."
            assert absolute, msg
            _write_csv_file(file_name,
                            self.get_data_points(absolute=True,
                                                 as_lat_long=as_lat_long,
                                  isSouthHemisphere=isSouthHemisphere),
                            self.get_all_attributes(),
                            as_lat_long=as_lat_long)

        elif file_name[-4:] == ".urs" :
            # .urs files are always written as lat/longs
            msg = "ERROR: Can not write a .urs file as a relative file."
            assert absolute, msg
            _write_urs_file(file_name,
                            self.get_data_points(as_lat_long=True,
                                  isSouthHemisphere=isSouthHemisphere))

        else:
            msg = 'Unknown file type %s ' %file_name
            raise IOError, msg
642
643    ##
644    # @brief Get a subset of data that is referred to by 'indices'.
645    # @param indices A list of indices to select data subset with.
646    # @return A geospatial object containing data subset.
647    def get_sample(self, indices):
648        """ Returns a object which is a subset of the original
649        and the data points and attributes in this new object refer to
650        the indices provided
651
652        Input
653            indices- a list of integers that represent the new object
654        Output
655            New geospatial data object representing points specified by
656            the indices
657        """
658
659        # FIXME: add the geo_reference to this
660        points = self.get_data_points()
661        sampled_points = num.take(points, indices)
662
663        attributes = self.get_all_attributes()
664
665        sampled_attributes = {}
666        if attributes is not None:
667            for key, att in attributes.items():
668                sampled_attributes[key] = num.take(att, indices)
669
670        return Geospatial_data(sampled_points, sampled_attributes)
671
    ##
    # @brief Randomly split one geospatial object into two disjoint subsets.
    # @param factor Fraction of the points to place in the first result.
    # @param seed_num Random seed - used mainly for unit tests.
    # @param verbose True if this function is to be verbose.
    # @note Points for each result object are selected randomly.
    def split(self, factor=0.5, seed_num=None, verbose=False):
        """Returns two geospatial_data object, first is the size of the 'factor'
        smaller the original and the second is the remainder. The two
        new objects are disjoint sets of each other.

        Points of the two new objects have been selected RANDOMLY.

        This method creates two lists of indices which are passed into
        get_sample.  The lists are created using random numbers, and
        they are unique sets eg.  total_list(1,2,3,4,5,6,7,8,9)
        random_list(1,3,6,7,9) and remainder_list(0,2,4,5,8)

        Input -  the factor which to split the object, if 0.1 then 10% of the
                 together object will be returned

        Output - two geospatial_data objects that are disjoint sets of the
                 original
        """

        i=0
        self_size = len(self)
        random_list = []
        remainder_list = []
        # Number of points that go into the first (random) subset
        new_size = round(factor*self_size)

        # Find unique random numbers
        if verbose: print "make unique random number list and get indices"

        total=num.array(range(self_size))
        total_list = total.tolist()

        if verbose: print "total list len", len(total_list)

        # There will be repeated random numbers, however this will not be a
        # problem as they are being 'pop'ed out of the array, so if there
        # are two numbers the same they will pop different indices,
        # still basically random
        ## create list of non-unique random numbers
        if verbose: print "create random numbers list %s long" %new_size

        # Set seed if provided, mainly important for unit test!
        # plus recalculate seed when no seed provided.
        if seed_num != None:
            seed(seed_num, seed_num)
        else:
            seed()

        if verbose: print "seed:", get_seed()

        random_num = randint(0, self_size-1, (int(new_size),))
        random_num = random_num.tolist()

        # need to sort and reverse so the pop() works correctly
        # (popping from the highest index downwards keeps lower
        # indices valid)
        random_num.sort()
        random_num.reverse()

        if verbose: print "make random number list and get indices"

        j=0
        k=1
        remainder_list = total_list[:]

        # pops array index (random_num) from remainder_list
        # (which starts as the
        # total_list) and appends to random_list
        random_num_len = len(random_num)
        for i in random_num:
            random_list.append(remainder_list.pop(i))
            j += 1
            # prints progress
            if verbose and round(random_num_len/10*k) == j:
                print '(%s/%s)' % (j, random_num_len)
                k += 1

        # FIXME: move to tests, it might take a long time
        # then create an array of random length between 500 and 1000,
        # and use a random factor between 0 and 1
        # setup for assertion
        test_total = random_list[:]
        test_total.extend(remainder_list)
        test_total.sort()
        msg = 'The two random lists made from the original list when added ' \
              'together DO NOT equal the original list'
        assert total_list == test_total, msg

        # Get new samples
        if verbose: print "get values of indices for random list"
        G1 = self.get_sample(random_list)
        if verbose: print "get values of indices for opposite of random list"
        G2 = self.get_sample(remainder_list)

        return G1, G2
770
771    ##
772    # @brief Allow iteration over this object.
    def __iter__(self):
        """Prepare this object for blockwise iteration over its file.

        Reads the file header (and, for .pts files, the number of points)
        and stores the blocking state on self so that next() can return
        one Geospatial_data block per call.

        Returns self (the iterator).
        """
        from Scientific.IO.NetCDF import NetCDFFile
        # FIXME - what to do if the file isn't there

        # FIXME (Ole): Shouldn't this go into the constructor?
        # This method acts like the constructor when blocking.
        # ... and shouldn't it be called block_size?
        #
        # Fall back to the configured default block size.
        if self.max_read_lines is None:
            self.max_read_lines = MAX_READ_LINES

        if self.file_name[-4:] == ".pts":
            # See if the file is there.  Throw a QUIET IO error if it isn't
            fd = open(self.file_name,'r')
            fd.close()

            # Throws prints to screen if file not present
            self.fid = NetCDFFile(self.file_name, netcdf_mode_r)

            # Cache georeference, attribute names and point count for next().
            (self.blocking_georef,
             self.blocking_keys,
             self.number_of_points) = _read_pts_file_header(self.fid,
                                                            self.verbose)
            self.start_row = 0
            self.last_row = self.number_of_points
            self.show_verbose = 0
            # Progress is reported roughly every tenth of the file.
            self.verbose_block_size = (self.last_row + 10)/10
            self.block_number = 0
            self.number_of_blocks = self.number_of_points/self.max_read_lines
            # This computes the number of full blocks. The last block may be
            # smaller and won't be included in this estimate.

            if self.verbose is True:
                print 'Reading %d points (in ~%d blocks) from file %s. ' \
                      % (self.number_of_points,
                         self.number_of_blocks,
                         self.file_name),
                print 'Each block consists of %d data points' \
                      % self.max_read_lines

        else:
            # Assume the file is a csv file
            file_pointer = open(self.file_name)
            # Only the header is read now; next() reads the body in blocks.
            self.header, self.file_pointer = _read_csv_file_header(file_pointer)
            self.blocking_georef = None # Used for reconciling zones

        return self
823
824    ##
825    # @brief Read another block into the instance.
    def next(self):
        """Read the next block of the file and return it as a new
        Geospatial_data instance.

        Raises StopIteration when the file is exhausted; the blocking
        state set up by __iter__ is removed at that point.
        """

        if self.file_name[-4:] == ".pts":
            if self.start_row == self.last_row:
                # Read the end of the file last iteration
                # Remove blocking attributes
                self.fid.close()
                del self.max_read_lines
                del self.blocking_georef
                del self.last_row
                del self.start_row
                del self.blocking_keys
                del self.fid
                raise StopIteration
            # Clamp the block to the end of the file.
            fin_row = self.start_row + self.max_read_lines
            if fin_row > self.last_row:
                fin_row = self.last_row

            if self.verbose is True:
                # Report progress only when this block crosses the next
                # reporting threshold set up in __iter__.
                if self.show_verbose >= self.start_row \
                   and self.show_verbose < fin_row:
                    print 'Reading block %d (points %d to %d) out of %d'\
                          %(self.block_number,
                            self.start_row,
                            fin_row,
                            self.number_of_blocks)

                    self.show_verbose += max(self.max_read_lines,
                                             self.verbose_block_size)


            # Read next block
            pointlist, att_dict, = _read_pts_file_blocking(self.fid,
                                                           self.start_row,
                                                           fin_row,
                                                           self.blocking_keys)

            geo = Geospatial_data(pointlist, att_dict, self.blocking_georef)
            self.start_row = fin_row

            self.block_number += 1

        else:
            # Assume the file is a csv file
            try:
                (pointlist,
                 att_dict,
                 geo_ref,
                 self.file_pointer) = \
                        _read_csv_file_blocking(self.file_pointer,
                                                self.header[:],
                                                max_read_lines=\
                                                    self.max_read_lines,
                                                verbose=self.verbose)

                # Check that the zones haven't changed.
                if geo_ref is not None:
                    geo_ref.reconcile_zones(self.blocking_georef)
                    self.blocking_georef = geo_ref
                elif self.blocking_georef is not None:
                    msg = 'Geo reference given, then not given.'
                    msg += ' This should not happen.'
                    raise ValueError, msg
                geo = Geospatial_data(pointlist, att_dict, geo_ref)
            except StopIteration:
                # Normal end of file: tidy up the blocking state.
                self.file_pointer.close()
                del self.header
                del self.file_pointer
                raise StopIteration
            except ANUGAError:
                self.file_pointer.close()
                del self.header
                del self.file_pointer
                raise
            except SyntaxError:
                self.file_pointer.close()
                del self.header
                del self.file_pointer
                # This should only be if there is a format error
                msg = 'Could not open file %s. \n' % self.file_name
                msg += Error_message['IOError']
                raise SyntaxError, msg
        return geo
910
911
##################### Error messages ###########
# Canned error text shown when a .txt/.csv file cannot be parsed.
Error_message = {}
Em = Error_message
Em['IOError'] = (
    "NOTE: The format for a comma separated .txt/.csv file is:\n"
    "        1st line:     [column names]\n"
    "        other lines:  [x value], [y value], [attributes]\n"
    "\n"
    "           for example:\n"
    "           x, y, elevation, friction\n"
    "           0.6, 0.7, 4.9, 0.3\n"
    "           1.9, 2.8, 5, 0.3\n"
    "           2.7, 2.4, 5.2, 0.3\n"
    "\n"
    "The first two columns are assumed to be x, y coordinates.\n"
    "The attribute values must be numeric.\n"
)
927
928##
929# @brief ??
930# @param latitudes ??
931# @param longitudes ??
932# @param geo_reference ??
933# @param data_points ??
934# @param points_are_lats_longs ??
def _set_using_lat_long(latitudes,
                        longitudes,
                        geo_reference,
                        data_points,
                        points_are_lats_longs):
    """Convert latitude/longitude information to UTM data points.

    When points_are_lats_longs is True the supplied data_points are
    assumed to be in (lat, long) order and are split into the two
    coordinate sequences.

    Returns (data_points, geo_reference) where the points are in UTM
    coordinates and the Geo_reference carries the computed zone.
    """

    # Lat/long input is mutually exclusive with an explicit georeference.
    if geo_reference is not None:
        raise ValueError("A georeference is specified yet latitude and "
                         "longitude are also specified!")

    # Likewise, plain data points conflict with separate lat/long values.
    if data_points is not None and not points_are_lats_longs:
        raise ValueError("Data points are specified yet latitude and "
                         "longitude are also specified.")

    if points_are_lats_longs:
        if data_points is None:
            raise ValueError("Data points are not specified.")
        # Split the (lat, long) pairs into separate 1-D sequences.
        lats_longs = ensure_numeric(data_points)
        latitudes = num.ravel(lats_longs[:,0:1])
        longitudes = num.ravel(lats_longs[:,1:])

    # Both coordinate sequences must be available by now.
    if latitudes is None and longitudes is None:
        raise ValueError("Latitudes and Longitudes are not specified.")
    if latitudes is None:
        raise ValueError("Longitudes are specified yet latitudes aren't.")
    if longitudes is None:
        raise ValueError("Latitudes are specified yet longitudes aren't.")

    data_points, zone = convert_from_latlon_to_utm(latitudes=latitudes,
                                                   longitudes=longitudes)
    return data_points, Geo_reference(zone=zone)
975
976
977##
978# @brief Read a .pts data file.
979# @param file_name Path to file to read.
980# @param verbose True if this function is to be verbose.
981# @return (pointlist, attributes, geo_reference)
982def _read_pts_file(file_name, verbose=False):
983    """Read .pts NetCDF file
984
985    Return a dic of array of points, and dic of array of attribute
986    eg
987    dic['points'] = [[1.0,2.0],[3.0,5.0]]
988    dic['attributelist']['elevation'] = [[7.0,5.0]]
989    """
990
991    from Scientific.IO.NetCDF import NetCDFFile
992
993    if verbose: print 'Reading ', file_name
994
995    # See if the file is there.  Throw a QUIET IO error if it isn't
996    fd = open(file_name,'r')
997    fd.close()
998
999    # Throws prints to screen if file not present
1000    fid = NetCDFFile(file_name, netcdf_mode_r)
1001
1002    pointlist = num.array(fid.variables['points'])
1003    keys = fid.variables.keys()
1004
1005    if verbose: print 'Got %d variables: %s' % (len(keys), keys)
1006
1007    try:
1008        keys.remove('points')
1009    except IOError, e:
1010        fid.close()
1011        msg = "Expected keyword 'points' but could not find it"
1012        raise IOError, msg
1013
1014    attributes = {}
1015    for key in keys:
1016        if verbose: print "reading attribute '%s'" % key
1017
1018        attributes[key] = num.array(fid.variables[key])
1019
1020    try:
1021        geo_reference = Geo_reference(NetCDFObject=fid)
1022    except AttributeError, e:
1023        geo_reference = None
1024
1025    fid.close()
1026
1027    return pointlist, attributes, geo_reference
1028
1029
1030##
1031# @brief Read a .csv data file.
1032# @param file_name Path to the .csv file to read.
1033# @param verbose True if this function is to be verbose.
def _read_csv_file(file_name, verbose=False):
    """Read a .csv points file in one go.

    Returns (pointlist, att_dict, geo_ref), eg
        pointlist = [[1.0,2.0],[3.0,5.0]]
        att_dict['elevation'] = [[7.0,5.0]]
    geo_ref is a Geo_reference or None.
    """

    file_pointer = open(file_name)
    header, file_pointer = _read_csv_file_header(file_pointer)

    try:
        # Read the whole body as a single huge "block".
        # FIXME (Ole) What's up here?
        (pointlist,
         att_dict,
         geo_ref,
         file_pointer) = _read_csv_file_blocking(file_pointer,
                                                 header,
                                                 max_read_lines=1e30)
    except ANUGAError:
        file_pointer.close()
        raise

    file_pointer.close()

    return pointlist, att_dict, geo_ref
1061
1062
1063##
1064# @brief Read a .csv file header.
1065# @param file_pointer Open descriptor of the file to read.
1066# @param delimiter Header line delimiter string, split on this string.
1067# @param verbose True if this function is to be verbose.
1068# @return A tuple of (<cleaned header string>, <input file_pointer>)
1069
CSV_DELIMITER = ','

def _read_csv_file_header(file_pointer,
                          delimiter=CSV_DELIMITER,
                          verbose=False):
    """Read the header line of a .csv file.

    Returns (header, file_pointer) where header is the list of column
    names obtained by splitting the first line on delimiter.
    """

    first_line = file_pointer.readline().strip()
    return clean_line(first_line, delimiter), file_pointer
1083
1084##
1085# @brief Read a .csv file, with blocking.
1086# @param file_pointer Open descriptor of the file to read.
1087# @param header List of already read .csv header fields.
1088# @param delimiter Delimiter string header was split on.
1089# @param max_read_lines The max number of lines to read before blocking.
1090# @param verbose True if this function is to be verbose.
1091# @note Will throw IndexError, SyntaxError exceptions.
1092def _read_csv_file_blocking(file_pointer,
1093                            header,
1094                            delimiter=CSV_DELIMITER,
1095                            max_read_lines=MAX_READ_LINES,
1096                            verbose=False):
1097    """Read the body of a .csv file.
1098    header: The list header of the csv file, with the x and y labels.
1099    """
1100
1101    points = []
1102    pointattributes = []
1103    att_dict = {}
1104
1105    # This is to remove the x and y headers.
1106    header = header[:]
1107    try:
1108        x_header = header.pop(0)
1109        y_header = header.pop(0)
1110    except IndexError:
1111        # if there are not two columns this will occur.
1112        # eg if it is a space seperated file
1113        raise SyntaxError
1114
1115    read_lines = 0
1116    while read_lines < max_read_lines:
1117        line = file_pointer.readline()
1118        numbers = clean_line(line, delimiter)
1119        if len(numbers) <= 1:
1120            break
1121        if line[0] == '#':
1122            continue
1123
1124        read_lines += 1
1125
1126        try:
1127            x = float(numbers[0])
1128            y = float(numbers[1])
1129            points.append([x,y])
1130            numbers.pop(0)
1131            numbers.pop(0)
1132            if len(header) != len(numbers):
1133                file_pointer.close()
1134                msg = "File load error. " \
1135                      "There might be a problem with the file header."
1136                raise SyntaxError, msg
1137            for i,n in enumerate(numbers):
1138                n.strip()
1139                if n != '\n' and n != '':
1140                    #attributes.append(float(n))
1141                    att_dict.setdefault(header[i],[]).append(float(n))
1142        except ValueError:
1143            raise SyntaxError
1144
1145    if points == []:
1146        raise StopIteration
1147
1148    pointlist = num.array(points, num.float)
1149    for key in att_dict.keys():
1150        att_dict[key] = num.array(att_dict[key], num.float)
1151
1152    # Do stuff here so the info is in lat's and longs
1153    geo_ref = None
1154    x_header = lower(x_header[:3])
1155    y_header = lower(y_header[:3])
1156    if (x_header == 'lon' or  x_header == 'lat') \
1157       and (y_header == 'lon' or  y_header == 'lat'):
1158        if x_header == 'lon':
1159            longitudes = num.ravel(pointlist[:,0:1])
1160            latitudes = num.ravel(pointlist[:,1:])
1161        else:
1162            latitudes = num.ravel(pointlist[:,0:1])
1163            longitudes = num.ravel(pointlist[:,1:])
1164
1165        pointlist, geo_ref = _set_using_lat_long(latitudes,
1166                                                 longitudes,
1167                                                 geo_reference=None,
1168                                                 data_points=None,
1169                                                 points_are_lats_longs=False)
1170
1171    return pointlist, att_dict, geo_ref, file_pointer
1172
1173
1174##
1175# @brief Read a .pts file header.
1176# @param fid Handle to the open .pts file.
1177# @param verbose True if the function is to be verbose.
1178# @return (geo_reference, keys, fid.dimensions['number_of_points'])
1179# @note Will throw IOError and AttributeError exceptions.
1180def _read_pts_file_header(fid, verbose=False):
1181    """Read the geo_reference and number_of_points from a .pts file"""
1182
1183    keys = fid.variables.keys()
1184    try:
1185        keys.remove('points')
1186    except IOError, e:
1187        fid.close()
1188        msg = "Expected keyword 'points' but could not find it."
1189        raise IOError, msg
1190
1191    if verbose: print 'Got %d variables: %s' % (len(keys), keys)
1192
1193    try:
1194        geo_reference = Geo_reference(NetCDFObject=fid)
1195    except AttributeError, e:
1196        geo_reference = None
1197
1198    return geo_reference, keys, fid.dimensions['number_of_points']
1199
1200
1201##
1202# @brief Read the body of a .csf file, with blocking.
1203# @param fid Handle to already open file.
1204# @param start_row Start row index of points to return.
1205# @param fin_row End row index of points to return.
1206# @param keys Iterable of keys to return.
1207# @return Tuple of (pointlist, attributes).
1208def _read_pts_file_blocking(fid, start_row, fin_row, keys):
1209    """Read the body of a .csv file."""
1210
1211    pointlist = num.array(fid.variables['points'][start_row:fin_row])
1212
1213    attributes = {}
1214    for key in keys:
1215        attributes[key] = num.array(fid.variables[key][start_row:fin_row])
1216
1217    return pointlist, attributes
1218
1219
1220##
1221# @brief Write a .pts data file.
1222# @param file_name Path to the file to write.
1223# @param write_data_points Data points to write.
1224# @param write_attributes Attributes to write.
1225# @param write_geo_reference Georef to write.
def _write_pts_file(file_name,
                    write_data_points,
                    write_attributes=None,
                    write_geo_reference=None):
    """Write a .pts NetCDF file.

    file_name:           path of the file to create
    write_data_points:   Nx2 array of point coordinates
    write_attributes:    optional {name: values} dict, one value per point
    write_geo_reference: optional Geo_reference to store in the file

    NOTE: an attribute may not be called 'points' since that name is used
    for the coordinate variable itself (format limitation).
    """

    from Scientific.IO.NetCDF import NetCDFFile

    outfile = NetCDFFile(file_name, netcdf_mode_w)

    # Global metadata
    outfile.institution = 'Geoscience Australia'
    outfile.description = 'NetCDF format for compact and portable storage ' \
                          'of spatial point data'

    # Dimensions: number of points, and 2 coordinates (x, y) per point
    number_of_points = write_data_points.shape[0]
    outfile.createDimension('number_of_points', number_of_points)
    outfile.createDimension('number_of_dimensions', 2)

    # The point coordinates themselves
    outfile.createVariable('points', netcdf_float, ('number_of_points',
                                                    'number_of_dimensions'))
    outfile.variables['points'][:] = write_data_points

    # One NetCDF variable per attribute
    if write_attributes is not None:
        for name in write_attributes.keys():
            outfile.createVariable(name, netcdf_float, ('number_of_points',))
            outfile.variables[name][:] = write_attributes[name]

    # Georeference, if supplied
    if write_geo_reference is not None:
        write_NetCDF_georeference(write_geo_reference, outfile)

    outfile.close()
1278
1279
1280##
1281# @brief Write a .csv data file.
1282# @param file_name Path to the file to write.
1283# @param write_data_points Data points to write.
1284# @param write_attributes Attributes to write.
1285# @param as_lat_long True if points are lat/lon, else x/y.
1286# @param delimiter The CSV delimiter to use.
1287def _write_csv_file(file_name,
1288                    write_data_points,
1289                    write_attributes=None,
1290                    as_lat_long=False,
1291                    delimiter=','):
1292    """Write a .csv file."""
1293
1294    points = write_data_points
1295    pointattributes = write_attributes
1296
1297    fd = open(file_name, 'w')
1298
1299    if as_lat_long:
1300        titlelist = "latitude" + delimiter + "longitude"  + delimiter
1301    else:
1302        titlelist = "x" + delimiter + "y"  + delimiter
1303
1304    if pointattributes is not None:
1305        for title in pointattributes.keys():
1306            titlelist = titlelist + title + delimiter
1307        titlelist = titlelist[0:-len(delimiter)] # remove the last delimiter
1308
1309    fd.write(titlelist + "\n")
1310
1311    # <x/lat> <y/long> [attributes]
1312    for i, vert in enumerate( points):
1313        if pointattributes is not None:
1314            attlist = ","
1315            for att in pointattributes.keys():
1316                attlist = attlist + str(pointattributes[att][i]) + delimiter
1317            attlist = attlist[0:-len(delimiter)] # remove the last delimiter
1318            attlist.strip()
1319        else:
1320            attlist = ''
1321
1322        fd.write(str(vert[0]) + delimiter + str(vert[1]) + attlist + "\n")
1323
1324    fd.close()
1325
1326
1327##
1328# @brief Write a URS file.
1329# @param file_name The path of the file to write.
1330# @param points
1331# @param delimiter
1332def _write_urs_file(file_name, points, delimiter=' '):
1333    """Write a URS format file.
1334    export a file, file_name, with the urs format
1335    the data points are in lats and longs
1336    """
1337
1338    fd = open(file_name, 'w')
1339
1340    # first line is # points
1341    fd.write(str(len(points)) + "\n")
1342
1343    # <lat> <long> <id#>
1344    for i, vert in enumerate( points):
1345        fd.write(str(round(vert[0],7)) + delimiter +
1346                 str(round(vert[1],7)) + delimiter + str(i) + "\n")
1347
1348    fd.close()
1349
1350
1351##
1352# @brief ??
1353# @param point_atts ??
1354# @return ??
1355def _point_atts2array(point_atts):
1356    point_atts['pointlist'] = num.array(point_atts['pointlist'], num.float)
1357
1358    for key in point_atts['attributelist'].keys():
1359        point_atts['attributelist'][key] = \
1360                num.array(point_atts['attributelist'][key], num.float)
1361
1362    return point_atts
1363
1364
1365##
1366# @brief Convert geospatial object to a points dictionary.
1367# @param geospatial_data The geospatial object to convert.
1368# @return A points dictionary.
def geospatial_data2points_dictionary(geospatial_data):
    """Convert a Geospatial_data object to a points dictionary.

    The result has keys 'pointlist', 'attributelist' (a mapping of
    attribute name to values) and 'geo_reference'.
    """

    attributes = {}
    for attribute_name in geospatial_data.attributes.keys():
        attributes[attribute_name] = geospatial_data.attributes[attribute_name]

    points_dictionary = {'pointlist': geospatial_data.data_points,
                         'attributelist': attributes,
                         'geo_reference': geospatial_data.geo_reference}

    return points_dictionary
1384
1385
1386##
1387# @brief Convert a points dictionary to a geospatial object.
1388# @param points_dictionary A points dictionary to convert.
def points_dictionary2geospatial_data(points_dictionary):
    """Convert a points dictionary to a Geospatial_data object.

    points_dictionary must contain keys 'pointlist' and 'attributelist';
    'geo_reference' is optional (None is used when it is absent).
    """

    # dict.has_key() is deprecated; the 'in' operator behaves identically
    # and also works on Python 3.
    msg = 'Points dictionary must have key pointlist'
    assert 'pointlist' in points_dictionary, msg

    msg = 'Points dictionary must have key attributelist'
    assert 'attributelist' in points_dictionary, msg

    geo = points_dictionary.get('geo_reference', None)

    return Geospatial_data(points_dictionary['pointlist'],
                           points_dictionary['attributelist'],
                           geo_reference = geo)
1406
1407
1408##
1409# @brief Ensure that points are in absolute coordinates.
1410# @param points A list or array of points to check, or geospatial object.
1411# @param geo_reference If supplied,
1412# @return ??
def ensure_absolute(points, geo_reference=None):
    """Ensure that points are in absolute coordinates.

    points:        List or numeric array of coordinate pairs [xi, eta] of
                   points, or a Geospatial_data object, or a points file name.
    geo_reference: A Geo_reference object or 3-tuple consisting of
                   UTM zone, easting and northing.
                   If specified, vertex coordinates are assumed to be
                   relative to their respective origins.

    Returns a numeric array of absolute points.
    """

    # Input check
    if isinstance(points, basestring):
        # It's a string - assume it is a point file
        points = Geospatial_data(file_name=points)

    if isinstance(points, Geospatial_data):
        points = points.get_data_points(absolute=True)
        msg = 'Use a Geospatial_data object or a mesh origin, not both.'
        # Identity test ('is None') rather than '== None'; consistent with
        # ensure_geospatial and not subject to custom __eq__ behaviour.
        assert geo_reference is None, msg
    else:
        points = ensure_numeric(points, num.float)

    # Sort out geo_reference and convert points
    if geo_reference is None:
        geo = None # Geo_reference()
    else:
        if isinstance(geo_reference, Geo_reference):
            geo = geo_reference
        else:
            geo = Geo_reference(geo_reference[0],
                                geo_reference[1],
                                geo_reference[2])
        points = geo.get_absolute(points)

    return points
1454
1455
1456##
1457# @brief
1458# @param points
1459# @param geo_reference
1460# @return A geospatial object.
def ensure_geospatial(points, geo_reference=None):
    """Convert various data formats to a Geospatial_data instance.

    points:        List or numeric array of coordinate pairs [xi, eta] of
                   points, or a Geospatial_data object.
    geo_reference: A Geo_reference object or 3-tuple consisting of
                   UTM zone, easting and northing.
                   If specified, vertex coordinates are assumed to be
                   relative to their respective origins.
    """

    # A Geospatial_data instance passes through untouched; combining it
    # with a separate georeference would be ambiguous.
    if isinstance(points, Geospatial_data):
        msg = "Use a Geospatial_data object or a mesh origin, not both."
        assert geo_reference is None, msg
        return points

    # Otherwise treat points as a list/array of absolute coordinates.
    points = ensure_numeric(points, num.float)

    # Normalise the geo reference to a Geo_reference object (or None).
    if geo_reference is None:
        geo = None
    elif isinstance(geo_reference, Geo_reference):
        geo = geo_reference
    else:
        geo = Geo_reference(geo_reference[0],
                            geo_reference[1],
                            geo_reference[2])

    # Wrap the points and georeference in a Geospatial_data object.
    return Geospatial_data(data_points=points, geo_reference=geo)
1498
1499
1500##
1501# @brief
1502# @param data_file
1503# @param alpha_list
1504# @param mesh_file
1505# @param boundary_poly
1506# @param mesh_resolution
1507# @param north_boundary
1508# @param south_boundary
1509# @param east_boundary
1510# @param west_boundary
1511# @param plot_name
1512# @param split_factor
1513# @param seed_num
1514# @param cache
1515# @param verbose
1516def find_optimal_smoothing_parameter(data_file,
1517                                     alpha_list=None,
1518                                     mesh_file=None,
1519                                     boundary_poly=None,
1520                                     mesh_resolution=100000,
1521                                     north_boundary=None,
1522                                     south_boundary=None,
1523                                     east_boundary=None,
1524                                     west_boundary=None,
1525                                     plot_name='all_alphas',
1526                                     split_factor=0.1,
1527                                     seed_num=None,
1528                                     cache=False,
1529                                     verbose=False):
1530    """Removes a small random sample of points from 'data_file'.
1531    Then creates models with different alpha values from 'alpha_list' and
1532    cross validates the predicted value to the previously removed point data.
1533    Returns the alpha value which has the smallest covariance.
1534
1535    data_file: must not contain points outside the boundaries defined
1536               and it either a pts, txt or csv file.
1537
1538    alpha_list: the alpha values to test in a single list
1539
1540    mesh_file: name of the created mesh file or if passed in will read it.
1541               NOTE, if there is a mesh file mesh_resolution,
1542               north_boundary, south... etc will be ignored.
1543
1544    mesh_resolution: the maximum area size for a triangle
1545
1546    north_boundary... west_boundary: the value of the boundary
1547
1548    plot_name: the name for the plot contain the results
1549
1550    seed_num: the seed to the random number generator
1551
1552    USAGE:
1553        value, alpha = find_optimal_smoothing_parameter(data_file=fileName,
1554                                             alpha_list=[0.0001, 0.01, 1],
1555                                             mesh_file=None,
1556                                             mesh_resolution=3,
1557                                             north_boundary=5,
1558                                             south_boundary=-5,
1559                                             east_boundary=5,
1560                                             west_boundary=-5,
1561                                             plot_name='all_alphas',
1562                                             seed_num=100000,
1563                                             verbose=False)
1564
1565    OUTPUT: returns the minumum normalised covalance calculate AND the
1566           alpha that created it. PLUS writes a plot of the results
1567
1568    NOTE: code will not work if the data_file extent is greater than the
1569    boundary_polygon or any of the boundaries, eg north_boundary...west_boundary
1570    """
1571
1572    from anuga.shallow_water import Domain
1573    from anuga.geospatial_data.geospatial_data import Geospatial_data
1574    from anuga.pmesh.mesh_interface import create_mesh_from_regions
1575    from anuga.utilities.numerical_tools import cov
1576    from anuga.utilities.polygon import is_inside_polygon
1577    from anuga.fit_interpolate.benchmark_least_squares import mem_usage
1578
1579
1580    attribute_smoothed='elevation'
1581
1582    if mesh_file is None:
1583        if verbose: print "building mesh"
1584        mesh_file = 'temp.msh'
1585
1586        if north_boundary is None or south_boundary is None \
1587           or east_boundary is None or west_boundary is None:
1588            no_boundary = True
1589        else:
1590            no_boundary = False
1591
1592        if no_boundary is True:
1593            msg = 'All boundaries must be defined'
1594            raise Expection, msg
1595
1596        poly_topo = [[east_boundary,south_boundary],
1597                     [east_boundary,north_boundary],
1598                     [west_boundary,north_boundary],
1599                     [west_boundary,south_boundary]]
1600
1601        create_mesh_from_regions(poly_topo,
1602                                 boundary_tags={'back': [2],
1603                                                'side': [1,3],
1604                                                'ocean': [0]},
1605                                 maximum_triangle_area=mesh_resolution,
1606                                 filename=mesh_file,
1607                                 use_cache=cache,
1608                                 verbose=verbose)
1609
1610    else: # if mesh file provided
1611        # test mesh file exists?
1612        if verbose: "reading from file: %s" % mesh_file
1613        if access(mesh_file,F_OK) == 0:
1614            msg = "file %s doesn't exist!" % mesh_file
1615            raise IOError, msg
1616
1617    # split topo data
1618    if verbose: print 'Reading elevation file: %s' % data_file
1619    G = Geospatial_data(file_name = data_file)
1620    if verbose: print 'Start split'
1621    G_small, G_other = G.split(split_factor, seed_num, verbose=verbose)
1622    if verbose: print 'Finish split'
1623    points = G_small.get_data_points()
1624
1625    if verbose: print "Number of points in sample to compare: ", len(points)
1626
1627    if alpha_list == None:
1628        alphas = [0.001,0.01,100]
1629        #alphas = [0.000001, 0.00001, 0.0001, 0.001, 0.01,\
1630         #         0.1, 1.0, 10.0, 100.0,1000.0,10000.0]
1631    else:
1632        alphas = alpha_list
1633
1634    # creates array with columns 1 and 2 are x, y. column 3 is elevation
1635    # 4 onwards is the elevation_predicted using the alpha, which will
1636    # be compared later against the real removed data
1637    data = num.array([], dtype=num.float)
1638
1639    data=num.resize(data, (len(points), 3+len(alphas)))
1640
1641    # gets relative point from sample
1642    data[:,0] = points[:,0]
1643    data[:,1] = points[:,1]
1644    elevation_sample = G_small.get_attributes(attribute_name=attribute_smoothed)
1645    data[:,2] = elevation_sample
1646
1647    normal_cov=num.array(num.zeros([len(alphas), 2]), dtype=num.float)
1648
1649    if verbose: print 'Setup computational domains with different alphas'
1650
1651    for i, alpha in enumerate(alphas):
1652        # add G_other data to domains with different alphas
1653        if verbose:
1654            print '\n Calculating domain and mesh for Alpha = ', alpha, '\n'
1655        domain = Domain(mesh_file, use_cache=cache, verbose=verbose)
1656        if verbose: print domain.statistics()
1657        domain.set_quantity(attribute_smoothed,
1658                            geospatial_data=G_other,
1659                            use_cache=cache,
1660                            verbose=verbose,
1661                            alpha=alpha)
1662
1663        # Convert points to geospatial data for use with get_values below
1664        points_geo = Geospatial_data(points, domain.geo_reference)
1665
1666        # returns the predicted elevation of the points that were "split" out
1667        # of the original data set for one particular alpha
1668        if verbose: print 'Get predicted elevation for location to be compared'
1669        elevation_predicted = \
1670                domain.quantities[attribute_smoothed].\
1671                    get_values(interpolation_points=points_geo)
1672
1673        # add predicted elevation to array that starts with x, y, z...
1674        data[:,i+3] = elevation_predicted
1675
1676        sample_cov = cov(elevation_sample)
1677        ele_cov = cov(elevation_sample - elevation_predicted)
1678        normal_cov[i,:] = [alpha,ele_cov / sample_cov]
1679
1680        if verbose:
1681            print 'Covariance for alpha ', normal_cov[i][0], '= ', \
1682                      normal_cov[i][1]
1683            print '-------------------------------------------- \n'
1684
1685    normal_cov0 = normal_cov[:,0]
1686    normal_cov_new = num.take(normal_cov, num.argsort(normal_cov0))
1687
1688    if plot_name is not None:
1689        from pylab import savefig, semilogx, loglog
1690
1691        semilogx(normal_cov_new[:,0], normal_cov_new[:,1])
1692        loglog(normal_cov_new[:,0], normal_cov_new[:,1])
1693        savefig(plot_name, dpi=300)
1694
1695    if mesh_file == 'temp.msh':
1696        remove(mesh_file)
1697
1698    if verbose:
1699        print 'Final results:'
1700        for i, alpha in enumerate(alphas):
1701            print 'covariance for alpha %s = %s ' \
1702                  % (normal_cov[i][0], normal_cov[i][1])
1703        print '\n Optimal alpha is: %s ' \
1704              % normal_cov_new[(num.argmin(normal_cov_new, axis=0))[1], 0]
1705
1706    # covariance and optimal alpha
1707    return (min(normal_cov_new[:,1]),
1708            normal_cov_new[(num.argmin(normal_cov_new,axis=0))[1],0])
1709
1710
1711##
1712# @brief
1713# @param data_file
1714# @param alpha_list
1715# @param mesh_file
1716# @param boundary_poly
1717# @param mesh_resolution
1718# @param north_boundary
1719# @param south_boundary
1720# @param east_boundary
1721# @param west_boundary
1722# @param plot_name
1723# @param split_factor
1724# @param seed_num
1725# @param cache
1726# @param verbose
1727def old_find_optimal_smoothing_parameter(data_file,
1728                                         alpha_list=None,
1729                                         mesh_file=None,
1730                                         boundary_poly=None,
1731                                         mesh_resolution=100000,
1732                                         north_boundary=None,
1733                                         south_boundary=None,
1734                                         east_boundary=None,
1735                                         west_boundary=None,
1736                                         plot_name='all_alphas',
1737                                         split_factor=0.1,
1738                                         seed_num=None,
1739                                         cache=False,
1740                                         verbose=False):
1741    """
1742    data_file: must not contain points outside the boundaries defined
1743               and it either a pts, txt or csv file.
1744
1745    alpha_list: the alpha values to test in a single list
1746
1747    mesh_file: name of the created mesh file or if passed in will read it.
1748               NOTE, if there is a mesh file mesh_resolution,
1749               north_boundary, south... etc will be ignored.
1750
1751    mesh_resolution: the maximum area size for a triangle
1752
1753    north_boundary... west_boundary: the value of the boundary
1754
1755    plot_name: the name for the plot contain the results
1756
1757    seed_num: the seed to the random number generator
1758
1759    USAGE:
1760        value, alpha = find_optimal_smoothing_parameter(data_file=fileName,
1761                                             alpha_list=[0.0001, 0.01, 1],
1762                                             mesh_file=None,
1763                                             mesh_resolution=3,
1764                                             north_boundary=5,
1765                                             south_boundary=-5,
1766                                             east_boundary=5,
1767                                             west_boundary=-5,
1768                                             plot_name='all_alphas',
1769                                             seed_num=100000,
1770                                             verbose=False)
1771
1772    OUTPUT: returns the minumum normalised covalance calculate AND the
1773            alpha that created it. PLUS writes a plot of the results
1774
1775    NOTE: code will not work if the data_file extend is greater than the
1776          boundary_polygon or the north_boundary...west_boundary
1777    """
1778
1779    from anuga.shallow_water import Domain
1780    from anuga.geospatial_data.geospatial_data import Geospatial_data
1781    from anuga.pmesh.mesh_interface import create_mesh_from_regions
1782    from anuga.utilities.numerical_tools import cov
1783    from anuga.utilities.polygon import is_inside_polygon
1784    from anuga.fit_interpolate.benchmark_least_squares import mem_usage
1785
1786
1787    attribute_smoothed = 'elevation'
1788
1789    if mesh_file is None:
1790        mesh_file = 'temp.msh'
1791
1792        if north_boundary is None or south_boundary is None \
1793           or east_boundary is None or west_boundary is None:
1794            no_boundary = True
1795        else:
1796            no_boundary = False
1797
1798        if no_boundary is True:
1799            msg = 'All boundaries must be defined'
1800            raise Expection, msg
1801
1802        poly_topo = [[east_boundary,south_boundary],
1803                     [east_boundary,north_boundary],
1804                     [west_boundary,north_boundary],
1805                     [west_boundary,south_boundary]]
1806
1807        create_mesh_from_regions(poly_topo,
1808                                 boundary_tags={'back': [2],
1809                                                'side': [1,3],
1810                                                'ocean': [0]},
1811                                 maximum_triangle_area=mesh_resolution,
1812                                 filename=mesh_file,
1813                                 use_cache=cache,
1814                                 verbose=verbose)
1815
1816    else: # if mesh file provided
1817        # test mesh file exists?
1818        if access(mesh_file,F_OK) == 0:
1819            msg = "file %s doesn't exist!" % mesh_file
1820            raise IOError, msg
1821
1822    # split topo data
1823    G = Geospatial_data(file_name=data_file)
1824    if verbose: print 'start split'
1825    G_small, G_other = G.split(split_factor, seed_num, verbose=verbose)
1826    if verbose: print 'finish split'
1827    points = G_small.get_data_points()
1828
1829    # FIXME: Remove points outside boundary polygon
1830#    print 'new point',len(points)
1831#
1832#    new_points=[]
1833#    new_points=array([],dtype=float)
1834#    new_points=resize(new_points,(len(points),2))
1835#    print "BOUNDARY", boundary_poly
1836#    for i,point in enumerate(points):
1837#        if is_inside_polygon(point,boundary_poly, verbose=True):
1838#            new_points[i] = point
1839#            print"WOW",i,new_points[i]
1840#    points = new_points
1841
1842    if verbose: print "Number of points in sample to compare: ", len(points)
1843
1844    if alpha_list == None:
1845        alphas = [0.001,0.01,100]
1846        #alphas = [0.000001, 0.00001, 0.0001, 0.001, 0.01,\
1847         #         0.1, 1.0, 10.0, 100.0,1000.0,10000.0]
1848    else:
1849        alphas = alpha_list
1850
1851    domains = {}
1852
1853    if verbose: print 'Setup computational domains with different alphas'
1854
1855    for alpha in alphas:
1856        # add G_other data to domains with different alphas
1857        if verbose:
1858            print '\n Calculating domain and mesh for Alpha = ', alpha, '\n'
1859        domain = Domain(mesh_file, use_cache=cache, verbose=verbose)
1860        if verbose: print domain.statistics()
1861        domain.set_quantity(attribute_smoothed,
1862                            geospatial_data=G_other,
1863                            use_cache=cache,
1864                            verbose=verbose,
1865                            alpha=alpha)
1866        domains[alpha] = domain
1867
1868    # creates array with columns 1 and 2 are x, y. column 3 is elevation
1869    # 4 onwards is the elevation_predicted using the alpha, which will
1870    # be compared later against the real removed data
1871    data = num.array([], dtype=num.float)
1872
1873    data = num.resize(data, (len(points), 3+len(alphas)))
1874
1875    # gets relative point from sample
1876    data[:,0] = points[:,0]
1877    data[:,1] = points[:,1]
1878    elevation_sample = G_small.get_attributes(attribute_name=attribute_smoothed)
1879    data[:,2] = elevation_sample
1880
1881    normal_cov = num.array(num.zeros([len(alphas), 2]), dtype=num.float)
1882
1883    if verbose:
1884        print 'Determine difference between predicted results and actual data'
1885    for i, alpha in enumerate(domains):
1886        if verbose: print'Alpha =', alpha
1887
1888        points_geo = domains[alpha].geo_reference.change_points_geo_ref(points)
1889        # returns the predicted elevation of the points that were "split" out
1890        # of the original data set for one particular alpha
1891        elevation_predicted = \
1892                domains[alpha].quantities[attribute_smoothed].\
1893                        get_values(interpolation_points=points_geo)
1894
1895        # add predicted elevation to array that starts with x, y, z...
1896        data[:,i+3] = elevation_predicted
1897
1898        sample_cov = cov(elevation_sample)
1899        ele_cov = cov(elevation_sample - elevation_predicted)
1900        normal_cov[i,:] = [alpha,ele_cov / sample_cov]
1901        print 'memory usage during compare', mem_usage()
1902        if verbose: print 'cov', normal_cov[i][0], '= ', normal_cov[i][1]
1903
1904    normal_cov0 = normal_cov[:,0]
1905    normal_cov_new = num.take(normal_cov, num.argsort(normal_cov0))
1906
1907    if plot_name is not None:
1908        from pylab import savefig,semilogx,loglog
1909
1910        semilogx(normal_cov_new[:,0], normal_cov_new[:,1])
1911        loglog(normal_cov_new[:,0], normal_cov_new[:,1])
1912        savefig(plot_name, dpi=300)
1913    if mesh_file == 'temp.msh':
1914        remove(mesh_file)
1915
1916    return (min(normal_cov_new[:,1]),
1917            normal_cov_new[(num.argmin(normal_cov_new, axis=0))[1],0])
1918
1919
# This module is a library; there is nothing to run when invoked directly.
if __name__ == "__main__":
    pass
1922
Note: See TracBrowser for help on using the repository browser.