Context Navigation

source: branches/numpy/anuga/geospatial_data/geospatial_data.py @ 6517

Last change on this file since 6517 was 6517, checked in by rwilson, 15 years ago
Hand-merged recent changes in main trunk. Still work to be done in shallow_water.
File size: 69.8 KB

Line
1	"""Class Geospatial_data
2
3	Manipulation of locations on the planet and associated attributes.
4	"""
5
6	from sys import maxint
7	from os import access, F_OK, R_OK,remove
8	from types import DictType
9	from warnings import warn
10	from string import lower
11	from RandomArray import randint, seed, get_seed
12	from copy import deepcopy
13
14	from Scientific.IO.NetCDF import NetCDFFile
15	import numpy as num
16
17	from anuga.coordinate_transforms.lat_long_UTM_conversion import UTMtoLL
18	from anuga.utilities.numerical_tools import ensure_numeric
19	from anuga.coordinate_transforms.geo_reference import Geo_reference, \
20	TitleError, DEFAULT_ZONE, ensure_geo_reference, write_NetCDF_georeference
21	from anuga.coordinate_transforms.redfearn import convert_from_latlon_to_utm
22	from anuga.utilities.system_tools import clean_line
23	from anuga.utilities.anuga_exceptions import ANUGAError
24	from anuga.config import points_file_block_line_size as MAX_READ_LINES
25	from anuga.config import netcdf_mode_r, netcdf_mode_w, netcdf_mode_a
26	from anuga.config import netcdf_float
27
28
29	DEFAULT_ATTRIBUTE = 'elevation'
30
31
32	##
33	# @brief ??
34	class Geospatial_data:
35
36	##
37	# @brief
38	# @param data_points Mx2 iterable of tuples or array of x,y coordinates.
39	# @param attributes Associated values for each data point.
40	# @param geo_reference ??
41	# @param default_attribute_name ??
42	# @param file_name
43	# @param latitudes ??
44	# @param longitudes ??
45	# @param points_are_lats_longs True if points are lat/long, not UTM.
46	# @param max_read_lines Size of block to read, if blocking.
47	# @param load_file_now True if blocking but we eant to read file now.
48	# @param verbose True if this class instance is verbose.
49	def __init__(self,
50	data_points=None, # this can also be a points file name
51	attributes=None,
52	geo_reference=None,
53	default_attribute_name=None,
54	file_name=None,
55	latitudes=None,
56	longitudes=None,
57	points_are_lats_longs=False,
58	max_read_lines=None,
59	load_file_now=True,
60	verbose=False):
61	"""Create instance from data points and associated attributes
62
63	data_points: x,y coordinates in meters. Type must be either a
64	sequence of 2-tuples or an Mx2 numeric array of floats. A file name
65	with extension .txt, .cvs or .pts can also be passed in here.
66
67	attributes: Associated values for each data point. The type
68	must be either a list or an array of length M or a dictionary
69	of lists (or arrays) of length M. In the latter case the keys
70	in the dictionary represent the attribute names, in the former
71	the attribute will get the default name "elevation".
72
73	geo_reference: Object representing the origin of the data
74	points. It contains UTM zone, easting and northing and data
75	points are assumed to be relative to this origin.
76	If geo_reference is None, the default geo ref object is used.
77
78	default_attribute_name: Name of default attribute to be used with
79	get_attribute_values. The idea is that the dataset can be
80	equipped with information about which attribute to return.
81	If None, the default is the "first"
82
83	latitudes, longitudes: Vectors of latitudes and longitudes,
84	used to specify location instead of points.
85
86	points_are_lats_longs: Set this as true if the points are actually
87	lats and longs, not UTM
88
89	max_read_lines: The number of rows read into memory when using
90	blocking to read a file.
91
92	load_file_now: If true the file is automatically loaded
93	into the geospatial instance. Used when blocking.
94
95	file_name: Name of input netCDF file or .txt file. netCDF file must
96	have dimensions "points" etc.
97	.txt file is a comma seperated file with x, y and attribute
98	data.
99
100	The first line has the titles of the columns. The first two
101	column titles are checked to see if they start with lat or
102	long (not case sensitive). If so the data is assumed to be
103	latitude and longitude, in decimal format and converted to
104	UTM. Otherwise the first two columns are assumed to be the x
105	and y, and the title names acually used are ignored.
106
107
108	The format for a .txt file is:
109	1st line: [column names]
110	other lines: x y [attributes]
111
112	for example:
113	x, y, elevation, friction
114	0.6, 0.7, 4.9, 0.3
115	1.9, 2.8, 5, 0.3
116	2.7, 2.4, 5.2, 0.3
117
118	The first two columns have to be x, y or lat, long
119	coordinates.
120
121
122	The format for a Points dictionary is:
123	['pointlist'] a 2 column array describing points. 1st column x,
124	2nd column y.
125	['attributelist'], a dictionary of 1D arrays, representing
126	attribute values at the point. The dictionary key is the attribute
127	header.
128	['geo_reference'] a Geo_refernece object. Use if the point
129	information is relative. This is optional.
130	eg
131	dic['pointlist'] = [[1.0,2.0],[3.0,5.0]]
132	dic['attributelist']['elevation'] = [[7.0,5.0]
133
134	verbose:
135	"""
136
137	if isinstance(data_points, basestring):
138	# assume data_points is really a file name
139	file_name = data_points
140
141	self.set_verbose(verbose)
142	self.geo_reference = None
143	self.file_name = file_name
144
145	if max_read_lines is None:
146	self.max_read_lines = MAX_READ_LINES
147	else:
148	self.max_read_lines = max_read_lines
149
150	if file_name is None:
151	if (latitudes is not None or longitudes is not None
152	or points_are_lats_longs):
153	data_points, geo_reference = \
154	_set_using_lat_long(latitudes=latitudes,
155	longitudes=longitudes,
156	geo_reference=geo_reference,
157	data_points=data_points,
158	points_are_lats_longs=
159	points_are_lats_longs)
160	self.check_data_points(data_points)
161	self.set_attributes(attributes)
162	self.set_geo_reference(geo_reference)
163	self.set_default_attribute_name(default_attribute_name)
164	elif load_file_now is True:
165	# watch for case where file name and points,
166	# attributes etc are provided!!
167	# if file name then all provided info will be removed!
168
169	if verbose is True:
170	if file_name is not None:
171	print 'Loading Geospatial data from file: %s' % file_name
172
173	self.import_points_file(file_name, verbose=verbose)
174
175	self.check_data_points(self.data_points)
176	self.set_attributes(self.attributes)
177	self.set_geo_reference(self.geo_reference)
178	self.set_default_attribute_name(default_attribute_name)
179
180	if verbose is True:
181	if file_name is not None:
182	print 'Geospatial data created from file: %s' % file_name
183	if load_file_now is False:
184	print 'Data will be loaded blockwise on demand'
185
186	if file_name.endswith('csv') or file_name.endswith('txt'):
187	pass
188	# This message was misleading.
189	# FIXME (Ole): Are we blocking here or not?
190	# print 'ASCII formats are not that great for '
191	# print 'blockwise reading. Consider storing this'
192	# print 'data as a pts NetCDF format'
193
194	##
195	# @brief Return length of the points set.
196	def __len__(self):
197	return len(self.data_points)
198
199	##
200	# @brief Return a string representation of the points set.
201	def __repr__(self):
202	return str(self.get_data_points(absolute=True))
203
204	##
205	# @brief Check data points.
206	# @param data_points Points data to check and store in instance.
207	# @note Throws ValueError exception if no data.
208	def check_data_points(self, data_points):
209	"""Checks data points"""
210
211	if data_points is None:
212	self.data_points = None
213	msg = 'There is no data or file provided!'
214	raise ValueError, msg
215	else:
216	self.data_points = ensure_numeric(data_points)
217	if not (0,) == self.data_points.shape:
218	assert len(self.data_points.shape) == 2
219	assert self.data_points.shape[1] == 2
220
221	##
222	# @brief Check and assign attributes data.
223	# @param attributes Dictionary or scalar to save as .attributes.
224	# @note Throws exception if unable to convert dict keys to numeric.
225	def set_attributes(self, attributes):
226	"""Check and assign attributes dictionary"""
227
228	if attributes is None:
229	self.attributes = None
230	return
231
232	if not isinstance(attributes, DictType):
233	# Convert single attribute into dictionary
234	attributes = {DEFAULT_ATTRIBUTE: attributes}
235
236	# Check input attributes
237	for key in attributes.keys():
238	try:
239	attributes[key] = ensure_numeric(attributes[key])
240	except:
241	msg = ("Attribute '%s' (%s) could not be converted to a"
242	"numeric vector" % (str(key), str(attributes[key])))
243	raise Exception, msg
244
245	self.attributes = attributes
246
247	##
248	# @brief Set the georeference of geospatial data.
249	# @param geo_reference The georeference data to set.
250	# @note Will raise exception if param not instance of Geo_reference.
251	def set_geo_reference(self, geo_reference):
252	"""Set the georeference of geospatial data.
253
254	It can also be used to change the georeference and will ensure that
255	the absolute coordinate values are unchanged.
256	"""
257
258	if geo_reference is None:
259	# Use default - points are in absolute coordinates
260	geo_reference = Geo_reference()
261
262	# Allow for tuple (zone, xllcorner, yllcorner)
263	geo_reference = ensure_geo_reference(geo_reference)
264
265	if not isinstance(geo_reference, Geo_reference):
266	# FIXME (Ole): This exception will be raised even
267	# if geo_reference is None. Is that the intent Duncan?
268	msg = ('Argument geo_reference must be a valid Geo_reference '
269	'object or None.')
270	raise Expection, msg
271
272	# If a geo_reference already exists, change the point data according to
273	# the new geo reference
274	if self.geo_reference is not None:
275	self.data_points = self.get_data_points(geo_reference=geo_reference)
276
277	self.geo_reference = geo_reference
278
279	##
280	# @brief Set default attribute name.
281	# @param default_attribute_name The default to save.
282	def set_default_attribute_name(self, default_attribute_name):
283	self.default_attribute_name = default_attribute_name
284
285	##
286	# @brief Set the instance verbose flag.
287	# @param verbose The value to save.
288	# @note Will raise exception if param is not True or False.
289	def set_verbose(self, verbose=False):
290	if verbose in [False, True]:
291	self.verbose = verbose
292	else:
293	msg = 'Illegal value: %s' % str(verbose)
294	raise Exception, msg
295
296	##
297	# @brief Clip geospatial data by a given polygon.
298	# @param polygon The polygon to clip with.
299	# @param closed True if points on clip boundary are not included in result.
300	# @param verbose True if this function is verbose.
301	def clip(self, polygon, closed=True, verbose=False):
302	"""Clip geospatial data by a polygon
303
304	Input
305	polygon - Either a list of points, an Nx2 array or
306	a Geospatial data object.
307	closed - (optional) determine whether points on boundary should be
308	regarded as belonging to the polygon (closed = True)
309	or not (closed = False). Default is True.
310
311	Output
312	New geospatial data object representing points inside
313	specified polygon.
314
315
316	Note - this method is non-destructive and leaves the data in 'self'
317	unchanged
318	"""
319
320	from anuga.utilities.polygon import inside_polygon
321
322	if isinstance(polygon, Geospatial_data):
323	# Polygon is an object - extract points
324	polygon = polygon.get_data_points()
325
326	points = self.get_data_points()
327	inside_indices = inside_polygon(points, polygon, closed, verbose)
328
329	clipped_G = self.get_sample(inside_indices)
330
331	return clipped_G
332
333	##
334	# @brief Clip points data by polygon, return points outside polygon.
335	# @param polygon The polygon to clip with.
336	# @param closed True if points on clip boundary are not included in result.
337	# @param verbose True if this function is verbose.
338	def clip_outside(self, polygon, closed=True, verbose=False):
339	"""Clip geospatial date by a polygon, keeping data OUTSIDE of polygon
340
341	Input
342	polygon - Either a list of points, an Nx2 array or
343	a Geospatial data object.
344	closed - (optional) determine whether points on boundary should be
345	regarded as belonging to the polygon (closed = True)
346	or not (closed = False). Default is True.
347
348	Output
349	Geospatial data object representing point OUTSIDE specified polygon
350	"""
351
352	from anuga.utilities.polygon import outside_polygon
353
354	if isinstance(polygon, Geospatial_data):
355	# Polygon is an object - extract points
356	polygon = polygon.get_data_points()
357
358	points = self.get_data_points()
359	outside_indices = outside_polygon(points, polygon, closed,verbose)
360
361	clipped_G = self.get_sample(outside_indices)
362
363	return clipped_G
364
365	##
366	# @brief Get instance geo_reference data.
367	def get_geo_reference(self):
368	return self.geo_reference
369
370	##
371	# @brief Get coordinates for all data points as an Nx2 array.
372	# @param absolute If True, return UTM, else relative to xll/yll corners.
373	# @param geo_reference If supplied, points are relative to it.
374	# @param as_lat_long If True, return points as lat/lon.
375	# @param isSouthHemisphere If True, return lat/lon points in S.Hemi.
376	# @return A set of data points, in appropriate form.
377	def get_data_points(self,
378	absolute=True,
379	geo_reference=None,
380	as_lat_long=False,
381	isSouthHemisphere=True):
382	"""Get coordinates for all data points as an Nx2 array
383
384	If absolute is False returned coordinates are relative to the
385	internal georeference's xll and yll corners, otherwise
386	absolute UTM coordinates are returned.
387
388	If a geo_reference is passed the points are returned relative
389	to that geo_reference.
390
391	isSH (isSouthHemisphere) is only used when getting data
392	points "as_lat_long" is True and if FALSE will return lats and
393	longs valid for the Northern Hemisphere.
394
395	Default: absolute is True.
396	"""
397
398	if as_lat_long is True:
399	msg = "Points need a zone to be converted into lats and longs"
400	assert self.geo_reference is not None, msg
401	zone = self.geo_reference.get_zone()
402	assert self.geo_reference.get_zone() is not DEFAULT_ZONE, msg
403	lats_longs = []
404	for point in self.get_data_points(True):
405	# UTMtoLL(northing, easting, zone,
406	lat_calced, long_calced = UTMtoLL(point[1], point[0],
407	zone, isSouthHemisphere)
408	lats_longs.append((lat_calced, long_calced)) # to hash
409	return lats_longs
410
411	if absolute is True and geo_reference is None:
412	return self.geo_reference.get_absolute(self.data_points)
413	elif geo_reference is not None:
414	return geo_reference.change_points_geo_ref(self.data_points,
415	self.geo_reference)
416	else:
417	# If absolute is False
418	return self.data_points
419
420	##
421	# @brief Get value for attribute name.
422	# @param attribute_name Name to get value for.
423	# @note If name passed is None, return default attribute value.
424	def get_attributes(self, attribute_name=None):
425	"""Return values for one named attribute.
426
427	If attribute_name is None, default_attribute_name is used
428	"""
429
430	if attribute_name is None:
431	if self.default_attribute_name is not None:
432	attribute_name = self.default_attribute_name
433	else:
434	attribute_name = self.attributes.keys()[0]
435	# above line takes the first one from keys
436
437	if self.verbose is True:
438	print 'Using attribute %s' %attribute_name
439	print 'Available attributes: %s' %(self.attributes.keys())
440
441	msg = 'Attribute name %s does not exist in data set' % attribute_name
442	assert self.attributes.has_key(attribute_name), msg
443
444	return self.attributes[attribute_name]
445
446	##
447	# @brief Get all instance attributes.
448	# @return The instance attribute dictionary, or None if no attributes.
449	def get_all_attributes(self):
450	"""Return values for all attributes.
451	The return value is either None or a dictionary (possibly empty).
452	"""
453
454	return self.attributes
455
456	##
457	# @brief Override __add__() to allow addition of geospatial objects.
458	# @param self This object.
459	# @param other The second object.
460	# @return The new geospatial object.
461	def __add__(self, other):
462	"""Returns the addition of 2 geospatial objects,
463	objects are concatencated to the end of each other
464
465	NOTE: doesn't add if objects contain different
466	attributes
467
468	Always return absolute points!
469	This also means, that if you add None to the object,
470	it will be turned into absolute coordinates
471
472	other can be None in which case nothing is added to self.
473	"""
474
475	# find objects zone and checks if the same
476	geo_ref1 = self.get_geo_reference()
477	zone1 = geo_ref1.get_zone()
478
479	if other is not None:
480	geo_ref2 = other.get_geo_reference()
481	zone2 = geo_ref2.get_zone()
482	geo_ref1.reconcile_zones(geo_ref2)
483	new_points = num.concatenate((self.get_data_points(absolute=True),
484	other.get_data_points(absolute=True)),
485	axis = 0)
486
487	# Concatenate attributes if any
488	if self.attributes is None:
489	if other.attributes is not None:
490	msg = ('Geospatial data must have the same '
491	'attributes to allow addition.')
492	raise Exception, msg
493
494	new_attributes = None
495	else:
496	new_attributes = {}
497	for x in self.attributes.keys():
498	if other.attributes.has_key(x):
499	attrib1 = self.attributes[x]
500	attrib2 = other.attributes[x]
501	new_attributes[x] = num.concatenate((attrib1, attrib2),
502	axis=0) #??default#
503	else:
504	msg = ('Geospatial data must have the same '
505	'attributes to allow addition.')
506	raise Exception, msg
507	else:
508	# other is None:
509	new_points = self.get_data_points(absolute=True)
510	new_attributes = self.attributes
511
512	# Instantiate new data object and return absolute coordinates
513	new_geo_ref = Geo_reference(geo_ref1.get_zone(), 0.0, 0.0)
514	return Geospatial_data(new_points, new_attributes, new_geo_ref)
515
516	##
517	# @brief Override the addition case where LHS isn't geospatial object.
518	# @param self This object.
519	# @param other The second object.
520	# @return The new geospatial object.
521	def __radd__(self, other):
522	"""Handle cases like None + Geospatial_data(...)"""
523
524	return self + other
525
526	################################################################################
527	# IMPORT/EXPORT POINTS FILES
528	################################################################################
529
530	##
531	# @brief Import a .txt, .csv or .pts points data file.
532	# @param file_name
533	# @param delimiter
534	# @param verbose True if this function is to be verbose.
535	# @note Will throw IOError or SyntaxError if there is a problem.
536	def import_points_file(self, file_name, delimiter=None, verbose=False):
537	""" load an .txt, .csv or .pts file
538
539	Note: will throw an IOError/SyntaxError if it can't load the file.
540	Catch these!
541
542	Post condition: self.attributes dictionary has been set
543	"""
544
545	if access(file_name, F_OK) == 0 :
546	msg = 'File %s does not exist or is not accessible' % file_name
547	raise IOError, msg
548
549	attributes = {}
550	if file_name[-4:] == ".pts":
551	try:
552	data_points, attributes, geo_reference = \
553	_read_pts_file(file_name, verbose)
554	except IOError, e:
555	msg = 'Could not open file %s ' % file_name
556	raise IOError, msg
557	elif file_name[-4:] == ".txt" or file_name[-4:]== ".csv":
558	try:
559	data_points, attributes, geo_reference = \
560	_read_csv_file(file_name, verbose)
561	except IOError, e:
562	# This should only be if a file is not found
563	msg = ('Could not open file %s. Check the file location.'
564	% file_name)
565	raise IOError, msg
566	except SyntaxError, e:
567	# This should only be if there is a format error
568	msg = ('Problem with format of file %s.\n%s'
569	% (file_name, Error_message['IOError']))
570	raise SyntaxError, msg
571	else:
572	msg = 'Extension %s is unknown' % file_name[-4:]
573	raise IOError, msg
574
575	self.data_points = data_points
576	self.attributes = attributes
577	self.geo_reference = geo_reference
578
579	##
580	# @brief Write points data to a file (.csv or .pts).
581	# @param file_name Path to file to write.
582	# @param absolute ??
583	# @param as_lat_long ??
584	# @param isSouthHemisphere ??
585	def export_points_file(self, file_name, absolute=True,
586	as_lat_long=False, isSouthHemisphere=True):
587	"""write a points file as a text (.csv) or binary (.pts) file
588
589	file_name is the file name, including the extension
590	The point_dict is defined at the top of this file.
591
592	If absolute is True data the xll and yll are added to the points value
593	and the xll and yll of the geo_reference are set to 0.
594
595	If absolute is False data points at returned as relative to the xll
596	and yll and geo_reference remains uneffected
597
598	isSouthHemisphere: is only used when getting data
599	points "as_lat_long" is True and if FALSE will return lats and
600	longs valid for the Northern Hemisphere.
601	"""
602
603	if (file_name[-4:] == ".pts"):
604	if absolute is True:
605	geo_ref = deepcopy(self.geo_reference)
606	geo_ref.xllcorner = 0
607	geo_ref.yllcorner = 0
608	_write_pts_file(file_name,
609	self.get_data_points(absolute),
610	self.get_all_attributes(),
611	geo_ref)
612	else:
613	_write_pts_file(file_name,
614	self.get_data_points(absolute),
615	self.get_all_attributes(),
616	self.get_geo_reference())
617	elif file_name[-4:] == ".txt" or file_name[-4:] == ".csv":
618	msg = "ERROR: trying to write a .txt file with relative data."
619	assert absolute, msg
620	_write_csv_file(file_name,
621	self.get_data_points(absolute=True,
622	as_lat_long=as_lat_long,
623	isSouthHemisphere=isSouthHemisphere),
624	self.get_all_attributes(),
625	as_lat_long=as_lat_long)
626	elif file_name[-4:] == ".urs" :
627	msg = "ERROR: Can not write a .urs file as a relative file."
628	assert absolute, msg
629	_write_urs_file(file_name,
630	self.get_data_points(as_lat_long=True,
631	isSouthHemisphere=isSouthHemisphere))
632	else:
633	msg = 'Unknown file type %s ' %file_name
634	raise IOError, msg
635
636	##
637	# @brief Get a subset of data that is referred to by 'indices'.
638	# @param indices A list of indices to select data subset with.
639	# @return A geospatial object containing data subset.
640	def get_sample(self, indices):
641	""" Returns a object which is a subset of the original
642	and the data points and attributes in this new object refer to
643	the indices provided
644
645	Input
646	indices- a list of integers that represent the new object
647	Output
648	New geospatial data object representing points specified by
649	the indices
650	"""
651
652	# FIXME: add the geo_reference to this
653	points = self.get_data_points()
654	sampled_points = num.take(points, indices, axis=0)
655
656	attributes = self.get_all_attributes()
657
658	sampled_attributes = {}
659	if attributes is not None:
660	for key, att in attributes.items():
661	sampled_attributes[key] = num.take(att, indices, axis=0)
662
663	return Geospatial_data(sampled_points, sampled_attributes)
664
665	##
666	# @brief Split one geospatial object into two.
667	# @param factor Relative size to make first result object.
668	# @param seed_num Random 'seed' - used only for unit test.
669	# @param verbose True if this function is to be verbose.
670	# @note Points in each result object are selected randomly.
671	def split(self, factor=0.5, seed_num=None, verbose=False):
672	"""Returns two geospatial_data object, first is the size of the 'factor'
673	smaller the original and the second is the remainder. The two
674	new objects are disjoint sets of each other.
675
676	Points of the two new object have selected RANDOMLY.
677
678	This method create two lists of indices which are passed into
679	get_sample. The lists are created using random numbers, and
680	they are unique sets eg. total_list(1,2,3,4,5,6,7,8,9)
681	random_list(1,3,6,7,9) and remainder_list(0,2,4,5,8)
682
683	Input - the factor which to split the object, if 0.1 then 10% of the
684	together object will be returned
685
686	Output - two geospatial_data objects that are disjoint sets of the
687	original
688	"""
689
690	i = 0
691	self_size = len(self)
692	random_list = []
693	remainder_list = []
694	new_size = round(factor * self_size)
695
696	# Find unique random numbers
697	if verbose: print "make unique random number list and get indices"
698
699	total = num.array(range(self_size), num.int) #array default#
700	total_list = total.tolist()
701
702	if verbose: print "total list len", len(total_list)
703
704	# There will be repeated random numbers however will not be a
705	# problem as they are being 'pop'ed out of array so if there
706	# are two numbers the same they will pop different indicies,
707	# still basically random
708	## create list of non-unquie random numbers
709	if verbose: print "create random numbers list %s long" %new_size
710
711	# Set seed if provided, mainly important for unit test!
712	# plus recalcule seed when no seed provided.
713	if seed_num is not None:
714	seed(seed_num, seed_num)
715	else:
716	seed()
717
718	if verbose: print "seed:", get_seed()
719
720	random_num = randint(0, self_size-1, (int(new_size),))
721	random_num = random_num.tolist()
722
723	# need to sort and reverse so the pop() works correctly
724	random_num.sort()
725	random_num.reverse()
726
727	if verbose: print "make random number list and get indices"
728
729	j = 0
730	k = 1
731	remainder_list = total_list[:]
732
733	# pops array index (random_num) from remainder_list
734	# (which starts as the total_list and appends to random_list)
735	random_num_len = len(random_num)
736	for i in random_num:
737	random_list.append(remainder_list.pop(i))
738	j += 1
739	# prints progress
740	if verbose and round(random_num_len/10*k) == j:
741	print '(%s/%s)' % (j, random_num_len)
742	k += 1
743
744	# FIXME: move to tests, it might take a long time
745	# then create an array of random length between 500 and 1000,
746	# and use a random factor between 0 and 1
747	# setup for assertion
748	test_total = random_list[:]
749	test_total.extend(remainder_list)
750	test_total.sort()
751	msg = ('The two random lists made from the original list when added '
752	'together DO NOT equal the original list')
753	assert total_list == test_total, msg
754
755	# Get new samples
756	if verbose: print "get values of indices for random list"
757	G1 = self.get_sample(random_list)
758	if verbose: print "get values of indices for opposite of random list"
759	G2 = self.get_sample(remainder_list)
760
761	return G1, G2
762
763	##
764	# @brief Allow iteration over this object.
765	def __iter__(self):
766	"""Read in the header, number_of_points and save the
767	file pointer position
768	"""
769
770	# FIXME - what to do if the file isn't there
771
772	# FIXME (Ole): Shouldn't this go into the constructor?
773	# This method acts like the constructor when blocking.
774	# ... and shouldn't it be called block_size?
775	#
776	if self.max_read_lines is None:
777	self.max_read_lines = MAX_READ_LINES
778
779	if self.file_name[-4:] == ".pts":
780	# See if the file is there. Throw a QUIET IO error if it isn't
781	fd = open(self.file_name,'r')
782	fd.close()
783
784	# Throws prints to screen if file not present
785	self.fid = NetCDFFile(self.file_name, netcdf_mode_r)
786
787	(self.blocking_georef,
788	self.blocking_keys,
789	self.number_of_points) = _read_pts_file_header(self.fid,
790	self.verbose)
791	self.start_row = 0
792	self.last_row = self.number_of_points
793	self.show_verbose = 0
794	self.verbose_block_size = (self.last_row + 10)/10
795	self.block_number = 0
796	self.number_of_blocks = self.number_of_points/self.max_read_lines
797	# This computes the number of full blocks. The last block may be
798	# smaller and won't be included in this estimate.
799
800	if self.verbose is True:
801	print ('Reading %d points (in ~%d blocks) from file %s. '
802	% (self.number_of_points, self.number_of_blocks,
803	self.file_name)),
804	print ('Each block consists of %d data points'
805	% self.max_read_lines)
806	else:
807	# Assume the file is a csv file
808	file_pointer = open(self.file_name)
809	self.header, self.file_pointer = _read_csv_file_header(file_pointer)
810	self.blocking_georef = None # Used for reconciling zones
811
812	return self
813
814	##
815	# @brief Read another block into the instance.
816	def next(self):
817	"""read a block, instanciate a new geospatial and return it"""
818
819	if self.file_name[-4:] == ".pts":
820	if self.start_row == self.last_row:
821	# Read the end of the file last iteration
822	# Remove blocking attributes
823	self.fid.close()
824	del self.max_read_lines
825	del self.blocking_georef
826	del self.last_row
827	del self.start_row
828	del self.blocking_keys
829	del self.fid
830	raise StopIteration
831	fin_row = self.start_row + self.max_read_lines
832	if fin_row > self.last_row:
833	fin_row = self.last_row
834
835	if self.verbose is True:
836	if (self.show_verbose >= self.start_row
837	and self.show_verbose < fin_row):
838	print ('Reading block %d (points %d to %d) out of %d'
839	% (self.block_number, self.start_row,
840	fin_row, self.number_of_blocks))
841
842	self.show_verbose += max(self.max_read_lines,
843	self.verbose_block_size)
844
845	# Read next block
846	pointlist, att_dict, = _read_pts_file_blocking(self.fid,
847	self.start_row,
848	fin_row,
849	self.blocking_keys)
850
851	geo = Geospatial_data(pointlist, att_dict, self.blocking_georef)
852	self.start_row = fin_row
853
854	self.block_number += 1
855	else:
856	# Assume the file is a csv file
857	try:
858	(pointlist,
859	att_dict,
860	geo_ref,
861	self.file_pointer) = _read_csv_file_blocking(self.file_pointer,
862	self.header[:],
863	max_read_lines= \
864	self.max_read_lines,
865	verbose= \
866	self.verbose)
867
868	# Check that the zones haven't changed.
869	if geo_ref is not None:
870	geo_ref.reconcile_zones(self.blocking_georef)
871	self.blocking_georef = geo_ref
872	elif self.blocking_georef is not None:
873	msg = ('Geo reference given, then not given.'
874	' This should not happen.')
875	raise ValueError, msg
876	geo = Geospatial_data(pointlist, att_dict, geo_ref)
877	except StopIteration:
878	self.file_pointer.close()
879	del self.header
880	del self.file_pointer
881	raise StopIteration
882	except ANUGAError:
883	self.file_pointer.close()
884	del self.header
885	del self.file_pointer
886	raise
887	except SyntaxError:
888	self.file_pointer.close()
889	del self.header
890	del self.file_pointer
891	# This should only be if there is a format error
892	msg = ('Could not open file %s.\n%s'
893	% (self.file_name, Error_message['IOError']))
894	raise SyntaxError, msg
895	return geo
896
897	##################### Error messages ###########
898	Error_message = {}
899	Em = Error_message
900	Em['IOError'] = ('NOTE: The format for a comma separated .txt/.csv file is:\n'
901	' 1st line: [column names]\n'
902	' other lines: [x value], [y value], [attributes]\n'
903	'\n'
904	' for example:\n'
905	' x, y, elevation, friction\n'
906	' 0.6, 0.7, 4.9, 0.3\n'
907	' 1.9, 2.8, 5, 0.3\n'
908	' 2.7, 2.4, 5.2, 0.3\n'
909	'\n'
910	'The first two columns are assumed to be x, y coordinates.\n'
911	'The attribute values must be numeric.\n')
912
913	##
914	# @brief ??
915	# @param latitudes ??
916	# @param longitudes ??
917	# @param geo_reference ??
918	# @param data_points ??
919	# @param points_are_lats_longs ??
920	# @note IS THIS USED???
921	def _set_using_lat_long(latitudes,
922	longitudes,
923	geo_reference,
924	data_points,
925	points_are_lats_longs):
926	"""If the points has lat long info, assume it is in (lat, long) order."""
927
928	if geo_reference is not None:
929	msg = ('A georeference is specified yet latitude and longitude '
930	'are also specified!')
931	raise ValueError, msg
932
933	if data_points is not None and not points_are_lats_longs:
934	msg = ('Data points are specified yet latitude and longitude are '
935	'also specified.')
936	raise ValueError, msg
937
938	if points_are_lats_longs:
939	if data_points is None:
940	msg = "Data points are not specified."
941	raise ValueError, msg
942	lats_longs = ensure_numeric(data_points)
943	latitudes = num.ravel(lats_longs[:,0:1])
944	longitudes = num.ravel(lats_longs[:,1:])
945
946	if latitudes is None and longitudes is None:
947	msg = "Latitudes and Longitudes are not specified."
948	raise ValueError, msg
949
950	if latitudes is None:
951	msg = "Longitudes are specified yet latitudes aren't."
952	raise ValueError, msg
953
954	if longitudes is None:
955	msg = "Latitudes are specified yet longitudes aren't."
956	raise ValueError, msg
957
958	data_points, zone = convert_from_latlon_to_utm(latitudes=latitudes,
959	longitudes=longitudes)
960	return data_points, Geo_reference(zone=zone)
961
962
963	##
964	# @brief Read a .pts data file.
965	# @param file_name Path to file to read.
966	# @param verbose True if this function is to be verbose.
967	# @return (pointlist, attributes, geo_reference)
968	def _read_pts_file(file_name, verbose=False):
969	"""Read .pts NetCDF file
970
971	Return a (dict_points, dict_attribute, geo_ref)
972	eg
973	dict['points'] = [[1.0,2.0],[3.0,5.0]]
974	dict['attributelist']['elevation'] = [[7.0,5.0]]
975	"""
976
977	if verbose: print 'Reading ', file_name
978
979	# See if the file is there. Throw a QUIET IO error if it isn't
980	fd = open(file_name,'r')
981	fd.close()
982
983	# Throws prints to screen if file not present
984	fid = NetCDFFile(file_name, netcdf_mode_r)
985
986	pointlist = num.array(fid.variables['points'])
987	keys = fid.variables.keys()
988
989	if verbose: print 'Got %d variables: %s' % (len(keys), keys)
990
991	try:
992	keys.remove('points')
993	except IOError, e:
994	fid.close()
995	msg = "Expected keyword 'points' but could not find it"
996	raise IOError, msg
997
998	attributes = {}
999	for key in keys:
1000	if verbose: print "reading attribute '%s'" % key
1001
1002	attributes[key] = num.array(fid.variables[key])
1003
1004	try:
1005	geo_reference = Geo_reference(NetCDFObject=fid)
1006	except AttributeError, e:
1007	geo_reference = None
1008
1009	fid.close()
1010
1011	return pointlist, attributes, geo_reference
1012
1013
1014	##
1015	# @brief Read a .csv data file.
1016	# @param file_name Path to the .csv file to read.
1017	# @param verbose True if this function is to be verbose.
1018	def _read_csv_file(file_name, verbose=False):
1019	"""Read .csv file
1020
1021	Return a dic of array of points, and dic of array of attribute
1022	eg
1023	dic['points'] = [[1.0,2.0],[3.0,5.0]]
1024	dic['attributelist']['elevation'] = [[7.0,5.0]]
1025	"""
1026
1027	file_pointer = open(file_name)
1028	header, file_pointer = _read_csv_file_header(file_pointer)
1029	try:
1030	(pointlist,
1031	att_dict,
1032	geo_ref,
1033	file_pointer) = _read_csv_file_blocking(file_pointer,
1034	header,
1035	max_read_lines=1e30)
1036	# If the file is bigger that this, block..
1037	# FIXME (Ole) What's up here?
1038	except ANUGAError:
1039	file_pointer.close()
1040	raise
1041
1042	file_pointer.close()
1043
1044	return pointlist, att_dict, geo_ref
1045
1046
1047	##
1048	# @brief Read a .csv file header.
1049	# @param file_pointer Open descriptor of the file to read.
1050	# @param delimiter Header line delimiter string, split on this string.
1051	# @param verbose True if this function is to be verbose.
1052	# @return A tuple of (<cleaned header string>, <input file_pointer>)
1053
1054	CSV_DELIMITER = ','
1055
1056	def _read_csv_file_header(file_pointer,
1057	delimiter=CSV_DELIMITER,
1058	verbose=False):
1059	"""Read the header of a .csv file
1060	Return a list of the header names
1061	"""
1062
1063	line = file_pointer.readline()
1064	header = clean_line(line, delimiter)
1065
1066	return header, file_pointer
1067
1068	##
1069	# @brief Read a .csv file, with blocking.
1070	# @param file_pointer Open descriptor of the file to read.
1071	# @param header List of already read .csv header fields.
1072	# @param delimiter Delimiter string header was split on.
1073	# @param max_read_lines The max number of lines to read before blocking.
1074	# @param verbose True if this function is to be verbose.
1075	# @note Will throw IndexError, SyntaxError exceptions.
1076	def _read_csv_file_blocking(file_pointer,
1077	header,
1078	delimiter=CSV_DELIMITER,
1079	max_read_lines=MAX_READ_LINES,
1080	verbose=False):
1081	"""Read the body of a .csv file.
1082	header: The list header of the csv file, with the x and y labels.
1083	"""
1084
1085	points = []
1086	pointattributes = []
1087	att_dict = {}
1088
1089	# This is to remove the x and y headers.
1090	header = header[:]
1091	try:
1092	x_header = header.pop(0)
1093	y_header = header.pop(0)
1094	except IndexError:
1095	# if there are not two columns this will occur.
1096	# eg if it is a space seperated file
1097	raise SyntaxError
1098
1099	read_lines = 0
1100	while read_lines < max_read_lines:
1101	line = file_pointer.readline()
1102	numbers = clean_line(line, delimiter)
1103	if len(numbers) <= 1:
1104	break
1105	if line[0] == '#':
1106	continue
1107
1108	read_lines += 1
1109
1110	try:
1111	x = float(numbers[0])
1112	y = float(numbers[1])
1113	points.append([x,y])
1114	numbers.pop(0)
1115	numbers.pop(0)
1116	if len(header) != len(numbers):
1117	file_pointer.close()
1118	msg = ('File load error. '
1119	'There might be a problem with the file header.')
1120	raise SyntaxError, msg
1121	for i,n in enumerate(numbers):
1122	n.strip()
1123	if n != '\n' and n != '':
1124	att_dict.setdefault(header[i],[]).append(float(n))
1125	except ValueError:
1126	raise SyntaxError
1127
1128	if points == []:
1129	raise StopIteration
1130
1131	pointlist = num.array(points, num.float)
1132	for key in att_dict.keys():
1133	att_dict[key] = num.array(att_dict[key], num.float)
1134
1135	# Do stuff here so the info is in lat's and longs
1136	geo_ref = None
1137	x_header = lower(x_header[:3])
1138	y_header = lower(y_header[:3])
1139	if (x_header == 'lon' or x_header == 'lat') \
1140	and (y_header == 'lon' or y_header == 'lat'):
1141	if x_header == 'lon':
1142	longitudes = num.ravel(pointlist[:,0:1])
1143	latitudes = num.ravel(pointlist[:,1:])
1144	else:
1145	latitudes = num.ravel(pointlist[:,0:1])
1146	longitudes = num.ravel(pointlist[:,1:])
1147
1148	pointlist, geo_ref = _set_using_lat_long(latitudes,
1149	longitudes,
1150	geo_reference=None,
1151	data_points=None,
1152	points_are_lats_longs=False)
1153
1154	return pointlist, att_dict, geo_ref, file_pointer
1155
1156
1157	##
1158	# @brief Read a .pts file header.
1159	# @param fid Handle to the open .pts file.
1160	# @param verbose True if the function is to be verbose.
1161	# @return (geo_reference, keys, fid.dimensions['number_of_points'])
1162	# @note Will throw IOError and AttributeError exceptions.
1163	def _read_pts_file_header(fid, verbose=False):
1164	'''Read the geo_reference and number_of_points from a .pts file'''
1165
1166	keys = fid.variables.keys()
1167	try:
1168	keys.remove('points')
1169	except IOError, e:
1170	fid.close()
1171	msg = "Expected keyword 'points' but could not find it."
1172	raise IOError, msg
1173
1174	if verbose: print 'Got %d variables: %s' % (len(keys), keys)
1175
1176	try:
1177	geo_reference = Geo_reference(NetCDFObject=fid)
1178	except AttributeError, e:
1179	geo_reference = None
1180
1181	return geo_reference, keys, fid.dimensions['number_of_points']
1182
1183
1184	##
1185	# @brief Read the body of a .pts file, with blocking.
1186	# @param fid Handle to already open file.
1187	# @param start_row Start row index of points to return.
1188	# @param fin_row End row index of points to return.
1189	# @param keys Iterable of keys to return.
1190	# @return Tuple of (pointlist, attributes).
1191	def _read_pts_file_blocking(fid, start_row, fin_row, keys):
1192	'''Read the body of a .pts file.'''
1193
1194	pointlist = num.array(fid.variables['points'][start_row:fin_row])
1195
1196	attributes = {}
1197	for key in keys:
1198	attributes[key] = num.array(fid.variables[key][start_row:fin_row])
1199
1200	return pointlist, attributes
1201
1202
1203	##
1204	# @brief Write a .pts data file.
1205	# @param file_name Path to the file to write.
1206	# @param write_data_points Data points to write.
1207	# @param write_attributes Attributes to write.
1208	# @param write_geo_reference Georef to write.
1209	def _write_pts_file(file_name,
1210	write_data_points,
1211	write_attributes=None,
1212	write_geo_reference=None):
1213	"""Write .pts NetCDF file
1214
1215	NOTE: Below might not be valid ask Duncan : NB 5/2006
1216
1217	WARNING: This function mangles the point_atts data structure
1218	# F??ME: (DSG)This format has issues.
1219	# There can't be an attribute called points
1220	# consider format change
1221	# method changed by NB not sure if above statement is correct
1222
1223	should create new test for this
1224	legal_keys = ['pointlist', 'attributelist', 'geo_reference']
1225	for key in point_atts.keys():
1226	msg = 'Key %s is illegal. Valid keys are %s' %(key, legal_keys)
1227	assert key in legal_keys, msg
1228	"""
1229
1230	# NetCDF file definition
1231	outfile = NetCDFFile(file_name, netcdf_mode_w)
1232
1233	# Create new file
1234	outfile.institution = 'Geoscience Australia'
1235	outfile.description = ('NetCDF format for compact and portable storage '
1236	'of spatial point data')
1237
1238	# Dimension definitions
1239	shape = write_data_points.shape[0]
1240	outfile.createDimension('number_of_points', shape)
1241	outfile.createDimension('number_of_dimensions', 2) # This is 2d data
1242
1243	# Variable definition
1244	outfile.createVariable('points', netcdf_float,
1245	('number_of_points', 'number_of_dimensions'))
1246
1247	# create variables
1248	outfile.variables['points'][:] = write_data_points
1249
1250	if write_attributes is not None:
1251	for key in write_attributes.keys():
1252	outfile.createVariable(key, netcdf_float, ('number_of_points',))
1253	outfile.variables[key][:] = write_attributes[key]
1254
1255	if write_geo_reference is not None:
1256	write_NetCDF_georeference(write_geo_reference, outfile)
1257
1258	outfile.close()
1259
1260
1261	##
1262	# @brief Write a .csv data file.
1263	# @param file_name Path to the file to write.
1264	# @param write_data_points Data points to write.
1265	# @param write_attributes Attributes to write.
1266	# @param as_lat_long True if points are lat/lon, else x/y.
1267	# @param delimiter The CSV delimiter to use.
1268	def _write_csv_file(file_name,
1269	write_data_points,
1270	write_attributes=None,
1271	as_lat_long=False,
1272	delimiter=','):
1273	"""Write a .csv file."""
1274
1275	points = write_data_points
1276	pointattributes = write_attributes
1277
1278	fd = open(file_name, 'w')
1279
1280	if as_lat_long:
1281	titlelist = "latitude" + delimiter + "longitude" + delimiter
1282	else:
1283	titlelist = "x" + delimiter + "y" + delimiter
1284
1285	if pointattributes is not None:
1286	for title in pointattributes.keys():
1287	titlelist = titlelist + title + delimiter
1288	titlelist = titlelist[0:-len(delimiter)] # remove the last delimiter
1289
1290	fd.write(titlelist + "\n")
1291
1292	# <x/lat> <y/long> [attributes]
1293	for i, vert in enumerate( points):
1294	if pointattributes is not None:
1295	attlist = ","
1296	for att in pointattributes.keys():
1297	attlist = attlist + str(pointattributes[att][i]) + delimiter
1298	attlist = attlist[0:-len(delimiter)] # remove the last delimiter
1299	attlist.strip()
1300	else:
1301	attlist = ''
1302
1303	fd.write(str(vert[0]) + delimiter + str(vert[1]) + attlist + "\n")
1304
1305	fd.close()
1306
1307
1308	##
1309	# @brief Write a URS file.
1310	# @param file_name The path of the file to write.
1311	# @param points
1312	# @param delimiter
1313	def _write_urs_file(file_name, points, delimiter=' '):
1314	"""Write a URS format file.
1315	export a file, file_name, with the urs format
1316	the data points are in lats and longs
1317	"""
1318
1319	fd = open(file_name, 'w')
1320
1321	# first line is # points
1322	fd.write(str(len(points)) + "\n")
1323
1324	# <lat> <long> <id#>
1325	for i, vert in enumerate( points):
1326	fd.write(str(round(vert[0],7)) + delimiter +
1327	str(round(vert[1],7)) + delimiter + str(i) + "\n")
1328
1329	fd.close()
1330
1331
1332	##
1333	# @brief ??
1334	# @param point_atts ??
1335	# @return ??
1336	def _point_atts2array(point_atts):
1337	point_atts['pointlist'] = num.array(point_atts['pointlist'], num.float)
1338
1339	for key in point_atts['attributelist'].keys():
1340	point_atts['attributelist'][key] = \
1341	num.array(point_atts['attributelist'][key], num.float)
1342
1343	return point_atts
1344
1345
1346	##
1347	# @brief Convert geospatial object to a points dictionary.
1348	# @param geospatial_data The geospatial object to convert.
1349	# @return A points dictionary.
1350	def geospatial_data2points_dictionary(geospatial_data):
1351	"""Convert geospatial data to points_dictionary"""
1352
1353	points_dictionary = {}
1354	points_dictionary['pointlist'] = geospatial_data.data_points
1355
1356	points_dictionary['attributelist'] = {}
1357
1358	for attribute_name in geospatial_data.attributes.keys():
1359	val = geospatial_data.attributes[attribute_name]
1360	points_dictionary['attributelist'][attribute_name] = val
1361
1362	points_dictionary['geo_reference'] = geospatial_data.geo_reference
1363
1364	return points_dictionary
1365
1366
1367	##
1368	# @brief Convert a points dictionary to a geospatial object.
1369	# @param points_dictionary A points dictionary to convert.
1370	def points_dictionary2geospatial_data(points_dictionary):
1371	"""Convert points_dictionary to geospatial data object"""
1372
1373	msg = "Points dictionary must have key 'pointlist'"
1374	assert points_dictionary.has_key('pointlist'), msg
1375
1376	msg = "Points dictionary must have key 'attributelist'"
1377	assert points_dictionary.has_key('attributelist'), msg
1378
1379	if points_dictionary.has_key('geo_reference'):
1380	geo = points_dictionary['geo_reference']
1381	else:
1382	geo = None
1383
1384	return Geospatial_data(points_dictionary['pointlist'],
1385	points_dictionary['attributelist'],
1386	geo_reference=geo)
1387
1388
1389	##
1390	# @brief Ensure that points are in absolute coordinates.
1391	# @param points A list or array of points to check, or geospatial object.
1392	# @param geo_reference If supplied,
1393	# @return ??
1394	def ensure_absolute(points, geo_reference=None):
1395	"""Ensure that points are in absolute coordinates.
1396
1397	This function inputs several formats and
1398	outputs one format. - a numeric array of absolute points.
1399
1400	Input formats are;
1401	points: List or numeric array of coordinate pairs [xi, eta] of
1402	points or geospatial object or points file name
1403
1404	mesh_origin: A geo_reference object or 3-tuples consisting of
1405	UTM zone, easting and northing.
1406	If specified vertex coordinates are assumed to be
1407	relative to their respective origins.
1408	"""
1409
1410	# Input check
1411	if isinstance(points, basestring):
1412	# It's a string - assume it is a point file
1413	points = Geospatial_data(file_name=points)
1414
1415	if isinstance(points, Geospatial_data):
1416	points = points.get_data_points(absolute=True)
1417	msg = 'Use a Geospatial_data object or a mesh origin, not both.'
1418	assert geo_reference == None, msg
1419	else:
1420	points = ensure_numeric(points, num.float)
1421
1422	# Sort of geo_reference and convert points
1423	if geo_reference is None:
1424	geo = None # Geo_reference()
1425	else:
1426	if isinstance(geo_reference, Geo_reference):
1427	geo = geo_reference
1428	else:
1429	geo = Geo_reference(geo_reference[0],
1430	geo_reference[1],
1431	geo_reference[2])
1432	points = geo.get_absolute(points)
1433
1434	return points
1435
1436
1437	##
1438	# @brief
1439	# @param points
1440	# @param geo_reference
1441	# @return A geospatial object.
1442	def ensure_geospatial(points, geo_reference=None):
1443	"""Convert various data formats to a geospatial_data instance.
1444
1445	Inputed formats are;
1446	points: List or numeric array of coordinate pairs [xi, eta] of
1447	points or geospatial object
1448
1449	mesh_origin: A geo_reference object or 3-tuples consisting of
1450	UTM zone, easting and northing.
1451	If specified vertex coordinates are assumed to be
1452	relative to their respective origins.
1453	"""
1454
1455	# Input check
1456	if isinstance(points, Geospatial_data):
1457	msg = "Use a Geospatial_data object or a mesh origin, not both."
1458	assert geo_reference is None, msg
1459	return points
1460	else:
1461	# List or numeric array of absolute points
1462	points = ensure_numeric(points, num.float)
1463
1464	# Sort out geo reference
1465	if geo_reference is None:
1466	geo = None
1467	else:
1468	if isinstance(geo_reference, Geo_reference):
1469	geo = geo_reference
1470	else:
1471	geo = Geo_reference(geo_reference[0],
1472	geo_reference[1],
1473	geo_reference[2])
1474
1475	# Create Geospatial_data object with appropriate geo reference and return
1476	points = Geospatial_data(data_points=points, geo_reference=geo)
1477
1478	return points
1479
1480
1481	##
1482	# @brief
1483	# @param data_file
1484	# @param alpha_list
1485	# @param mesh_file
1486	# @param boundary_poly
1487	# @param mesh_resolution
1488	# @param north_boundary
1489	# @param south_boundary
1490	# @param east_boundary
1491	# @param west_boundary
1492	# @param plot_name
1493	# @param split_factor
1494	# @param seed_num
1495	# @param cache
1496	# @param verbose
1497	def find_optimal_smoothing_parameter(data_file,
1498	alpha_list=None,
1499	mesh_file=None,
1500	boundary_poly=None,
1501	mesh_resolution=100000,
1502	north_boundary=None,
1503	south_boundary=None,
1504	east_boundary=None,
1505	west_boundary=None,
1506	plot_name='all_alphas',
1507	split_factor=0.1,
1508	seed_num=None,
1509	cache=False,
1510	verbose=False):
1511	"""Removes a small random sample of points from 'data_file'.
1512	Then creates models with different alpha values from 'alpha_list' and
1513	cross validates the predicted value to the previously removed point data.
1514	Returns the alpha value which has the smallest covariance.
1515
1516	data_file: must not contain points outside the boundaries defined
1517	and it must be either a pts, txt or csv file.
1518
1519	alpha_list: the alpha values to test in a single list
1520
1521	mesh_file: name of the created mesh file or if passed in will read it.
1522	NOTE, if there is a mesh file mesh_resolution,
1523	north_boundary, south... etc will be ignored.
1524
1525	mesh_resolution: the maximum area size for a triangle
1526
1527	north_boundary... west_boundary: the value of the boundary
1528
1529	plot_name: the name for the plot contain the results
1530
1531	seed_num: the seed to the random number generator
1532
1533	USAGE:
1534	value, alpha = find_optimal_smoothing_parameter(data_file=fileName,
1535	alpha_list=[0.0001, 0.01, 1],
1536	mesh_file=None,
1537	mesh_resolution=3,
1538	north_boundary=5,
1539	south_boundary=-5,
1540	east_boundary=5,
1541	west_boundary=-5,
1542	plot_name='all_alphas',
1543	seed_num=100000,
1544	verbose=False)
1545
1546	OUTPUT: returns the minumum normalised covalance calculate AND the
1547	alpha that created it. PLUS writes a plot of the results
1548
1549	NOTE: code will not work if the data_file extent is greater than the
1550	boundary_polygon or any of the boundaries, eg north_boundary...west_boundary
1551	"""
1552
1553	from anuga.shallow_water import Domain
1554	from anuga.geospatial_data.geospatial_data import Geospatial_data
1555	from anuga.pmesh.mesh_interface import create_mesh_from_regions
1556	from anuga.utilities.numerical_tools import cov
1557	from anuga.utilities.polygon import is_inside_polygon
1558	from anuga.fit_interpolate.benchmark_least_squares import mem_usage
1559
1560	attribute_smoothed = 'elevation'
1561
1562	if mesh_file is None:
1563	if verbose: print "building mesh"
1564	mesh_file = 'temp.msh'
1565
1566	if (north_boundary is None or south_boundary is None
1567	or east_boundary is None or west_boundary is None):
1568	no_boundary = True
1569	else:
1570	no_boundary = False
1571
1572	if no_boundary is True:
1573	msg = 'All boundaries must be defined'
1574	raise Expection, msg
1575
1576	poly_topo = [[east_boundary, south_boundary],
1577	[east_boundary, north_boundary],
1578	[west_boundary, north_boundary],
1579	[west_boundary, south_boundary]]
1580
1581	create_mesh_from_regions(poly_topo,
1582	boundary_tags={'back': [2],
1583	'side': [1,3],
1584	'ocean': [0]},
1585	maximum_triangle_area=mesh_resolution,
1586	filename=mesh_file,
1587	use_cache=cache,
1588	verbose=verbose)
1589
1590	else: # if mesh file provided
1591	# test mesh file exists?
1592	if verbose: "reading from file: %s" % mesh_file
1593	if access(mesh_file,F_OK) == 0:
1594	msg = "file %s doesn't exist!" % mesh_file
1595	raise IOError, msg
1596
1597	# split topo data
1598	if verbose: print 'Reading elevation file: %s' % data_file
1599	G = Geospatial_data(file_name = data_file)
1600	if verbose: print 'Start split'
1601	G_small, G_other = G.split(split_factor, seed_num, verbose=verbose)
1602	if verbose: print 'Finish split'
1603	points = G_small.get_data_points()
1604
1605	if verbose: print "Number of points in sample to compare: ", len(points)
1606
1607	if alpha_list == None:
1608	alphas = [0.001,0.01,100]
1609	#alphas = [0.000001, 0.00001, 0.0001, 0.001, 0.01,
1610	# 0.1, 1.0, 10.0, 100.0,1000.0,10000.0]
1611	else:
1612	alphas = alpha_list
1613
1614	# creates array with columns 1 and 2 are x, y. column 3 is elevation
1615	# 4 onwards is the elevation_predicted using the alpha, which will
1616	# be compared later against the real removed data
1617	data = num.array([], dtype=num.float)
1618
1619	data = num.resize(data, (len(points), 3+len(alphas)))
1620
1621	# gets relative point from sample
1622	data[:,0] = points[:,0]
1623	data[:,1] = points[:,1]
1624	elevation_sample = G_small.get_attributes(attribute_name=attribute_smoothed)
1625	data[:,2] = elevation_sample
1626
1627	normal_cov = num.array(num.zeros([len(alphas), 2]), dtype=num.float)
1628
1629	if verbose: print 'Setup computational domains with different alphas'
1630
1631	for i, alpha in enumerate(alphas):
1632	# add G_other data to domains with different alphas
1633	if verbose:
1634	print '\nCalculating domain and mesh for Alpha =', alpha, '\n'
1635	domain = Domain(mesh_file, use_cache=cache, verbose=verbose)
1636	if verbose: print domain.statistics()
1637	domain.set_quantity(attribute_smoothed,
1638	geospatial_data=G_other,
1639	use_cache=cache,
1640	verbose=verbose,
1641	alpha=alpha)
1642
1643	# Convert points to geospatial data for use with get_values below
1644	points_geo = Geospatial_data(points, domain.geo_reference)
1645
1646	# returns the predicted elevation of the points that were "split" out
1647	# of the original data set for one particular alpha
1648	if verbose: print 'Get predicted elevation for location to be compared'
1649	elevation_predicted = \
1650	domain.quantities[attribute_smoothed].\
1651	get_values(interpolation_points=points_geo)
1652
1653	# add predicted elevation to array that starts with x, y, z...
1654	data[:,i+3] = elevation_predicted
1655
1656	sample_cov = cov(elevation_sample)
1657	ele_cov = cov(elevation_sample - elevation_predicted)
1658	normal_cov[i,:] = [alpha, ele_cov / sample_cov]
1659
1660	if verbose:
1661	print 'Covariance for alpha ', normal_cov[i][0], '= ', \
1662	normal_cov[i][1]
1663	print '-------------------------------------------- \n'
1664
1665	normal_cov0 = normal_cov[:,0]
1666	normal_cov_new = num.take(normal_cov, num.argsort(normal_cov0), axis=0)
1667
1668	if plot_name is not None:
1669	from pylab import savefig, semilogx, loglog
1670
1671	semilogx(normal_cov_new[:,0], normal_cov_new[:,1])
1672	loglog(normal_cov_new[:,0], normal_cov_new[:,1])
1673	savefig(plot_name, dpi=300)
1674
1675	if mesh_file == 'temp.msh':
1676	remove(mesh_file)
1677
1678	if verbose:
1679	print 'Final results:'
1680	for i, alpha in enumerate(alphas):
1681	print ('covariance for alpha %s = %s '
1682	% (normal_cov[i][0], normal_cov[i][1]))
1683	print ('\nOptimal alpha is: %s '
1684	% normal_cov_new[(num.argmin(normal_cov_new, axis=0))[1], 0])
1685
1686	# covariance and optimal alpha
1687	return (min(normal_cov_new[:,1]),
1688	normal_cov_new[(num.argmin(normal_cov_new,axis=0))[1],0])
1689
1690
1691	##
1692	# @brief
1693	# @param data_file
1694	# @param alpha_list
1695	# @param mesh_file
1696	# @param boundary_poly
1697	# @param mesh_resolution
1698	# @param north_boundary
1699	# @param south_boundary
1700	# @param east_boundary
1701	# @param west_boundary
1702	# @param plot_name
1703	# @param split_factor
1704	# @param seed_num
1705	# @param cache
1706	# @param verbose
1707	def old_find_optimal_smoothing_parameter(data_file,
1708	alpha_list=None,
1709	mesh_file=None,
1710	boundary_poly=None,
1711	mesh_resolution=100000,
1712	north_boundary=None,
1713	south_boundary=None,
1714	east_boundary=None,
1715	west_boundary=None,
1716	plot_name='all_alphas',
1717	split_factor=0.1,
1718	seed_num=None,
1719	cache=False,
1720	verbose=False):
1721	"""
1722	data_file: must not contain points outside the boundaries defined
1723	and it either a pts, txt or csv file.
1724
1725	alpha_list: the alpha values to test in a single list
1726
1727	mesh_file: name of the created mesh file or if passed in will read it.
1728	NOTE, if there is a mesh file mesh_resolution,
1729	north_boundary, south... etc will be ignored.
1730
1731	mesh_resolution: the maximum area size for a triangle
1732
1733	north_boundary... west_boundary: the value of the boundary
1734
1735	plot_name: the name for the plot contain the results
1736
1737	seed_num: the seed to the random number generator
1738
1739	USAGE:
1740	value, alpha = find_optimal_smoothing_parameter(data_file=fileName,
1741	alpha_list=[0.0001, 0.01, 1],
1742	mesh_file=None,
1743	mesh_resolution=3,
1744	north_boundary=5,
1745	south_boundary=-5,
1746	east_boundary=5,
1747	west_boundary=-5,
1748	plot_name='all_alphas',
1749	seed_num=100000,
1750	verbose=False)
1751
1752	OUTPUT: returns the minumum normalised covalance calculate AND the
1753	alpha that created it. PLUS writes a plot of the results
1754
1755	NOTE: code will not work if the data_file extend is greater than the
1756	boundary_polygon or the north_boundary...west_boundary
1757	"""
1758
1759	from anuga.shallow_water import Domain
1760	from anuga.geospatial_data.geospatial_data import Geospatial_data
1761	from anuga.pmesh.mesh_interface import create_mesh_from_regions
1762	from anuga.utilities.numerical_tools import cov
1763	from anuga.utilities.polygon import is_inside_polygon
1764	from anuga.fit_interpolate.benchmark_least_squares import mem_usage
1765
1766	attribute_smoothed = 'elevation'
1767
1768	if mesh_file is None:
1769	mesh_file = 'temp.msh'
1770
1771	if (north_boundary is None or south_boundary is None
1772	or east_boundary is None or west_boundary is None):
1773	no_boundary = True
1774	else:
1775	no_boundary = False
1776
1777	if no_boundary is True:
1778	msg = 'All boundaries must be defined'
1779	raise Expection, msg
1780
1781	poly_topo = [[east_boundary, south_boundary],
1782	[east_boundary, north_boundary],
1783	[west_boundary, north_boundary],
1784	[west_boundary, south_boundary]]
1785
1786	create_mesh_from_regions(poly_topo,
1787	boundary_tags={'back': [2],
1788	'side': [1,3],
1789	'ocean': [0]},
1790	maximum_triangle_area=mesh_resolution,
1791	filename=mesh_file,
1792	use_cache=cache,
1793	verbose=verbose)
1794
1795	else: # if mesh file provided
1796	# test mesh file exists?
1797	if access(mesh_file,F_OK) == 0:
1798	msg = "file %s doesn't exist!" % mesh_file
1799	raise IOError, msg
1800
1801	# split topo data
1802	G = Geospatial_data(file_name=data_file)
1803	if verbose: print 'start split'
1804	G_small, G_other = G.split(split_factor, seed_num, verbose=verbose)
1805	if verbose: print 'finish split'
1806	points = G_small.get_data_points()
1807
1808	#FIXME: Remove points outside boundary polygon
1809	# print 'new point',len(points)
1810	#
1811	# new_points=[]
1812	# new_points=array([],typecode=Float)
1813	# new_points=resize(new_points,(len(points),2))
1814	# print "BOUNDARY", boundary_poly
1815	# for i,point in enumerate(points):
1816	# if is_inside_polygon(point,boundary_poly, verbose=True):
1817	# new_points[i] = point
1818	# print"WOW",i,new_points[i]
1819	# points = new_points
1820
1821	if verbose: print "Number of points in sample to compare: ", len(points)
1822
1823	if alpha_list == None:
1824	alphas = [0.001,0.01,100]
1825	#alphas = [0.000001, 0.00001, 0.0001, 0.001, 0.01,
1826	# 0.1, 1.0, 10.0, 100.0,1000.0,10000.0]
1827	else:
1828	alphas = alpha_list
1829
1830	domains = {}
1831
1832	if verbose: print 'Setup computational domains with different alphas'
1833
1834	for alpha in alphas:
1835	# add G_other data to domains with different alphas
1836	if verbose:
1837	print '\nCalculating domain and mesh for Alpha =', alpha, '\n'
1838	domain = Domain(mesh_file, use_cache=cache, verbose=verbose)
1839	if verbose: print domain.statistics()
1840	domain.set_quantity(attribute_smoothed,
1841	geospatial_data=G_other,
1842	use_cache=cache,
1843	verbose=verbose,
1844	alpha=alpha)
1845	domains[alpha] = domain
1846
1847	# creates array with columns 1 and 2 are x, y. column 3 is elevation
1848	# 4 onwards is the elevation_predicted using the alpha, which will
1849	# be compared later against the real removed data
1850	data = num.array([], dtype=num.float)
1851
1852	data = num.resize(data, (len(points), 3+len(alphas)))
1853
1854	# gets relative point from sample
1855	data[:,0] = points[:,0]
1856	data[:,1] = points[:,1]
1857	elevation_sample = G_small.get_attributes(attribute_name=attribute_smoothed)
1858	data[:,2] = elevation_sample
1859
1860	normal_cov = num.array(num.zeros([len(alphas), 2]), dtype=num.float)
1861
1862	if verbose:
1863	print 'Determine difference between predicted results and actual data'
1864
1865	for i, alpha in enumerate(domains):
1866	if verbose: print'Alpha =', alpha
1867
1868	points_geo = domains[alpha].geo_reference.change_points_geo_ref(points)
1869	# returns the predicted elevation of the points that were "split" out
1870	# of the original data set for one particular alpha
1871	elevation_predicted = \
1872	domains[alpha].quantities[attribute_smoothed].\
1873	get_values(interpolation_points=points_geo)
1874
1875	# add predicted elevation to array that starts with x, y, z...
1876	data[:,i+3] = elevation_predicted
1877
1878	sample_cov = cov(elevation_sample)
1879	ele_cov = cov(elevation_sample - elevation_predicted)
1880	normal_cov[i,:] = [alpha,ele_cov / sample_cov]
1881	print 'memory usage during compare', mem_usage()
1882	if verbose: print 'cov', normal_cov[i][0], '= ', normal_cov[i][1]
1883
1884	normal_cov0 = normal_cov[:,0]
1885	normal_cov_new = num.take(normal_cov, num.argsort(normal_cov0), axis=0)
1886
1887	if plot_name is not None:
1888	from pylab import savefig,semilogx,loglog
1889
1890	semilogx(normal_cov_new[:,0], normal_cov_new[:,1])
1891	loglog(normal_cov_new[:,0], normal_cov_new[:,1])
1892	savefig(plot_name, dpi=300)
1893	if mesh_file == 'temp.msh':
1894	remove(mesh_file)
1895
1896	return (min(normal_cov_new[:,1]),
1897	normal_cov_new[(num.argmin(normal_cov_new, axis=0))[1],0])
1898
1899
1900	if __name__ == "__main__":
1901	pass
1902

Note: See TracBrowser for help on using the repository browser.

Download in other formats: