source: branches/numpy/anuga/geospatial_data/geospatial_data.py @ 6410

Last change on this file since 6410 was 6410, checked in by rwilson, 15 years ago

numpy changes.

File size: 69.8 KB
Line 
1"""Class Geospatial_data - Manipulation of locations on the planet and
2associated attributes.
3
4"""
5
6from sys import maxint
7from os import access, F_OK, R_OK,remove
8from types import DictType
9from warnings import warn
10from string import lower
11from RandomArray import randint, seed, get_seed
12from copy import deepcopy
13from Scientific.IO.NetCDF import NetCDFFile
14
15import numpy as num
16
17from anuga.coordinate_transforms.lat_long_UTM_conversion import UTMtoLL
18from anuga.utilities.numerical_tools import ensure_numeric
19from anuga.coordinate_transforms.geo_reference import Geo_reference, \
20     TitleError, DEFAULT_ZONE, ensure_geo_reference, write_NetCDF_georeference
21from anuga.coordinate_transforms.redfearn import convert_from_latlon_to_utm
22from anuga.utilities.system_tools import clean_line
23from anuga.utilities.anuga_exceptions import ANUGAError
24from anuga.config import points_file_block_line_size as MAX_READ_LINES
25from anuga.config import netcdf_mode_r, netcdf_mode_w, netcdf_mode_a
26from anuga.config import netcdf_float
27
28DEFAULT_ATTRIBUTE = 'elevation'
29
30
31##
32# @brief ??
33class Geospatial_data:
34
    ##
    # @brief Create a Geospatial_data instance from points and/or a file.
    # @param data_points Mx2 iterable of tuples or array of x,y coordinates
    #                    (may also be a points file name).
    # @param attributes Associated values for each data point.
    # @param geo_reference Geo_reference object giving the points' origin.
    # @param default_attribute_name Attribute returned when none is named.
    # @param file_name Name of a .txt, .csv or .pts input file.
    # @param latitudes Vector of latitudes (alternative to data_points).
    # @param longitudes Vector of longitudes (alternative to data_points).
    # @param points_are_lats_longs True if points are lat/long, not UTM.
    # @param max_read_lines Size of block to read, if blocking.
    # @param load_file_now True if blocking but we want to read file now.
    # @param verbose True if this class instance is verbose.
    def __init__(self,
                 data_points=None, # this can also be a points file name
                 attributes=None,
                 geo_reference=None,
                 default_attribute_name=None,
                 file_name=None,
                 latitudes=None,
                 longitudes=None,
                 points_are_lats_longs=False,
                 max_read_lines=None,
                 load_file_now=True,
                 verbose=False):
        """Create instance from data points and associated attributes

        data_points: x,y coordinates in meters. Type must be either a
        sequence of 2-tuples or an Mx2 numeric array of floats.  A file name
        with extension .txt, .csv or .pts can also be passed in here.

        attributes: Associated values for each data point. The type
        must be either a list or an array of length M or a dictionary
        of lists (or arrays) of length M. In the latter case the keys
        in the dictionary represent the attribute names, in the former
        the attribute will get the default name "elevation".

        geo_reference: Object representing the origin of the data
        points. It contains UTM zone, easting and northing and data
        points are assumed to be relative to this origin.
        If geo_reference is None, the default geo ref object is used.

        default_attribute_name: Name of default attribute to be used with
        get_attribute_values. The idea is that the dataset can be
        equipped with information about which attribute to return.
        If None, the default is the "first"

        latitudes, longitudes: Vectors of latitudes and longitudes,
        used to specify location instead of points.

        points_are_lats_longs: Set this as true if the points are actually
        lats and longs, not UTM

        max_read_lines: The number of rows read into memory when using
        blocking to read a file.

        load_file_now:  If true the file is automatically loaded
        into the geospatial instance. Used when blocking.

        file_name: Name of input netCDF file or .txt file. netCDF file must
        have dimensions "points" etc.
        .txt file is a comma separated file with x, y and attribute
        data.

        The first line has the titles of the columns.  The first two
        column titles are checked to see if they start with lat or
        long (not case sensitive).  If so the data is assumed to be
        latitude and longitude, in decimal format and converted to
        UTM.  Otherwise the first two columns are assumed to be the x
        and y, and the title names actually used are ignored.


        The format for a .txt file is:
            1st line:     [column names]
            other lines:  x y [attributes]

            for example:
            x, y, elevation, friction
            0.6, 0.7, 4.9, 0.3
            1.9, 2.8, 5, 0.3
            2.7, 2.4, 5.2, 0.3

        The first two columns have to be x, y or lat, long
        coordinates.


        The format for a Points dictionary is:
          ['pointlist'] a 2 column array describing points. 1st column x,
          2nd column y.
          ['attributelist'], a dictionary of 1D arrays, representing
          attribute values at the point.  The dictionary key is the attribute
          header.
          ['geo_reference'] a Geo_reference object. Use if the point
          information is relative. This is optional.
            eg
            dic['pointlist'] = [[1.0,2.0],[3.0,5.0]]
            dic['attributelist']['elevation'] = [7.0,5.0]

        verbose: True if this instance should print progress messages.
        """

        if isinstance(data_points, basestring):
            # assume data_points is really a file name
            file_name = data_points

        self.set_verbose(verbose)
        self.geo_reference = None # create the attribute
        self.file_name = file_name

        # Fall back to the configured block size when none is given
        if max_read_lines is None:
            self.max_read_lines = MAX_READ_LINES
        else:
            self.max_read_lines = max_read_lines

        if file_name is None:
            if latitudes is not None \
               or longitudes is not None \
               or points_are_lats_longs:
                # presumably converts lat/long input into UTM points plus a
                # georeference - helper not visible in this chunk, confirm
                data_points, geo_reference = \
                    _set_using_lat_long(latitudes=latitudes,
                                        longitudes=longitudes,
                                        geo_reference=geo_reference,
                                        data_points=data_points,
                                        points_are_lats_longs=\
                                        points_are_lats_longs)
            self.check_data_points(data_points)
            self.set_attributes(attributes)
            self.set_geo_reference(geo_reference)
            self.set_default_attribute_name(default_attribute_name)
        elif load_file_now is True:
            # watch for case where file name and points,
            # attributes etc are provided!!
            # if file name then all provided info will be removed!

            if verbose is True:
                if file_name is not None:
                    print 'Loading Geospatial data from file: %s' % file_name

            self.import_points_file(file_name, verbose=verbose)

            self.check_data_points(self.data_points)
            self.set_attributes(self.attributes)
            self.set_geo_reference(self.geo_reference)
            self.set_default_attribute_name(default_attribute_name)

        if verbose is True:
            if file_name is not None:
                print 'Geospatial data created from file: %s' % file_name
                if load_file_now is False:
                    print 'Data will be loaded blockwise on demand'

                    if file_name.endswith('csv') or file_name.endswith('txt'):
                        pass
                        # This message was misleading.
                        # FIXME (Ole): Are we blocking here or not?
                        # print 'ASCII formats are not that great for '
                        # print 'blockwise reading. Consider storing this'
                        # print 'data as a pts NetCDF format'
193
194    ##
195    # @brief Return length of the points set.
196    def __len__(self):
197        return len(self.data_points)
198
199    ##
200    # @brief Return a string representation of the points set.
201    def __repr__(self):
202        return str(self.get_data_points(absolute=True))
203
204    ##
205    # @brief Check data points.
206    # @param data_points Points data to check and store in instance.
207    # @note Throws ValueError exception if no data.
208    def check_data_points(self, data_points):
209        """Checks data points"""
210
211        if data_points is None:
212            self.data_points = None
213            msg = 'There is no data or file provided!'
214            raise ValueError, msg
215
216        else:
217            self.data_points = ensure_numeric(data_points)
218            if not (0,) == self.data_points.shape:
219                assert len(self.data_points.shape) == 2
220                assert self.data_points.shape[1] == 2
221
222    ##
223    # @brief Check and assign attributes data.
224    # @param attributes Dictionary or scalar to save as .attributes.
225    # @note Throws exception if unable to convert dict keys to numeric.
226    def set_attributes(self, attributes):
227        """Check and assign attributes dictionary"""
228
229        if attributes is None:
230            self.attributes = None
231            return
232
233        if not isinstance(attributes, DictType):
234            # Convert single attribute into dictionary
235            attributes = {DEFAULT_ATTRIBUTE: attributes}
236
237        # Check input attributes
238        for key in attributes.keys():
239            try:
240                attributes[key] = ensure_numeric(attributes[key])
241            except:
242                msg = ("Attribute '%s' (%s) could not be converted to a"
243                       "numeric vector" % (str(key), str(attributes[key])))
244                raise Exception, msg
245
246        self.attributes = attributes
247
248    ##
249    # @brief Set the georeference of geospatial data.
250    # @param geo_reference The georeference data to set.
251    # @note Will raise exception if param not instance of Geo_reference.
252    def set_geo_reference(self, geo_reference):
253        """Set the georeference of geospatial data.
254
255        It can also be used to change the georeference and will ensure that
256        the absolute coordinate values are unchanged.
257        """
258
259        from anuga.coordinate_transforms.geo_reference import Geo_reference
260
261        if geo_reference is None:
262            # Use default - points are in absolute coordinates
263            geo_reference = Geo_reference()
264
265        # Allow for tuple (zone, xllcorner, yllcorner)
266        geo_reference = ensure_geo_reference(geo_reference)
267
268        if not isinstance(geo_reference, Geo_reference):
269            # FIXME (Ole): This exception will be raised even
270            # if geo_reference is None. Is that the intent Duncan?
271            msg = 'Argument geo_reference must be a valid Geo_reference '
272            msg += 'object or None.'
273            raise Expection, msg
274
275        # If a geo_reference already exists, change the point data according to
276        # the new geo reference
277        if  self.geo_reference is not None:
278            self.data_points = self.get_data_points(geo_reference=geo_reference)
279
280        self.geo_reference = geo_reference
281
282    ##
283    # @brief Set default attribute name.
284    # @param default_attribute_name The default to save.
285    def set_default_attribute_name(self, default_attribute_name):
286        self.default_attribute_name = default_attribute_name
287
288    ##
289    # @brief Set the instance verbose flag.
290    # @param verbose The value to save.
291    # @note Will raise exception if param is not True or False.
292    def set_verbose(self, verbose=False):
293        if verbose in [False, True]:
294            self.verbose = verbose
295        else:
296            msg = 'Illegal value: %s' % str(verbose)
297            raise Exception, msg
298
299    ##
300    # @brief Clip geospatial data by a given polygon.
301    # @param polygon The polygon to clip with.
302    # @param closed True if points on clip boundary are not included in result.
303    # @param verbose True if this function is verbose.
304    def clip(self, polygon, closed=True, verbose=False):
305        """Clip geospatial data by a polygon
306
307        Input
308          polygon - Either a list of points, an Nx2 array or
309                    a Geospatial data object.
310          closed - (optional) determine whether points on boundary should be
311          regarded as belonging to the polygon (closed = True)
312          or not (closed = False). Default is True.
313
314        Output
315          New geospatial data object representing points inside
316          specified polygon.
317
318
319        Note - this method is non-destructive and leaves the data in 'self'
320        unchanged
321        """
322
323        from anuga.utilities.polygon import inside_polygon
324
325        if isinstance(polygon, Geospatial_data):
326            # Polygon is an object - extract points
327            polygon = polygon.get_data_points()
328
329        points = self.get_data_points()
330        inside_indices = inside_polygon(points, polygon, closed, verbose)
331
332        clipped_G = self.get_sample(inside_indices)
333
334        return clipped_G
335
336    ##
337    # @brief Clip points data by polygon, return points outside polygon.
338    # @param polygon The polygon to clip with.
339    # @param closed True if points on clip boundary are not included in result.
340    # @param verbose True if this function is verbose.
341    def clip_outside(self, polygon, closed=True, verbose=False):
342        """Clip geospatial date by a polygon, keeping data OUTSIDE of polygon
343
344        Input
345          polygon - Either a list of points, an Nx2 array or
346                    a Geospatial data object.
347          closed - (optional) determine whether points on boundary should be
348          regarded as belonging to the polygon (closed = True)
349          or not (closed = False). Default is True.
350
351        Output
352          Geospatial data object representing point OUTSIDE specified polygon
353        """
354
355        from anuga.utilities.polygon import outside_polygon
356
357        if isinstance(polygon, Geospatial_data):
358            # Polygon is an object - extract points
359            polygon = polygon.get_data_points()
360
361        points = self.get_data_points()
362        outside_indices = outside_polygon(points, polygon, closed,verbose)
363
364        clipped_G = self.get_sample(outside_indices)
365
366        return clipped_G
367
368    ##
369    # @brief Get instance geo_reference data.
370    def get_geo_reference(self):
371        return self.geo_reference
372
373    ##
374    # @brief Get coordinates for all data points as an Nx2 array.
375    # @param absolute If True, return UTM, else relative to xll/yll corners.
376    # @param geo_reference If supplied, points are relative to it.
377    # @param as_lat_long If True, return points as lat/lon.
378    # @param isSouthHemisphere If True, return lat/lon points in S.Hemi.
379    # @return A set of data points, in appropriate form.
380    def get_data_points(self, absolute=True, geo_reference=None,
381                        as_lat_long=False, isSouthHemisphere=True):
382        """Get coordinates for all data points as an Nx2 array
383
384        If absolute is False returned coordinates are relative to the
385        internal georeference's xll and yll corners, otherwise
386        absolute UTM coordinates are returned.
387
388        If a geo_reference is passed the points are returned relative
389        to that geo_reference.
390
391        isSH (isSouthHemisphere) is only used when getting data
392        points "as_lat_long" is True and if FALSE will return lats and
393        longs valid for the Northern Hemisphere.
394
395        Default: absolute is True.
396        """
397
398        if as_lat_long is True:
399            msg = "Points need a zone to be converted into lats and longs"
400            assert self.geo_reference is not None, msg
401            zone = self.geo_reference.get_zone()
402            assert self.geo_reference.get_zone() is not DEFAULT_ZONE, msg
403            lats_longs = []
404            for point in self.get_data_points(True):
405                # UTMtoLL(northing, easting, zone,
406                lat_calced, long_calced = UTMtoLL(point[1], point[0],
407                                                  zone, isSouthHemisphere)
408                lats_longs.append((lat_calced, long_calced)) # to hash
409            return lats_longs
410
411        if absolute is True and geo_reference is None:
412            return self.geo_reference.get_absolute(self.data_points)
413        elif geo_reference is not None:
414            return geo_reference.change_points_geo_ref(self.data_points,
415                                                       self.geo_reference)
416        else:
417            # If absolute is False
418            return self.data_points
419
420    ##
421    # @brief Get value for attribute name.
422    # @param attribute_name Name to get value for.
423    # @note If name passed is None, return default attribute value.
424    def get_attributes(self, attribute_name=None):
425        """Return values for one named attribute.
426
427        If attribute_name is None, default_attribute_name is used
428        """
429
430        if attribute_name is None:
431            if self.default_attribute_name is not None:
432                attribute_name = self.default_attribute_name
433            else:
434                attribute_name = self.attributes.keys()[0]
435                # above line takes the first one from keys
436
437        if self.verbose is True:
438            print 'Using attribute %s' %attribute_name
439            print 'Available attributes: %s' %(self.attributes.keys())
440
441        msg = 'Attribute name %s does not exist in data set' % attribute_name
442        assert self.attributes.has_key(attribute_name), msg
443
444        return self.attributes[attribute_name]
445
446    ##
447    # @brief Get all instance attributes.
448    # @return The instance attribute dictionary, or None if no attributes.
449    def get_all_attributes(self):
450        """Return values for all attributes.
451        The return value is either None or a dictionary (possibly empty).
452        """
453
454        return self.attributes
455
456    ##
457    # @brief Override __add__() to allow addition of geospatial objects.
458    # @param self This object.
459    # @param other The second object.
460    # @return The new geospatial object.
461    def __add__(self, other):
462        """Returns the addition of 2 geospatical objects,
463        objects are concatencated to the end of each other
464
465        NOTE: doesn't add if objects contain different
466        attributes
467
468        Always return absolute points!
469        This also means, that if you add None to the object,
470        it will be turned into absolute coordinates
471
472        other can be None in which case nothing is added to self.
473        """
474
475        # find objects zone and checks if the same
476        geo_ref1 = self.get_geo_reference()
477        zone1 = geo_ref1.get_zone()
478
479        if other is not None:
480            geo_ref2 = other.get_geo_reference()
481            zone2 = geo_ref2.get_zone()
482            geo_ref1.reconcile_zones(geo_ref2)
483            new_points = num.concatenate((self.get_data_points(absolute=True),
484                                          other.get_data_points(absolute=True)),
485                                         axis = 0)
486
487            # Concatenate attributes if any
488            if self.attributes is None:
489                if other.attributes is not None:
490                    msg = 'Geospatial data must have the same '
491                    msg += 'attributes to allow addition.'
492                    raise Exception, msg
493
494                new_attributes = None
495            else:
496                new_attributes = {}
497                for x in self.attributes.keys():
498                    if other.attributes.has_key(x):
499                        attrib1 = self.attributes[x]
500                        attrib2 = other.attributes[x]
501                        new_attributes[x] = num.concatenate((attrib1, attrib2))
502                    else:
503                        msg = 'Geospatial data must have the same \n'
504                        msg += 'attributes to allow addition.'
505                        raise Exception, msg
506        else:
507            # other is None:
508            new_points = self.get_data_points(absolute=True)
509            new_attributes = self.attributes
510
511        # Instantiate new data object and return absolute coordinates
512        new_geo_ref = Geo_reference(geo_ref1.get_zone(), 0.0, 0.0)
513        return Geospatial_data(new_points, new_attributes, new_geo_ref)
514
515    ##
516    # @brief Override the addition case where LHS isn't geospatial object.
517    # @param self This object.
518    # @param other The second object.
519    # @return The new geospatial object.
520    def __radd__(self, other):
521        """Handle cases like None + Geospatial_data(...)"""
522
523        return self + other
524
525    ###
526    #  IMPORT/EXPORT POINTS FILES
527    ###
528
529    ##
530    # @brief Import a .txt, .csv or .pts points data file.
531    # @param file_name
532    # @param delimiter
533    # @param verbose True if this function is to be verbose.
534    # @note Will throw IOError or SyntaxError if there is a problem.
535    def import_points_file(self, file_name, delimiter=None, verbose=False):
536        """ load an .txt, .csv or .pts file
537
538        Note: will throw an IOError/SyntaxError if it can't load the file.
539        Catch these!
540
541        Post condition: self.attributes dictionary has been set
542        """
543
544        if access(file_name, F_OK) == 0 :
545            msg = 'File %s does not exist or is not accessible' % file_name
546            raise IOError, msg
547
548        attributes = {}
549        if file_name[-4:] == ".pts":
550            try:
551                data_points, attributes, geo_reference = \
552                             _read_pts_file(file_name, verbose)
553            except IOError, e:
554                msg = 'Could not open file %s ' % file_name
555                raise IOError, msg
556        elif file_name[-4:] == ".txt" or file_name[-4:]== ".csv":
557            try:
558                data_points, attributes, geo_reference = \
559                             _read_csv_file(file_name, verbose)
560            except IOError, e:
561                # This should only be if a file is not found
562                msg = 'Could not open file %s. ' % file_name
563                msg += 'Check the file location.'
564                raise IOError, msg
565            except SyntaxError, e:
566                # This should only be if there is a format error
567                msg = 'Problem with format of file %s. \n' %file_name
568                msg += Error_message['IOError']
569                raise SyntaxError, msg
570        else:
571            msg = 'Extension %s is unknown' %file_name[-4:]
572            raise IOError, msg
573
574        self.data_points = data_points
575        self.attributes = attributes
576        self.geo_reference = geo_reference
577
578    ##
579    # @brief Write points data to a file (.csv or .pts).
580    # @param file_name Path to file to write.
581    # @param absolute ??
582    # @param as_lat_long ??
583    # @param isSouthHemisphere ??
584    def export_points_file(self, file_name, absolute=True,
585                           as_lat_long=False, isSouthHemisphere=True):
586        """write a points file as a text (.csv) or binary (.pts) file
587
588        file_name is the file name, including the extension
589        The point_dict is defined at the top of this file.
590
591        If absolute is True data the xll and yll are added to the points value
592        and the xll and yll of the geo_reference are set to 0.
593
594        If absolute is False data points at returned as relative to the xll
595        and yll and geo_reference remains uneffected
596
597        isSouthHemisphere: is only used when getting data
598        points "as_lat_long" is True and if FALSE will return lats and
599        longs valid for the Northern Hemisphere.
600        """
601
602        if (file_name[-4:] == ".pts"):
603            if absolute is True:
604                geo_ref = deepcopy(self.geo_reference)
605                geo_ref.xllcorner = 0
606                geo_ref.yllcorner = 0
607                _write_pts_file(file_name,
608                                self.get_data_points(absolute),
609                                self.get_all_attributes(),
610                                geo_ref)
611            else:
612                _write_pts_file(file_name,
613                                self.get_data_points(absolute),
614                                self.get_all_attributes(),
615                                self.get_geo_reference())
616
617        elif file_name[-4:] == ".txt" or file_name[-4:] == ".csv":
618            msg = "ERROR: trying to write a .txt file with relative data."
619            assert absolute, msg
620            _write_csv_file(file_name,
621                            self.get_data_points(absolute=True,
622                                                 as_lat_long=as_lat_long,
623                                  isSouthHemisphere=isSouthHemisphere),
624                            self.get_all_attributes(),
625                            as_lat_long=as_lat_long)
626
627        elif file_name[-4:] == ".urs" :
628            msg = "ERROR: Can not write a .urs file as a relative file."
629            assert absolute, msg
630            _write_urs_file(file_name,
631                            self.get_data_points(as_lat_long=True,
632                                  isSouthHemisphere=isSouthHemisphere))
633
634        else:
635            msg = 'Unknown file type %s ' %file_name
636            raise IOError, msg
637
638    ##
639    # @brief Get a subset of data that is referred to by 'indices'.
640    # @param indices A list of indices to select data subset with.
641    # @return A geospatial object containing data subset.
642    def get_sample(self, indices):
643        """ Returns a object which is a subset of the original
644        and the data points and attributes in this new object refer to
645        the indices provided
646
647        Input
648            indices- a list of integers that represent the new object
649        Output
650            New geospatial data object representing points specified by
651            the indices
652        """
653
654        # FIXME: add the geo_reference to this
655        points = self.get_data_points()
656        sampled_points = num.take(points, indices, axis=0)
657
658        attributes = self.get_all_attributes()
659
660        sampled_attributes = {}
661        if attributes is not None:
662            for key, att in attributes.items():
663                sampled_attributes[key] = num.take(att, indices, axis=0)
664
665        return Geospatial_data(sampled_points, sampled_attributes)
666
    ##
    # @brief Split one geospatial object into two disjoint objects.
    # @param factor Relative size (0..1) of the first result object.
    # @param seed_num Random seed - used mainly for unit test repeatability.
    # @param verbose True if this function is to be verbose.
    # @note Points in each result object are selected randomly.
    def split(self, factor=0.5, seed_num=None, verbose=False):
        """Returns two geospatial_data object, first is the size of the 'factor'
        smaller the original and the second is the remainder. The two
        new objects are disjoint sets of each other.

        Points of the two new object have selected RANDOMLY.

        This method create two lists of indices which are passed into
        get_sample.  The lists are created using random numbers, and
        they are unique sets eg.  total_list(1,2,3,4,5,6,7,8,9)
        random_list(1,3,6,7,9) and remainder_list(0,2,4,5,8)

        Input -  the factor which to split the object, if 0.1 then 10% of the
                 together object will be returned

        Output - two geospatial_data objects that are disjoint sets of the
                 original
        """

        i = 0
        self_size = len(self)
        random_list = []
        remainder_list = []
        new_size = round(factor*self_size)

        # Find unique random numbers
        if verbose: print "make unique random number list and get indices"

        total = num.array(range(self_size), num.int)    #array default#
        total_list = total.tolist()

        if verbose: print "total list len", len(total_list)

        # There will be repeated random numbers however will not be a
        # problem as they are being 'pop'ed out of array so if there
        # are two numbers the same they will pop different indices,
        # still basically random
        ## create list of non-unique random numbers
        if verbose: print "create random numbers list %s long" %new_size

        # Set seed if provided, mainly important for unit test!
        # plus recalculate seed when no seed provided.
        if seed_num != None:
            seed(seed_num, seed_num)
        else:
            seed()

        if verbose: print "seed:", get_seed()

        # randint comes from the (long-deprecated) RandomArray module
        # imported at the top of this file
        random_num = randint(0, self_size-1, (int(new_size),))
        random_num = random_num.tolist()

        # need to sort and reverse so the pop() works correctly
        # (popping from the high end keeps the lower indices valid)
        random_num.sort()
        random_num.reverse()

        if verbose: print "make random number list and get indices"

        j = 0
        k = 1
        remainder_list = total_list[:]

        # pops array index (random_num) from remainder_list
        # (which starts as the
        # total_list and appends to random_list
        random_num_len = len(random_num)
        for i in random_num:
            random_list.append(remainder_list.pop(i))
            j += 1
            # prints progress
            if verbose and round(random_num_len/10*k) == j:
                print '(%s/%s)' % (j, random_num_len)
                k += 1

        # FIXME: move to tests, it might take a long time
        # then create an array of random length between 500 and 1000,
        # and use a random factor between 0 and 1
        # setup for assertion
        test_total = random_list[:]
        test_total.extend(remainder_list)
        test_total.sort()
        msg = 'The two random lists made from the original list when added ' \
              'together DO NOT equal the original list'
        assert total_list == test_total, msg

        # Get new samples
        if verbose: print "get values of indices for random list"
        G1 = self.get_sample(random_list)
        if verbose: print "get values of indices for opposite of random list"
        G2 = self.get_sample(remainder_list)

        return G1, G2
765
766    ##
767    # @brief Allow iteration over this object.
    def __iter__(self):
        """Initialise blocked reading of the underlying point file.

        Reads the file header and the total number of points, and saves
        the file pointer position so that next() can deliver the data in
        blocks of (at most) self.max_read_lines points.  Returns self,
        as required by the iterator protocol.
        """
        from Scientific.IO.NetCDF import NetCDFFile
        # FIXME - what to do if the file isn't there

        # FIXME (Ole): Shouldn't this go into the constructor?
        # This method acts like the constructor when blocking.
        # ... and shouldn't it be called block_size?
        #
        # Use the configured default block size if none was supplied.
        if self.max_read_lines is None:
            self.max_read_lines = MAX_READ_LINES

        if self.file_name[-4:] == ".pts":
            # See if the file is there.  Throw a QUIET IO error if it isn't
            fd = open(self.file_name,'r')
            fd.close()

            # Throws prints to screen if file not present
            self.fid = NetCDFFile(self.file_name, netcdf_mode_r)

            # Georeference, attribute names and point count from the header.
            (self.blocking_georef,
             self.blocking_keys,
             self.number_of_points) = _read_pts_file_header(self.fid,
                                                            self.verbose)
            self.start_row = 0
            self.last_row = self.number_of_points
            # show_verbose/verbose_block_size throttle progress reporting
            # to roughly one message per tenth of the file.
            self.show_verbose = 0
            self.verbose_block_size = (self.last_row + 10)/10
            self.block_number = 0
            self.number_of_blocks = self.number_of_points/self.max_read_lines
            # This computes the number of full blocks. The last block may be
            # smaller and won't be included in this estimate.

            if self.verbose is True:
                print 'Reading %d points (in ~%d blocks) from file %s. ' \
                      % (self.number_of_points,
                         self.number_of_blocks,
                         self.file_name),
                print 'Each block consists of %d data points' \
                      % self.max_read_lines

        else:
            # Assume the file is a csv file
            file_pointer = open(self.file_name)
            self.header, self.file_pointer = _read_csv_file_header(file_pointer)
            self.blocking_georef = None # Used for reconciling zones

        return self
818
819    ##
820    # @brief Read another block into the instance.
    def next(self):
        """Read the next block of points and return a new Geospatial_data.

        Instantiates a Geospatial_data object from the next block of (at
        most) self.max_read_lines points.  When the file is exhausted the
        blocking state attributes are removed and StopIteration is raised.
        """

        if self.file_name[-4:] == ".pts":
            if self.start_row == self.last_row:
                # Read the end of the file last iteration
                # Remove blocking attributes
                self.fid.close()
                del self.max_read_lines
                del self.blocking_georef
                del self.last_row
                del self.start_row
                del self.blocking_keys
                del self.fid
                raise StopIteration
            # Clamp the block end to the last row of the file.
            fin_row = self.start_row + self.max_read_lines
            if fin_row > self.last_row:
                fin_row = self.last_row

            if self.verbose is True:
                # Report progress roughly once per verbose_block_size points.
                if self.show_verbose >= self.start_row \
                   and self.show_verbose < fin_row:
                    print 'Reading block %d (points %d to %d) out of %d'\
                          %(self.block_number,
                            self.start_row,
                            fin_row,
                            self.number_of_blocks)

                    self.show_verbose += max(self.max_read_lines,
                                             self.verbose_block_size)


            # Read next block
            pointlist, att_dict, = _read_pts_file_blocking(self.fid,
                                                           self.start_row,
                                                           fin_row,
                                                           self.blocking_keys)

            geo = Geospatial_data(pointlist, att_dict, self.blocking_georef)
            self.start_row = fin_row

            self.block_number += 1

        else:
            # Assume the file is a csv file
            try:
                (pointlist,
                 att_dict,
                 geo_ref,
                 self.file_pointer) = \
                        _read_csv_file_blocking(self.file_pointer,
                                                self.header[:],
                                                max_read_lines=\
                                                    self.max_read_lines,
                                                verbose=self.verbose)

                # Check that the zones haven't changed.
                if geo_ref is not None:
                    geo_ref.reconcile_zones(self.blocking_georef)
                    self.blocking_georef = geo_ref
                elif self.blocking_georef is not None:
                    msg = 'Geo reference given, then not given.'
                    msg += ' This should not happen.'
                    raise ValueError, msg
                geo = Geospatial_data(pointlist, att_dict, geo_ref)
            except StopIteration:
                # End of data: close the file and drop the blocking state.
                self.file_pointer.close()
                del self.header
                del self.file_pointer
                raise StopIteration
            except ANUGAError:
                # Propagate ANUGA errors after cleaning up the file state.
                self.file_pointer.close()
                del self.header
                del self.file_pointer
                raise
            except SyntaxError:
                self.file_pointer.close()
                del self.header
                del self.file_pointer
                # This should only be if there is a format error
                msg = 'Could not open file %s. \n' % self.file_name
                msg += Error_message['IOError']
                raise SyntaxError, msg
        return geo
905
906
907##################### Error messages ###########
# Map of error names to user-facing explanatory messages.
Error_message = {}
Em = Error_message      # short alias, kept for external users of this module
Em['IOError'] = (
    "NOTE: The format for a comma separated .txt/.csv file is:\n"
    "        1st line:     [column names]\n"
    "        other lines:  [x value], [y value], [attributes]\n"
    "\n"
    "           for example:\n"
    "           x, y, elevation, friction\n"
    "           0.6, 0.7, 4.9, 0.3\n"
    "           1.9, 2.8, 5, 0.3\n"
    "           2.7, 2.4, 5.2, 0.3\n"
    "\n"
    "The first two columns are assumed to be x, y coordinates.\n"
    "The attribute values must be numeric.\n"
)
922
923##
924# @brief ??
925# @param latitudes ??
926# @param longitudes ??
927# @param geo_reference ??
928# @param data_points ??
929# @param points_are_lats_longs ??
def _set_using_lat_long(latitudes,
                        longitudes,
                        geo_reference,
                        data_points,
                        points_are_lats_longs):
    """Convert latitude/longitude information to UTM data points.

    Exactly one of two input styles is accepted: separate 'latitudes'
    and 'longitudes' sequences, or 'data_points' in (lat, long) order
    with points_are_lats_longs True.  A pre-existing geo_reference is
    not allowed in either case.

    Returns (data_points, geo_reference) where data_points are UTM
    coordinates and geo_reference carries the computed UTM zone.
    Raises ValueError on inconsistent or missing inputs.
    """

    if geo_reference is not None:
        msg = "A georeference is specified yet latitude and longitude " \
              "are also specified!"
        raise ValueError(msg)

    if data_points is not None and not points_are_lats_longs:
        msg = "Data points are specified yet latitude and longitude are " \
              "also specified."
        raise ValueError(msg)

    if points_are_lats_longs:
        if data_points is None:
            raise ValueError("Data points are not specified.")
        # Split the (lat, long) pairs into two separate vectors.
        lats_longs = ensure_numeric(data_points)
        latitudes = num.ravel(lats_longs[:,0:1])
        longitudes = num.ravel(lats_longs[:,1:])

    if latitudes is None and longitudes is None:
        raise ValueError("Latitudes and Longitudes are not specified.")

    if latitudes is None:
        raise ValueError("Longitudes are specified yet latitudes aren't.")

    if longitudes is None:
        raise ValueError("Latitudes are specified yet longitudes aren't.")

    utm_points, zone = convert_from_latlon_to_utm(latitudes=latitudes,
                                                  longitudes=longitudes)
    return utm_points, Geo_reference(zone=zone)
970
971
972##
973# @brief Read a .pts data file.
974# @param file_name Path to file to read.
975# @param verbose True if this function is to be verbose.
976# @return (pointlist, attributes, geo_reference)
977def _read_pts_file(file_name, verbose=False):
978    """Read .pts NetCDF file
979
980    Return a dic of array of points, and dic of array of attribute
981    eg
982    dic['points'] = [[1.0,2.0],[3.0,5.0]]
983    dic['attributelist']['elevation'] = [[7.0,5.0]]
984    """
985
986    from Scientific.IO.NetCDF import NetCDFFile
987
988    if verbose: print 'Reading ', file_name
989
990    # See if the file is there.  Throw a QUIET IO error if it isn't
991    fd = open(file_name,'r')
992    fd.close()
993
994    # Throws prints to screen if file not present
995    fid = NetCDFFile(file_name, netcdf_mode_r)
996
997    pointlist = num.array(fid.variables['points'])
998    keys = fid.variables.keys()
999
1000    if verbose: print 'Got %d variables: %s' % (len(keys), keys)
1001
1002    try:
1003        keys.remove('points')
1004    except IOError, e:
1005        fid.close()
1006        msg = "Expected keyword 'points' but could not find it"
1007        raise IOError, msg
1008
1009    attributes = {}
1010    for key in keys:
1011        if verbose: print "reading attribute '%s'" % key
1012
1013        attributes[key] = num.array(fid.variables[key])
1014
1015    try:
1016        geo_reference = Geo_reference(NetCDFObject=fid)
1017    except AttributeError, e:
1018        geo_reference = None
1019
1020    fid.close()
1021
1022    return pointlist, attributes, geo_reference
1023
1024
1025##
1026# @brief Read a .csv data file.
1027# @param file_name Path to the .csv file to read.
1028# @param verbose True if this function is to be verbose.
def _read_csv_file(file_name, verbose=False):
    """Read a .csv points file.

    file_name  path of the .csv file to read
    verbose    True if this function is to be verbose

    Return (pointlist, att_dict, geo_ref) where pointlist is an array
    of points, att_dict maps attribute names to value arrays and
    geo_ref is a Geo_reference instance or None.
    """

    fp = open(file_name)
    header, fp = _read_csv_file_header(fp)

    try:
        result = _read_csv_file_blocking(fp,
                                         header,
                                         max_read_lines=1e30)
                                    # If the file is bigger that this, block..
                                    # FIXME (Ole) What's up here?
    except ANUGAError:
        fp.close()
        raise

    pointlist, att_dict, geo_ref, fp = result
    fp.close()

    return pointlist, att_dict, geo_ref
1056
1057
1058##
1059# @brief Read a .csv file header.
1060# @param file_pointer Open descriptor of the file to read.
1061# @param delimiter Header line delimiter string, split on this string.
1062# @param verbose True if this function is to be verbose.
1063# @return A tuple of (<cleaned header string>, <input file_pointer>)
1064
1065CSV_DELIMITER = ','
1066
def _read_csv_file_header(file_pointer,
                          delimiter=CSV_DELIMITER,
                          verbose=False):
    """Read the header line of a .csv file.

    Return (header, file_pointer) where header is the list of cleaned
    column names and file_pointer is the (advanced) input file object.
    """

    first_line = file_pointer.readline()
    return clean_line(first_line, delimiter), file_pointer
1078
1079##
1080# @brief Read a .csv file, with blocking.
1081# @param file_pointer Open descriptor of the file to read.
1082# @param header List of already read .csv header fields.
1083# @param delimiter Delimiter string header was split on.
1084# @param max_read_lines The max number of lines to read before blocking.
1085# @param verbose True if this function is to be verbose.
1086# @note Will throw IndexError, SyntaxError exceptions.
def _read_csv_file_blocking(file_pointer,
                            header,
                            delimiter=CSV_DELIMITER,
                            max_read_lines=MAX_READ_LINES,
                            verbose=False):
    """Read at most max_read_lines data rows from an open .csv file.

    file_pointer    open file object positioned at the first data row
    header          list of column names, with the x and y labels first
    delimiter       field delimiter the header was split on
    max_read_lines  maximum number of data rows to read before returning
    verbose         True if this function is to be verbose

    Return (pointlist, att_dict, geo_ref, file_pointer).  If the two
    leading columns are recognised as latitude/longitude the points are
    converted to UTM and a Geo_reference is returned, otherwise geo_ref
    is None.

    Raises StopIteration when no points remain and SyntaxError when the
    file is malformed.
    """

    points = []
    att_dict = {}

    # Remove the x and y (or lat/long) headers, leaving attribute names.
    header = header[:]
    try:
        x_header = header.pop(0)
        y_header = header.pop(0)
    except IndexError:
        # if there are not two columns this will occur.
        # eg if it is a space seperated file
        raise SyntaxError

    read_lines = 0
    while read_lines < max_read_lines:
        line = file_pointer.readline()
        numbers = clean_line(line, delimiter)
        if len(numbers) <= 1:
            break                       # blank line or EOF ends the block
        if line[0] == '#':
            continue                    # skip comment lines

        read_lines += 1

        try:
            x = float(numbers[0])
            y = float(numbers[1])
            points.append([x,y])
            numbers.pop(0)
            numbers.pop(0)
            if len(header) != len(numbers):
                file_pointer.close()
                msg = "File load error. " \
                      "There might be a problem with the file header."
                raise SyntaxError(msg)
            for i, n in enumerate(numbers):
                # BUG FIX: str.strip() returns a new string; the result
                # was previously discarded, leaving n unstripped.
                n = n.strip()
                if n != '\n' and n != '':
                    att_dict.setdefault(header[i], []).append(float(n))
        except ValueError:
            raise SyntaxError

    if points == []:
        raise StopIteration

    pointlist = num.array(points, num.float)
    for key in att_dict.keys():
        att_dict[key] = num.array(att_dict[key], num.float)

    # If the two leading columns are lat/long, convert the points to UTM
    # and build a Geo_reference for the computed zone.
    geo_ref = None
    x_header = lower(x_header[:3])
    y_header = lower(y_header[:3])
    if (x_header == 'lon' or x_header == 'lat') \
       and (y_header == 'lon' or y_header == 'lat'):
        if x_header == 'lon':
            longitudes = num.ravel(pointlist[:,0:1])
            latitudes = num.ravel(pointlist[:,1:])
        else:
            latitudes = num.ravel(pointlist[:,0:1])
            longitudes = num.ravel(pointlist[:,1:])

        pointlist, geo_ref = _set_using_lat_long(latitudes,
                                                 longitudes,
                                                 geo_reference=None,
                                                 data_points=None,
                                                 points_are_lats_longs=False)

    return pointlist, att_dict, geo_ref, file_pointer
1167
1168
1169##
1170# @brief Read a .pts file header.
1171# @param fid Handle to the open .pts file.
1172# @param verbose True if the function is to be verbose.
1173# @return (geo_reference, keys, fid.dimensions['number_of_points'])
1174# @note Will throw IOError and AttributeError exceptions.
1175def _read_pts_file_header(fid, verbose=False):
1176    """Read the geo_reference and number_of_points from a .pts file"""
1177
1178    keys = fid.variables.keys()
1179    try:
1180        keys.remove('points')
1181    except IOError, e:
1182        fid.close()
1183        msg = "Expected keyword 'points' but could not find it."
1184        raise IOError, msg
1185
1186    if verbose: print 'Got %d variables: %s' % (len(keys), keys)
1187
1188    try:
1189        geo_reference = Geo_reference(NetCDFObject=fid)
1190    except AttributeError, e:
1191        geo_reference = None
1192
1193    return geo_reference, keys, fid.dimensions['number_of_points']
1194
1195
1196##
# @brief Read the body of a .pts file, with blocking.
1198# @param fid Handle to already open file.
1199# @param start_row Start row index of points to return.
1200# @param fin_row End row index of points to return.
1201# @param keys Iterable of keys to return.
1202# @return Tuple of (pointlist, attributes).
1203def _read_pts_file_blocking(fid, start_row, fin_row, keys):
1204    """Read the body of a .csv file."""
1205
1206    pointlist = num.array(fid.variables['points'][start_row:fin_row])
1207
1208    attributes = {}
1209    for key in keys:
1210        attributes[key] = num.array(fid.variables[key][start_row:fin_row])
1211
1212    return pointlist, attributes
1213
1214
1215##
1216# @brief Write a .pts data file.
1217# @param file_name Path to the file to write.
1218# @param write_data_points Data points to write.
1219# @param write_attributes Attributes to write.
1220# @param write_geo_reference Georef to write.
def _write_pts_file(file_name,
                    write_data_points,
                    write_attributes=None,
                    write_geo_reference=None):
    """Write a .pts NetCDF file.

    file_name            path of the file to write
    write_data_points    Nx2 array of point coordinates
    write_attributes     dict of attribute name -> values, or None
    write_geo_reference  Geo_reference to store in the file, or None

    NOTE: the format cannot store an attribute named 'points' since that
    variable name is reserved for the coordinates themselves.
    """

    from Scientific.IO.NetCDF import NetCDFFile

    # Create a new NetCDF file and describe it.
    outfile = NetCDFFile(file_name, netcdf_mode_w)
    outfile.institution = 'Geoscience Australia'
    outfile.description = ('NetCDF format for compact and portable storage '
                           'of spatial point data')

    # Dimensions: N points, each with 2 coordinates (x, y).
    number_of_points = write_data_points.shape[0]
    outfile.createDimension('number_of_points', number_of_points)
    outfile.createDimension('number_of_dimensions', 2) # This is 2d data

    # Store the point coordinates.
    outfile.createVariable('points', netcdf_float, ('number_of_points',
                                                    'number_of_dimensions'))
    outfile.variables['points'][:] = write_data_points

    # One NetCDF variable per attribute.
    if write_attributes is not None:
        for name in write_attributes.keys():
            outfile.createVariable(name, netcdf_float, ('number_of_points',))
            outfile.variables[name][:] = write_attributes[name]

    # Optional georeference.
    if write_geo_reference is not None:
        write_NetCDF_georeference(write_geo_reference, outfile)

    outfile.close()
1273
1274
1275##
1276# @brief Write a .csv data file.
1277# @param file_name Path to the file to write.
1278# @param write_data_points Data points to write.
1279# @param write_attributes Attributes to write.
1280# @param as_lat_long True if points are lat/lon, else x/y.
1281# @param delimiter The CSV delimiter to use.
1282def _write_csv_file(file_name,
1283                    write_data_points,
1284                    write_attributes=None,
1285                    as_lat_long=False,
1286                    delimiter=','):
1287    """Write a .csv file."""
1288
1289    points = write_data_points
1290    pointattributes = write_attributes
1291
1292    fd = open(file_name, 'w')
1293
1294    if as_lat_long:
1295        titlelist = "latitude" + delimiter + "longitude"  + delimiter
1296    else:
1297        titlelist = "x" + delimiter + "y"  + delimiter
1298
1299    if pointattributes is not None:
1300        for title in pointattributes.keys():
1301            titlelist = titlelist + title + delimiter
1302        titlelist = titlelist[0:-len(delimiter)] # remove the last delimiter
1303
1304    fd.write(titlelist + "\n")
1305
1306    # <x/lat> <y/long> [attributes]
1307    for i, vert in enumerate( points):
1308        if pointattributes is not None:
1309            attlist = ","
1310            for att in pointattributes.keys():
1311                attlist = attlist + str(pointattributes[att][i]) + delimiter
1312            attlist = attlist[0:-len(delimiter)] # remove the last delimiter
1313            attlist.strip()
1314        else:
1315            attlist = ''
1316
1317        fd.write(str(vert[0]) + delimiter + str(vert[1]) + attlist + "\n")
1318
1319    fd.close()
1320
1321
1322##
1323# @brief Write a URS file.
1324# @param file_name The path of the file to write.
1325# @param points
1326# @param delimiter
1327def _write_urs_file(file_name, points, delimiter=' '):
1328    """Write a URS format file.
1329    export a file, file_name, with the urs format
1330    the data points are in lats and longs
1331    """
1332
1333    fd = open(file_name, 'w')
1334
1335    # first line is # points
1336    fd.write(str(len(points)) + "\n")
1337
1338    # <lat> <long> <id#>
1339    for i, vert in enumerate( points):
1340        fd.write(str(round(vert[0],7)) + delimiter +
1341                 str(round(vert[1],7)) + delimiter + str(i) + "\n")
1342
1343    fd.close()
1344
1345
1346##
1347# @brief ??
1348# @param point_atts ??
1349# @return ??
1350def _point_atts2array(point_atts):
1351    point_atts['pointlist'] = num.array(point_atts['pointlist'], num.float)
1352
1353    for key in point_atts['attributelist'].keys():
1354        point_atts['attributelist'][key] = \
1355                num.array(point_atts['attributelist'][key], num.float)
1356
1357    return point_atts
1358
1359
1360##
1361# @brief Convert geospatial object to a points dictionary.
1362# @param geospatial_data The geospatial object to convert.
1363# @return A points dictionary.
def geospatial_data2points_dictionary(geospatial_data):
    """Convert a Geospatial_data object into a points dictionary.

    Return a dict with keys 'pointlist', 'attributelist' and
    'geo_reference' mirroring the corresponding object attributes.
    """

    # Copy the attribute mapping into a fresh dictionary.
    attributes = {}
    for name in geospatial_data.attributes.keys():
        attributes[name] = geospatial_data.attributes[name]

    return {'pointlist': geospatial_data.data_points,
            'attributelist': attributes,
            'geo_reference': geospatial_data.geo_reference}
1379
1380
1381##
1382# @brief Convert a points dictionary to a geospatial object.
1383# @param points_dictionary A points dictionary to convert.
def points_dictionary2geospatial_data(points_dictionary):
    """Convert a points dictionary into a Geospatial_data object.

    The dictionary must contain the keys 'pointlist' and
    'attributelist'; 'geo_reference' is optional.
    """

    for required in ('pointlist', 'attributelist'):
        msg = 'Points dictionary must have key %s' % required
        assert required in points_dictionary, msg

    # A missing georeference simply means 'none'.
    geo = points_dictionary.get('geo_reference', None)

    return Geospatial_data(points_dictionary['pointlist'],
                           points_dictionary['attributelist'],
                           geo_reference=geo)
1401
1402
1403##
1404# @brief Ensure that points are in absolute coordinates.
1405# @param points A list or array of points to check, or geospatial object.
1406# @param geo_reference If supplied,
1407# @return ??
def ensure_absolute(points, geo_reference=None):
    """Ensure that points are in absolute coordinates.

    Accepts several input formats and always returns a numeric array of
    absolute points:

    points: list or numeric array of coordinate pairs [xi, eta],
            a Geospatial_data object, or a points file name.

    geo_reference: a Geo_reference object or a 3-tuple of
                   (UTM zone, easting, northing).  If given, the input
                   coordinates are assumed relative to this origin.
    """

    # A string names a points file; load it as a geospatial object.
    if isinstance(points, basestring):
        points = Geospatial_data(file_name=points)

    if isinstance(points, Geospatial_data):
        points = points.get_data_points(absolute=True)
        msg = 'Use a Geospatial_data object or a mesh origin, not both.'
        assert geo_reference == None, msg
    else:
        points = ensure_numeric(points, num.float)

    # Apply the origin shift, if one was supplied.
    if geo_reference is not None:
        if isinstance(geo_reference, Geo_reference):
            geo = geo_reference
        else:
            geo = Geo_reference(geo_reference[0],
                                geo_reference[1],
                                geo_reference[2])
        points = geo.get_absolute(points)

    return points
1449
1450
1451##
1452# @brief
1453# @param points
1454# @param geo_reference
1455# @return A geospatial object.
def ensure_geospatial(points, geo_reference=None):
    """Convert various point formats to a Geospatial_data instance.

    points: list or numeric array of coordinate pairs [xi, eta], or an
            existing Geospatial_data object (returned unchanged).

    geo_reference: a Geo_reference object or a 3-tuple of
                   (UTM zone, easting, northing).  If given, the input
                   coordinates are assumed relative to this origin.
    """

    if isinstance(points, Geospatial_data):
        # Already geospatial; a separate georeference makes no sense here.
        msg = "Use a Geospatial_data object or a mesh origin, not both."
        assert geo_reference is None, msg
        return points

    # List or numeric array of absolute points
    points = ensure_numeric(points, num.float)

    # Normalise the georeference to a Geo_reference instance (or None).
    if geo_reference is None:
        geo = None
    elif isinstance(geo_reference, Geo_reference):
        geo = geo_reference
    else:
        geo = Geo_reference(geo_reference[0],
                            geo_reference[1],
                            geo_reference[2])

    return Geospatial_data(data_points=points, geo_reference=geo)
1493
1494
1495##
1496# @brief
1497# @param data_file
1498# @param alpha_list
1499# @param mesh_file
1500# @param boundary_poly
1501# @param mesh_resolution
1502# @param north_boundary
1503# @param south_boundary
1504# @param east_boundary
1505# @param west_boundary
1506# @param plot_name
1507# @param split_factor
1508# @param seed_num
1509# @param cache
1510# @param verbose
1511def find_optimal_smoothing_parameter(data_file,
1512                                     alpha_list=None,
1513                                     mesh_file=None,
1514                                     boundary_poly=None,
1515                                     mesh_resolution=100000,
1516                                     north_boundary=None,
1517                                     south_boundary=None,
1518                                     east_boundary=None,
1519                                     west_boundary=None,
1520                                     plot_name='all_alphas',
1521                                     split_factor=0.1,
1522                                     seed_num=None,
1523                                     cache=False,
1524                                     verbose=False):
1525    """Removes a small random sample of points from 'data_file'.
1526    Then creates models with different alpha values from 'alpha_list' and
1527    cross validates the predicted value to the previously removed point data.
1528    Returns the alpha value which has the smallest covariance.
1529
1530    data_file: must not contain points outside the boundaries defined
1531               and it either a pts, txt or csv file.
1532
1533    alpha_list: the alpha values to test in a single list
1534
1535    mesh_file: name of the created mesh file or if passed in will read it.
1536               NOTE, if there is a mesh file mesh_resolution,
1537               north_boundary, south... etc will be ignored.
1538
1539    mesh_resolution: the maximum area size for a triangle
1540
1541    north_boundary... west_boundary: the value of the boundary
1542
1543    plot_name: the name for the plot contain the results
1544
1545    seed_num: the seed to the random number generator
1546
1547    USAGE:
1548        value, alpha = find_optimal_smoothing_parameter(data_file=fileName,
1549                                             alpha_list=[0.0001, 0.01, 1],
1550                                             mesh_file=None,
1551                                             mesh_resolution=3,
1552                                             north_boundary=5,
1553                                             south_boundary=-5,
1554                                             east_boundary=5,
1555                                             west_boundary=-5,
1556                                             plot_name='all_alphas',
1557                                             seed_num=100000,
1558                                             verbose=False)
1559
1560    OUTPUT: returns the minumum normalised covalance calculate AND the
1561           alpha that created it. PLUS writes a plot of the results
1562
1563    NOTE: code will not work if the data_file extent is greater than the
1564    boundary_polygon or any of the boundaries, eg north_boundary...west_boundary
1565    """
1566
1567    from anuga.shallow_water import Domain
1568    from anuga.geospatial_data.geospatial_data import Geospatial_data
1569    from anuga.pmesh.mesh_interface import create_mesh_from_regions
1570    from anuga.utilities.numerical_tools import cov
1571    from anuga.utilities.polygon import is_inside_polygon
1572    from anuga.fit_interpolate.benchmark_least_squares import mem_usage
1573
1574
1575    attribute_smoothed = 'elevation'
1576
1577    if mesh_file is None:
1578        if verbose: print "building mesh"
1579        mesh_file = 'temp.msh'
1580
1581        if north_boundary is None or south_boundary is None \
1582           or east_boundary is None or west_boundary is None:
1583            no_boundary = True
1584        else:
1585            no_boundary = False
1586
1587        if no_boundary is True:
1588            msg = 'All boundaries must be defined'
1589            raise Expection, msg
1590
1591        poly_topo = [[east_boundary,south_boundary],
1592                     [east_boundary,north_boundary],
1593                     [west_boundary,north_boundary],
1594                     [west_boundary,south_boundary]]
1595
1596        create_mesh_from_regions(poly_topo,
1597                                 boundary_tags={'back': [2],
1598                                                'side': [1,3],
1599                                                'ocean': [0]},
1600                                 maximum_triangle_area=mesh_resolution,
1601                                 filename=mesh_file,
1602                                 use_cache=cache,
1603                                 verbose=verbose)
1604
1605    else: # if mesh file provided
1606        # test mesh file exists?
1607        if verbose: "reading from file: %s" % mesh_file
1608        if access(mesh_file,F_OK) == 0:
1609            msg = "file %s doesn't exist!" % mesh_file
1610            raise IOError, msg
1611
1612    # split topo data
1613    if verbose: print 'Reading elevation file: %s' % data_file
1614    G = Geospatial_data(file_name = data_file)
1615    if verbose: print 'Start split'
1616    G_small, G_other = G.split(split_factor, seed_num, verbose=verbose)
1617    if verbose: print 'Finish split'
1618    points = G_small.get_data_points()
1619
1620    if verbose: print "Number of points in sample to compare: ", len(points)
1621
1622    if alpha_list == None:
1623        alphas = [0.001,0.01,100]
1624        #alphas = [0.000001, 0.00001, 0.0001, 0.001, 0.01,\
1625         #         0.1, 1.0, 10.0, 100.0,1000.0,10000.0]
1626    else:
1627        alphas = alpha_list
1628
1629    # creates array with columns 1 and 2 are x, y. column 3 is elevation
1630    # 4 onwards is the elevation_predicted using the alpha, which will
1631    # be compared later against the real removed data
1632    data = num.array([], dtype=num.float)
1633
1634    data = num.resize(data, (len(points), 3+len(alphas)))
1635
1636    # gets relative point from sample
1637    data[:,0] = points[:,0]
1638    data[:,1] = points[:,1]
1639    elevation_sample = G_small.get_attributes(attribute_name=attribute_smoothed)
1640    data[:,2] = elevation_sample
1641
1642    normal_cov = num.array(num.zeros([len(alphas), 2]), dtype=num.float)
1643
1644    if verbose: print 'Setup computational domains with different alphas'
1645
1646    for i, alpha in enumerate(alphas):
1647        # add G_other data to domains with different alphas
1648        if verbose:
1649            print '\n Calculating domain and mesh for Alpha = ', alpha, '\n'
1650        domain = Domain(mesh_file, use_cache=cache, verbose=verbose)
1651        if verbose: print domain.statistics()
1652        domain.set_quantity(attribute_smoothed,
1653                            geospatial_data=G_other,
1654                            use_cache=cache,
1655                            verbose=verbose,
1656                            alpha=alpha)
1657
1658        # Convert points to geospatial data for use with get_values below
1659        points_geo = Geospatial_data(points, domain.geo_reference)
1660
1661        # returns the predicted elevation of the points that were "split" out
1662        # of the original data set for one particular alpha
1663        if verbose: print 'Get predicted elevation for location to be compared'
1664        elevation_predicted = \
1665                domain.quantities[attribute_smoothed].\
1666                    get_values(interpolation_points=points_geo)
1667
1668        # add predicted elevation to array that starts with x, y, z...
1669        data[:,i+3] = elevation_predicted
1670
1671        sample_cov = cov(elevation_sample)
1672        ele_cov = cov(elevation_sample - elevation_predicted)
1673        normal_cov[i,:] = [alpha,ele_cov / sample_cov]
1674
1675        if verbose:
1676            print 'Covariance for alpha ', normal_cov[i][0], '= ', \
1677                      normal_cov[i][1]
1678            print '-------------------------------------------- \n'
1679
1680    normal_cov0 = normal_cov[:,0]
1681    normal_cov_new = num.take(normal_cov, num.argsort(normal_cov0), axis=0)
1682
1683    if plot_name is not None:
1684        from pylab import savefig, semilogx, loglog
1685
1686        semilogx(normal_cov_new[:,0], normal_cov_new[:,1])
1687        loglog(normal_cov_new[:,0], normal_cov_new[:,1])
1688        savefig(plot_name, dpi=300)
1689
1690    if mesh_file == 'temp.msh':
1691        remove(mesh_file)
1692
1693    if verbose:
1694        print 'Final results:'
1695        for i, alpha in enumerate(alphas):
1696            print 'covariance for alpha %s = %s ' \
1697                  % (normal_cov[i][0], normal_cov[i][1])
1698        print '\n Optimal alpha is: %s ' \
1699              % normal_cov_new[(num.argmin(normal_cov_new, axis=0))[1], 0]
1700
1701    # covariance and optimal alpha
1702    return (min(normal_cov_new[:,1]),
1703            normal_cov_new[(num.argmin(normal_cov_new,axis=0))[1],0])
1704
1705
##
# @brief Old cross-validation search for the best smoothing parameter.
#        Superseded by find_optimal_smoothing_parameter() above; kept for
#        reference. Fits the data with each candidate alpha and returns the
#        alpha giving the smallest normalised covariance on withheld points.
# @param data_file Name of the point data file (.pts, .txt or .csv).
# @param alpha_list List of candidate alpha values to test.
# @param mesh_file Mesh file to create ('temp.msh') or an existing one to read.
# @param boundary_poly Boundary polygon (currently unused - see FIXME in body).
# @param mesh_resolution Maximum triangle area used when generating the mesh.
# @param north_boundary Northern extent of the generated mesh.
# @param south_boundary Southern extent of the generated mesh.
# @param east_boundary Eastern extent of the generated mesh.
# @param west_boundary Western extent of the generated mesh.
# @param plot_name Filename for the results plot (None to skip plotting).
# @param split_factor Fraction of points withheld for comparison.
# @param seed_num Seed for the random number generator used by split().
# @param cache True if intermediate results are to be cached.
# @param verbose True if this function is to be verbose.
1722def old_find_optimal_smoothing_parameter(data_file,
1723                                         alpha_list=None,
1724                                         mesh_file=None,
1725                                         boundary_poly=None,
1726                                         mesh_resolution=100000,
1727                                         north_boundary=None,
1728                                         south_boundary=None,
1729                                         east_boundary=None,
1730                                         west_boundary=None,
1731                                         plot_name='all_alphas',
1732                                         split_factor=0.1,
1733                                         seed_num=None,
1734                                         cache=False,
1735                                         verbose=False):
1736    """
1737    data_file: must not contain points outside the boundaries defined
1738               and it either a pts, txt or csv file.
1739
1740    alpha_list: the alpha values to test in a single list
1741
1742    mesh_file: name of the created mesh file or if passed in will read it.
1743               NOTE, if there is a mesh file mesh_resolution,
1744               north_boundary, south... etc will be ignored.
1745
1746    mesh_resolution: the maximum area size for a triangle
1747
1748    north_boundary... west_boundary: the value of the boundary
1749
1750    plot_name: the name for the plot contain the results
1751
1752    seed_num: the seed to the random number generator
1753
1754    USAGE:
1755        value, alpha = find_optimal_smoothing_parameter(data_file=fileName,
1756                                             alpha_list=[0.0001, 0.01, 1],
1757                                             mesh_file=None,
1758                                             mesh_resolution=3,
1759                                             north_boundary=5,
1760                                             south_boundary=-5,
1761                                             east_boundary=5,
1762                                             west_boundary=-5,
1763                                             plot_name='all_alphas',
1764                                             seed_num=100000,
1765                                             verbose=False)
1766
1767    OUTPUT: returns the minumum normalised covalance calculate AND the
1768            alpha that created it. PLUS writes a plot of the results
1769
1770    NOTE: code will not work if the data_file extend is greater than the
1771          boundary_polygon or the north_boundary...west_boundary
1772    """
1773
1774    from anuga.shallow_water import Domain
1775    from anuga.geospatial_data.geospatial_data import Geospatial_data
1776    from anuga.pmesh.mesh_interface import create_mesh_from_regions
1777    from anuga.utilities.numerical_tools import cov
1778    from anuga.utilities.polygon import is_inside_polygon
1779    from anuga.fit_interpolate.benchmark_least_squares import mem_usage
1780
1781
1782    attribute_smoothed = 'elevation'
1783
1784    if mesh_file is None:
1785        mesh_file = 'temp.msh'
1786
1787        if north_boundary is None or south_boundary is None \
1788           or east_boundary is None or west_boundary is None:
1789            no_boundary = True
1790        else:
1791            no_boundary = False
1792
1793        if no_boundary is True:
1794            msg = 'All boundaries must be defined'
1795            raise Expection, msg
1796
1797        poly_topo = [[east_boundary,south_boundary],
1798                     [east_boundary,north_boundary],
1799                     [west_boundary,north_boundary],
1800                     [west_boundary,south_boundary]]
1801
1802        create_mesh_from_regions(poly_topo,
1803                                 boundary_tags={'back': [2],
1804                                                'side': [1,3],
1805                                                'ocean': [0]},
1806                                 maximum_triangle_area=mesh_resolution,
1807                                 filename=mesh_file,
1808                                 use_cache=cache,
1809                                 verbose=verbose)
1810
1811    else: # if mesh file provided
1812        # test mesh file exists?
1813        if access(mesh_file,F_OK) == 0:
1814            msg = "file %s doesn't exist!" % mesh_file
1815            raise IOError, msg
1816
1817    # split topo data
1818    G = Geospatial_data(file_name=data_file)
1819    if verbose: print 'start split'
1820    G_small, G_other = G.split(split_factor, seed_num, verbose=verbose)
1821    if verbose: print 'finish split'
1822    points = G_small.get_data_points()
1823
1824    # FIXME: Remove points outside boundary polygon
1825#    print 'new point',len(points)
1826#
1827#    new_points=[]
1828#    new_points=array([],dtype=float)
1829#    new_points=resize(new_points,(len(points),2))
1830#    print "BOUNDARY", boundary_poly
1831#    for i,point in enumerate(points):
1832#        if is_inside_polygon(point,boundary_poly, verbose=True):
1833#            new_points[i] = point
1834#            print"WOW",i,new_points[i]
1835#    points = new_points
1836
1837    if verbose: print "Number of points in sample to compare: ", len(points)
1838
1839    if alpha_list == None:
1840        alphas = [0.001,0.01,100]
1841        #alphas = [0.000001, 0.00001, 0.0001, 0.001, 0.01,\
1842         #         0.1, 1.0, 10.0, 100.0,1000.0,10000.0]
1843    else:
1844        alphas = alpha_list
1845
1846    domains = {}
1847
1848    if verbose: print 'Setup computational domains with different alphas'
1849
1850    for alpha in alphas:
1851        # add G_other data to domains with different alphas
1852        if verbose:
1853            print '\n Calculating domain and mesh for Alpha = ', alpha, '\n'
1854        domain = Domain(mesh_file, use_cache=cache, verbose=verbose)
1855        if verbose: print domain.statistics()
1856        domain.set_quantity(attribute_smoothed,
1857                            geospatial_data=G_other,
1858                            use_cache=cache,
1859                            verbose=verbose,
1860                            alpha=alpha)
1861        domains[alpha] = domain
1862
1863    # creates array with columns 1 and 2 are x, y. column 3 is elevation
1864    # 4 onwards is the elevation_predicted using the alpha, which will
1865    # be compared later against the real removed data
1866    data = num.array([], dtype=num.float)
1867
1868    data = num.resize(data, (len(points), 3+len(alphas)))
1869
1870    # gets relative point from sample
1871    data[:,0] = points[:,0]
1872    data[:,1] = points[:,1]
1873    elevation_sample = G_small.get_attributes(attribute_name=attribute_smoothed)
1874    data[:,2] = elevation_sample
1875
1876    normal_cov = num.array(num.zeros([len(alphas), 2]), dtype=num.float)
1877
1878    if verbose:
1879        print 'Determine difference between predicted results and actual data'
1880    for i, alpha in enumerate(domains):
1881        if verbose: print'Alpha =', alpha
1882
1883        points_geo = domains[alpha].geo_reference.change_points_geo_ref(points)
1884        # returns the predicted elevation of the points that were "split" out
1885        # of the original data set for one particular alpha
1886        elevation_predicted = \
1887                domains[alpha].quantities[attribute_smoothed].\
1888                        get_values(interpolation_points=points_geo)
1889
1890        # add predicted elevation to array that starts with x, y, z...
1891        data[:,i+3] = elevation_predicted
1892
1893        sample_cov = cov(elevation_sample)
1894        ele_cov = cov(elevation_sample - elevation_predicted)
1895        normal_cov[i,:] = [alpha,ele_cov / sample_cov]
1896        print 'memory usage during compare', mem_usage()
1897        if verbose: print 'cov', normal_cov[i][0], '= ', normal_cov[i][1]
1898
1899    normal_cov0 = normal_cov[:,0]
1900    normal_cov_new = num.take(normal_cov, num.argsort(normal_cov0), axis=0)
1901
1902    if plot_name is not None:
1903        from pylab import savefig,semilogx,loglog
1904
1905        semilogx(normal_cov_new[:,0], normal_cov_new[:,1])
1906        loglog(normal_cov_new[:,0], normal_cov_new[:,1])
1907        savefig(plot_name, dpi=300)
1908    if mesh_file == 'temp.msh':
1909        remove(mesh_file)
1910
1911    return (min(normal_cov_new[:,1]),
1912            normal_cov_new[(num.argmin(normal_cov_new, axis=0))[1],0])
1913
1914
if __name__ == "__main__":
    # This module is a library; it has no standalone command-line behaviour.
    pass
1917
# Note: See TracBrowser for help on using the repository browser.