source: branches/source_numpy_conversion/anuga/geospatial_data/geospatial_data.py @ 6768

Last change on this file since 6768 was 5945, checked in by rwilson, 16 years ago

More NumPy changes.

File size: 67.7 KB
Line 
1"""Class Geospatial_data - Manipulation of locations on the planet and
2associated attributes.
3
4"""
5from sys import maxint
6from os import access, F_OK, R_OK,remove
7from types import DictType
8from warnings import warn
9from string import lower
10import numpy
11import numpy.random
12#from Array import tolist
13from copy import deepcopy
14
15
16from Scientific.IO.NetCDF import NetCDFFile
17from anuga.config import Float
18from anuga.coordinate_transforms.lat_long_UTM_conversion import UTMtoLL   
19from anuga.utilities.numerical_tools import ensure_numeric
20from anuga.coordinate_transforms.geo_reference import Geo_reference, \
21     TitleError, DEFAULT_ZONE, ensure_geo_reference, write_NetCDF_georeference
22from anuga.coordinate_transforms.redfearn import convert_from_latlon_to_utm
23from anuga.utilities.anuga_exceptions import ANUGAError
24from anuga.config import points_file_block_line_size as MAX_READ_LINES 
25#from anuga.fit_interpolate.benchmark_least_squares import mem_usage
26 
27
28DEFAULT_ATTRIBUTE = 'elevation'
29
30class Geospatial_data:
31
    def __init__(self,
                 data_points=None, # this can also be a points file name
                 attributes=None,
                 geo_reference=None,
                 default_attribute_name=None,
                 file_name=None,
                 latitudes=None,
                 longitudes=None,
                 points_are_lats_longs=False,
                 max_read_lines=None,
                 load_file_now=True,
                 verbose=False):
        """Create instance from data points and associated attributes.

        data_points: x,y coordinates in meters. Type must be either a
            sequence of 2-tuples or an Mx2 numeric array of floats.  A
            file name with extension .txt, .csv or .pts can also be
            passed in here.

        attributes: Associated values for each data point.  Either a
            list/array of length M, or a dictionary of lists (or
            arrays) of length M.  In the latter case the dictionary
            keys are the attribute names; in the former the attribute
            gets the default name "elevation".

        geo_reference: Object representing the origin of the data
            points (UTM zone, easting and northing); data points are
            assumed to be relative to this origin.  If None, the
            default geo reference object is used.

        default_attribute_name: Name of default attribute to be used
            with get_attribute_values.  If None, the "first" attribute
            is used.

        file_name: Name of input netCDF file or .txt/.csv file.  A
            netCDF file must have dimensions "points" etc.  A text
            file is comma separated with x, y and attribute data; the
            first line holds the column names.  If the first two
            column titles start with "lat" and "long" (not case
            sensitive) the data is assumed to be latitude and
            longitude in decimal format and is converted to UTM;
            otherwise the first two columns are taken as x and y and
            the titles actually used are ignored.

            Example .txt/.csv layout:
                x, y, elevation, friction
                0.6, 0.7, 4.9, 0.3
                1.9, 2.8, 5, 0.3
                2.7, 2.4, 5.2, 0.3

        latitudes, longitudes: Vectors of latitudes and longitudes
            used to specify location instead of points.

        points_are_lats_longs: Set True if data_points are actually
            lats and longs, not UTM.

        max_read_lines: The number of rows read into memory when using
            blocking to read a file.

        load_file_now: If True the file is loaded immediately.  If
            False, loading is deferred for blockwise reading on
            demand (see __iter__/next).

        verbose: If True, progress messages are printed.
        """

        if isinstance(data_points, basestring):
            # assume data point is really a file name
            file_name = data_points

        self.set_verbose(verbose)
        self.geo_reference=None #create the attribute
        self.file_name = file_name

        if max_read_lines is None:
            self.max_read_lines = MAX_READ_LINES
        else:
            self.max_read_lines = max_read_lines

        if file_name is None:
            if latitudes is not None or longitudes is not None or \
                   points_are_lats_longs:
                data_points, geo_reference =  \
                             _set_using_lat_long(latitudes=latitudes,
                                  longitudes=longitudes,
                                  geo_reference=geo_reference,
                                  data_points=data_points,
                                  points_are_lats_longs=points_are_lats_longs)
            self.check_data_points(data_points)
            self.set_attributes(attributes)
            self.set_geo_reference(geo_reference)
            self.set_default_attribute_name(default_attribute_name)

        elif load_file_now is True:
            # Watch for case where file name and points,
            # attributes etc are provided!!
            # If file name then all provided info will be removed!

            if verbose is True:
                if file_name is not None:
                    print 'Loading Geospatial data from file: %s' %file_name

            self.import_points_file(file_name, verbose=verbose)

            self.check_data_points(self.data_points)
            self.set_attributes(self.attributes)
            self.set_geo_reference(self.geo_reference)
            self.set_default_attribute_name(default_attribute_name)

        if verbose is True:
            if file_name is not None:
                print 'Geospatial data created from file: %s' %file_name
                if load_file_now is False:
                    print 'Data will be loaded blockwise on demand'

                    if file_name.endswith('csv') or file_name.endswith('txt'):
                        pass
                        # This message was misleading.
                        # FIXME (Ole): Are we blocking here or not?
182
183    def __len__(self):
184        return len(self.data_points)
185
186    def __repr__(self):
187        return str(self.get_data_points(absolute=True))
188   
189   
190    def check_data_points(self, data_points):
191        """Checks data points
192        """
193       
194        if data_points is None:
195            self.data_points = None
196            msg = 'There is no data or file provided!'
197            raise ValueError, msg
198           
199        else:
200            self.data_points = ensure_numeric(data_points)
201            #print "self.data_points.shape",self.data_points.shape
202            if not (0,) == self.data_points.shape:
203                assert len(self.data_points.shape) == 2
204                assert self.data_points.shape[1] == 2
205
206    def set_attributes(self, attributes):
207        """Check and assign attributes dictionary
208        """
209       
210        if attributes is None:
211            self.attributes = None
212            return
213       
214        if not isinstance(attributes, DictType):
215            #Convert single attribute into dictionary
216            attributes = {DEFAULT_ATTRIBUTE: attributes}
217
218        #Check input attributes   
219        for key in attributes.keys():
220            try:
221                attributes[key] = ensure_numeric(attributes[key])
222            except:
223                msg = 'Attribute %s could not be converted' %key
224                msg += 'to a numeric vector'
225                raise msg
226
227        self.attributes = attributes   
228
229    #def set_geo_reference(self, geo_reference):
230    #    # FIXME (Ole): Backwards compatibility - deprecate
231    #    self.setgeo_reference(geo_reference)
232
233    def set_geo_reference(self, geo_reference):
234        """Set the georeference of geospatial data.
235       
236        It can also be used to change the georeference and will ensure that
237        the absolute coordinate values are unchanged.
238        """
239        from anuga.coordinate_transforms.geo_reference import Geo_reference
240
241        if geo_reference is None:
242            # Use default - points are in absolute coordinates
243            geo_reference = Geo_reference()
244           
245        # Allow for tuple (zone, xllcorner, yllcorner)   
246        geo_reference = ensure_geo_reference(geo_reference)
247       
248        if not isinstance(geo_reference, Geo_reference):
249            # FIXME (Ole): This exception will be raised even
250            # if geo_reference is None. Is that the intent Duncan?
251            msg = 'Argument geo_reference must be a valid Geo_reference \n'
252            msg += 'object or None.'
253            raise msg
254
255        # If a geo_reference already exists, change the point data according to
256        # the new geo reference
257        if  self.geo_reference is not None:
258            self.data_points = self.get_data_points(geo_reference=geo_reference)
259           
260        self.geo_reference = geo_reference
261
262
263    def set_default_attribute_name(self, default_attribute_name):
264        self.default_attribute_name = default_attribute_name
265
266    def set_verbose(self, verbose=False):
267        if verbose in [False, True]:
268            self.verbose = verbose
269        else:
270            msg = 'Illegal value: %s' %str(verbose)
271            raise Exception, msg
272
273    def clip(self, polygon, closed=True, verbose=False):
274        """Clip geospatial data by a polygon
275
276        Input
277          polygon - Either a list of points, an Nx2 array or
278                    a Geospatial data object.
279          closed - (optional) determine whether points on boundary should be
280          regarded as belonging to the polygon (closed = True)
281          or not (closed = False). Default is True.
282           
283        Output
284          New geospatial data object representing points inside
285          specified polygon.
286       
287       
288        Note - this method is non-destructive and leaves the data in 'self'
289        unchanged
290        """
291
292        from anuga.utilities.polygon import inside_polygon
293
294        if isinstance(polygon, Geospatial_data):
295            # Polygon is an object - extract points
296            polygon = polygon.get_data_points()
297
298        points = self.get_data_points()   
299#        if verbose: print '%s points:%s' %(verbose,points)
300        inside_indices = inside_polygon(points, polygon, closed, verbose)
301
302        clipped_G = self.get_sample(inside_indices)
303
304#        clipped_points = take(points, inside_indices)
305
306        # Clip all attributes
307#        attributes = self.get_all_attributes()
308
309#        clipped_attributes = {}
310#        if attributes is not None:
311#            for key, att in attributes.items():
312#                clipped_attributes[key] = take(att, inside_indices)
313
314#        return Geospatial_data(clipped_points, clipped_attributes)
315        return clipped_G
316       
317       
318    def clip_outside(self, polygon, closed=True,verbose=False):
319        """Clip geospatial date by a polygon, keeping data OUTSIDE of polygon
320
321        Input
322          polygon - Either a list of points, an Nx2 array or
323                    a Geospatial data object.
324          closed - (optional) determine whether points on boundary should be
325          regarded as belonging to the polygon (closed = True)
326          or not (closed = False). Default is True.
327         
328        Output
329          Geospatial data object representing point OUTSIDE specified polygon
330       
331        """
332
333        from anuga.utilities.polygon import outside_polygon
334
335        if isinstance(polygon, Geospatial_data):
336            # Polygon is an object - extract points
337            polygon = polygon.get_data_points()
338
339        points = self.get_data_points()   
340        outside_indices = outside_polygon(points, polygon, closed,verbose)
341
342        clipped_G = self.get_sample(outside_indices)
343
344#        clipped_points = take(points, outside_indices)
345
346        # Clip all attributes
347#        attributes = self.get_all_attributes()
348
349#        clipped_attributes = {}
350#        if attributes is not None:
351#            for key, att in attributes.items():
352#                clipped_attributes[key] = take(att, outside_indices)
353
354#        return Geospatial_data(clipped_points, clipped_attributes)
355        return clipped_G
356
357   
358    def get_geo_reference(self):
359        return self.geo_reference
360       
361    def get_data_points(self, absolute=True, geo_reference=None,
362                        as_lat_long=False, isSouthHemisphere=True):
363        """Get coordinates for all data points as an Nx2 array
364
365        If absolute is False returned coordinates are relative to the
366        internal georeference's xll and yll corners, otherwise
367        absolute UTM coordinates are returned.
368
369        If a geo_reference is passed the points are returned relative
370        to that geo_reference.
371       
372        isSH (isSouthHemisphere) is only used when getting data
373        points "as_lat_long" is True and if FALSE will return lats and
374        longs valid for the Northern Hemisphere.
375
376        Default: absolute is True.
377        """
378        if as_lat_long is True:
379            msg = "Points need a zone to be converted into lats and longs"
380            assert self.geo_reference is not None, msg
381            zone = self.geo_reference.get_zone()
382            assert self.geo_reference.get_zone() is not DEFAULT_ZONE, msg
383            lats_longs = []
384            for point in self.get_data_points(True):
385           
386                # UTMtoLL(northing, easting, zone,
387                lat_calced, long_calced = UTMtoLL(point[1],point[0], 
388                                                  zone, isSouthHemisphere)
389                lats_longs.append((lat_calced, long_calced)) # to hash
390            return lats_longs
391           
392        if absolute is True and geo_reference is None:
393            return self.geo_reference.get_absolute(self.data_points)
394        elif geo_reference is not None:
395            return geo_reference.change_points_geo_ref(self.data_points, 
396                                                       self.geo_reference)
397        else:
398            # If absolute is False
399            return self.data_points
400
401   
    def get_attributes(self, attribute_name=None):
        """Return values for one named attribute.

        attribute_name: name of the attribute to return.  If None,
            self.default_attribute_name is used when set; otherwise an
            arbitrary attribute (the first dictionary key) is used.

        Raises AssertionError if the name is not a known attribute.
        """
        if attribute_name is None:
            if self.default_attribute_name is not None:
                attribute_name = self.default_attribute_name
            else:
                # Fall back to the first key (arbitrary dict order)
                attribute_name = self.attributes.keys()[0]

        if self.verbose is True:
            print 'Using attribute %s' %attribute_name
            print 'Available attributes: %s' %(self.attributes.keys())

        msg = 'Attribute name %s does not exist in data set' %attribute_name
        assert self.attributes.has_key(attribute_name), msg

        return self.attributes[attribute_name]
423
424    def get_all_attributes(self):
425        """
426        Return values for all attributes.
427        The return value is either None or a dictionary (possibly empty).
428        """
429
430        return self.attributes
431
    def __add__(self, other):
        """Return the addition of two Geospatial_data objects.

        The points (and attributes) of 'other' are concatenated after
        those of 'self'.  Both operands must carry the same set of
        attribute names, otherwise an Exception is raised.

        other may be None, in which case nothing is added to self.

        Always returns absolute points!  This also means that adding
        None to an object converts its coordinates to absolute values.
        """

        # Find this object's zone; zones are checked for compatibility
        geo_ref1 = self.get_geo_reference()
        zone1 = geo_ref1.get_zone()

        if other is not None:
            geo_ref2 = other.get_geo_reference()
            zone2 = geo_ref2.get_zone()

            # Raises if the two zones cannot be reconciled
            geo_ref1.reconcile_zones(geo_ref2)

            new_points = numpy.concatenate((self.get_data_points(absolute=True),
                                      other.get_data_points(absolute=True)),
                                      axis = 0)

            # Concatenate attributes if any
            if self.attributes is None:
                if other.attributes is not None:
                    msg = 'Geospatial data must have the same \n'
                    msg += 'attributes to allow addition.'
                    raise Exception, msg

                new_attributes = None
            else:
                new_attributes = {}
                for x in self.attributes.keys():
                    if other.attributes.has_key(x):
                        attrib1 = self.attributes[x]
                        attrib2 = other.attributes[x]
                        new_attributes[x] = numpy.concatenate((attrib1, attrib2))
                    else:
                        msg = 'Geospatial data must have the same \n'
                        msg += 'attributes to allow addition.'
                        raise Exception, msg

        else:
            # other is None: just normalise self to absolute coordinates
            new_points = self.get_data_points(absolute=True)
            new_attributes = self.attributes

        # Instantiate new data object and return absolute coordinates
        new_geo_ref = Geo_reference(geo_ref1.get_zone(), 0.0, 0.0)
        return Geospatial_data(new_points,
                               new_attributes,
                               new_geo_ref)
503
504
505    def __radd__(self, other):
506        """Handle cases like None + Geospatial_data(...)
507        """
508
509        return self + other
510
511   
512   
513    ###
514    #  IMPORT/EXPORT POINTS FILES
515    ###
516
    def import_points_file(self, file_name, delimiter=None, verbose=False):
        """Load a .txt, .csv or .pts file into this instance.

        Note: will throw an IOError/SyntaxError if it can't load the
        file.  Catch these!

        delimiter: unused here; kept for backwards compatibility.

        Post condition: self.data_points, self.attributes and
        self.geo_reference have been set from the file contents.
        """
        if access(file_name, F_OK) == 0 :
            msg = 'File %s does not exist or is not accessible' %file_name
            raise IOError, msg

        attributes = {}
        if file_name[-4:]== ".pts":
            try:
                data_points, attributes, geo_reference =\
                             _read_pts_file(file_name, verbose)
            except IOError, e:
                msg = 'Could not open file %s ' %file_name
                raise IOError, msg

        elif file_name[-4:]== ".txt" or file_name[-4:]== ".csv":
            try:
                data_points, attributes, geo_reference =\
                             _read_csv_file(file_name, verbose)
            except IOError, e:
                # This should only be if a file is not found
                msg = 'Could not open file %s. ' %file_name
                msg += 'Check the file location.'
                raise IOError, msg
            except SyntaxError, e:
                # This should only be if there is a format error.
                # NOTE(review): Error_message is presumably a module-level
                # table defined elsewhere in this file - verify.
                msg = 'Could not open file %s. \n' %file_name
                msg += Error_message['IOError']
                raise SyntaxError, msg
        else:
            msg = 'Extension %s is unknown' %file_name[-4:]
            raise IOError, msg

        self.data_points = data_points
        self.attributes = attributes
        self.geo_reference = geo_reference
564   
    def export_points_file(self, file_name, absolute=True,
                           as_lat_long=False, isSouthHemisphere=True):
        """Write the points to file_name; the extension selects the
        format: text (.csv/.txt), binary (.pts) or .urs.

        If absolute is True the xll and yll offsets are added to the
        point values and the xll and yll of the written geo_reference
        are set to 0.  If absolute is False points are written
        relative to xll and yll and the geo_reference is unaffected
        (.pts only; text and .urs formats require absolute=True).

        isSouthHemisphere: only used when as_lat_long is True; if
        False the lats/longs written are valid for the Northern
        Hemisphere.
        """

        if (file_name[-4:] == ".pts"):
            if absolute is True:
                # Zero the written offsets so stored values are absolute
                geo_ref = deepcopy(self.geo_reference)
                geo_ref.xllcorner = 0
                geo_ref.yllcorner = 0
                _write_pts_file(file_name,
                                self.get_data_points(absolute),
                                self.get_all_attributes(),
                                geo_ref)
            else:
                _write_pts_file(file_name,
                                self.get_data_points(absolute),
                                self.get_all_attributes(),
                                self.get_geo_reference())

        elif file_name[-4:] == ".txt" or file_name[-4:] == ".csv":
            msg = "ERROR: trying to write a .txt file with relative data."
            assert absolute, msg
            _write_csv_file(file_name,
                            self.get_data_points(absolute=True,
                                                 as_lat_long=as_lat_long,
                                  isSouthHemisphere=isSouthHemisphere),
                            self.get_all_attributes(),
                            as_lat_long=as_lat_long)

        elif file_name[-4:] == ".urs" :
            msg = "ERROR: Can not write a .urs file as a relative file."
            assert absolute, msg
            _write_urs_file(file_name,
                            self.get_data_points(as_lat_long=True,
                                  isSouthHemisphere=isSouthHemisphere))

        else:
            msg = 'Unknown file type %s ' %file_name
            raise IOError, msg
620       
621    def get_sample(self, indices):
622        """ Returns a object which is a subset of the original
623        and the data points and attributes in this new object refer to
624        the indices provided
625       
626        Input
627            indices- a list of integers that represent the new object
628        Output
629            New geospatial data object representing points specified by
630            the indices
631            """
632        #FIXME: add the geo_reference to this
633#        print 'hello from get_sample'
634        points = self.get_data_points()
635        sampled_points = numpy.take(points, indices)        ##FIX?
636
637        attributes = self.get_all_attributes()
638
639        sampled_attributes = {}
640        if attributes is not None:
641            for key, att in attributes.items():
642                sampled_attributes[key] = numpy.take(att, indices)      ##FIX?
643
644#        print 'goodbye from get_sample'
645
646        return Geospatial_data(sampled_points, sampled_attributes)
647   
648   
649    def split(self, factor=0.5,seed_num=None, verbose=False):
650        """Returns two       
651        geospatial_data object, first is the size of the 'factor'
652        smaller the original and the second is the remainder. The two
653        new object are disjoin set of each other.
654       
655        Points of the two new object have selected RANDOMLY.
656       
657        This method create two lists of indices which are passed into
658        get_sample.  The lists are created using random numbers, and
659        they are unique sets eg.  total_list(1,2,3,4,5,6,7,8,9)
660        random_list(1,3,6,7,9) and remainder_list(0,2,4,5,8)
661               
662        Input - the factor which to split the object, if 0.1 then 10% of the
663            together object will be returned
664       
665        Output - two geospatial_data objects that are disjoint sets of the
666            original
667            """
668       
669        i=0
670        self_size = len(self)
671        random_list = []
672        remainder_list = []
673        new_size = round(factor*self_size)
674       
675        # Find unique random numbers
676        if verbose: print "make unique random number list and get indices"
677
678        total=numpy.array(range(self_size))
679        total_list = total.tolist()
680        if verbose: print "total list len",len(total_list)
681               
682        # There will be repeated random numbers however will not be a
683        # problem as they are being 'pop'ed out of array so if there
684        # are two numbers the same they will pop different indicies,
685        # still basically random
686        ## create list of non-unquie random numbers
687        if verbose: print "create random numbers list %s long" %new_size
688       
689        # Set seed if provided, mainly important for unit test!
690        # plus recalcule seed when no seed provided.
691        if seed_num != None:
692            numpy.random.seed(seed_num)
693        else:
694            numpy.random.seed()
695        if verbose: print "seed:", numpy.random.get_seed()
696       
697        #print 'size',self_size, new_size
698        random_num = numpy.random.randint(0,self_size-1,(int(new_size),))
699        #print 'random num',random_num
700        random_num = random_num.tolist()
701
702        #need to sort and reverse so the pop() works correctly
703        random_num.sort()
704        random_num.reverse()       
705       
706        if verbose: print "make random number list and get indices"
707        j=0
708        k=1
709        remainder_list = total_list[:]
710        # pops array index (random_num) from remainder_list
711        # (which starts as the
712        # total_list and appends to random_list 
713        random_num_len = len(random_num)
714        for i in random_num:
715            random_list.append(remainder_list.pop(i))
716            j+=1
717            #prints progress
718            if verbose and round(random_num_len/10*k)==j:
719                print '(%s/%s)' %(j, random_num_len)
720                k+=1
721       
722        # FIXME: move to tests, it might take a long time
723        # then create an array of random lenght between 500 and 1000,
724        # and use a random factor between 0 and 1
725        # setup for assertion
726        test_total = random_list[:]
727        test_total.extend(remainder_list)
728        test_total.sort() 
729        msg = 'The two random lists made from the original list when added '\
730         'together DO NOT equal the original list'
731        assert (total_list==test_total),msg
732
733        # Get new samples
734        if verbose: print "get values of indices for random list"
735        G1 = self.get_sample(random_list)
736        if verbose: print "get values of indices for opposite of random list"
737        G2 = self.get_sample(remainder_list)
738
739        return G1, G2
740
    def __iter__(self):
        """Prepare blockwise reading: read in the header and
        number_of_points and save the file pointer position.

        Acts as the constructor for blocking mode; subsequent next()
        calls yield Geospatial_data blocks of up to max_read_lines
        points each.
        """
        from Scientific.IO.NetCDF import NetCDFFile
        # FIXME - what to do if the file isn't there

        # FIXME (Ole): Shouldn't this go into the constructor?
        # This method acts like the constructor when blocking.
        # ... and shouldn't it be called block_size?
        if self.max_read_lines is None:
            self.max_read_lines = MAX_READ_LINES
        if self.file_name[-4:] == ".pts":

            # See if the file is there.  Throw a QUIET IO error if it isn't
            fd = open(self.file_name,'r')
            fd.close()

            # Throws prints to screen if file not present
            self.fid = NetCDFFile(self.file_name, 'r')

            self.blocking_georef, self.blocking_keys, self.number_of_points =\
                                  _read_pts_file_header(self.fid,
                                                        self.verbose)
            self.start_row = 0
            self.last_row = self.number_of_points
            self.show_verbose = 0
            self.verbose_block_size = (self.last_row + 10)/10
            self.block_number = 0
            # This computes the number of full blocks.  The last block
            # may be smaller and won't be included in this estimate.
            self.number_of_blocks = self.number_of_points/self.max_read_lines

            if self.verbose is True:
                print 'Reading %d points (in ~%d blocks) from file %s. '\
                      %(self.number_of_points,
                        self.number_of_blocks,
                        self.file_name),
                print 'Each block consists of %d data points'\
                      %self.max_read_lines

        else:
            # Assume the file is a csv file
            file_pointer = open(self.file_name)
            self.header, self.file_pointer = \
                         _read_csv_file_header(file_pointer)
            self.blocking_georef = None # Used for reconciling zones

        return self
792   
793   
    def next(self):
        """Read the next block and return it as a new Geospatial_data
        instance.

        Once the file is exhausted, all blocking attributes set up by
        __iter__ are removed, the file is closed and StopIteration is
        raised.
        """
        if self.file_name[-4:] == ".pts":
            if self.start_row == self.last_row:
                # Read the end of the file last iteration
                # Remove blocking attributes
                self.fid.close()
                del self.max_read_lines
                del self.blocking_georef
                del self.last_row
                del self.start_row
                del self.blocking_keys
                del self.fid
                raise StopIteration
            fin_row = self.start_row + self.max_read_lines
            if fin_row > self.last_row:
                # Final (possibly partial) block
                fin_row = self.last_row

            if self.verbose is True:
                # Only report progress about every verbose_block_size points
                if self.show_verbose >= self.start_row and \
                       self.show_verbose < fin_row:

                    #print 'Doing %d of %d' %(self.start_row, self.last_row+10)

                    print 'Reading block %d (points %d to %d) out of %d'\
                          %(self.block_number,
                            self.start_row,
                            fin_row,
                            self.number_of_blocks)

                    self.show_verbose += max(self.max_read_lines,
                                             self.verbose_block_size)

            # Read next block
            pointlist, att_dict, = \
                       _read_pts_file_blocking(self.fid,
                                               self.start_row,
                                               fin_row,
                                               self.blocking_keys)

            geo = Geospatial_data(pointlist, att_dict, self.blocking_georef)
            self.start_row = fin_row

            self.block_number += 1

        else:
            # Assume the file is a csv file
            try:
                #print "self.max_read_lines", self.max_read_lines
                pointlist, att_dict, geo_ref, self.file_pointer = \
                   _read_csv_file_blocking( self.file_pointer,
                                            self.header[:],
                                            max_read_lines=self.max_read_lines,
                                            verbose=self.verbose)

                # Check that the zones haven't changed.
                if geo_ref is not None:
                    geo_ref.reconcile_zones(self.blocking_georef)
                    self.blocking_georef = geo_ref
                elif self.blocking_georef is not None:

                    msg = 'Geo reference given, then not given.'
                    msg += ' This should not happen.' 
                    raise ValueError, msg
                geo = Geospatial_data(pointlist, att_dict, geo_ref)
            except StopIteration:
                # No data lines left - close and drop blocking state
                self.file_pointer.close()
                del self.header
                del self.file_pointer
                raise StopIteration
            except ANUGAError:
                self.file_pointer.close()
                del self.header
                del self.file_pointer
                raise
            except SyntaxError:
                self.file_pointer.close()
                del self.header
                del self.file_pointer
                # This should only be if there is a format error
                msg = 'Could not open file %s. \n' %self.file_name
                msg += Error_message['IOError']
                raise SyntaxError, msg
        return geo
884
885   
##################### Error messages ###########
# Error_message maps an error category to a user-oriented explanation
# that is appended to exception messages raised while reading point
# files (see next() above).
Error_message = {}
Em = Error_message  # shorthand alias used only for the lines below
Em['IOError'] = "NOTE: The format for a comma separated .txt/.csv file is:\n"
Em['IOError'] += "        1st line:     [column names]\n"
Em['IOError'] += "        other lines:  [x value], [y value], [attributes]\n"
Em['IOError'] += "\n"
Em['IOError'] += "           for example:\n"
Em['IOError'] += "           x, y, elevation, friction\n"
Em['IOError'] += "           0.6, 0.7, 4.9, 0.3\n"
Em['IOError'] += "           1.9, 2.8, 5, 0.3\n"
Em['IOError'] += "           2.7, 2.4, 5.2, 0.3\n"
Em['IOError'] += "\n"
Em['IOError'] += "The first two columns are assumed to be x, y coordinates.\n"
Em['IOError'] += "The attribute values must be numeric.\n"
901
def _set_using_lat_long(latitudes,
                        longitudes,
                        geo_reference,
                        data_points,
                        points_are_lats_longs):
    """Convert latitude/longitude information to UTM points.

    The coordinates may be given either as separate latitudes and
    longitudes sequences or, when points_are_lats_longs is True, as
    data_points in (lat, long) order.  A georeference must not also
    be supplied.

    Returns (data_points, Geo_reference) in UTM coordinates.
    """
    # Reject ambiguous combinations first.
    if geo_reference is not None:
        msg = """A georeference is specified yet latitude and longitude
        are also specified!"""
        raise ValueError(msg)

    if data_points is not None and not points_are_lats_longs:
        msg = """Data points are specified yet latitude and
        longitude are also specified."""
        raise ValueError(msg)

    if points_are_lats_longs:
        if data_points is None:
            msg = """Data points are not specified."""
            raise ValueError(msg)
        # Split the (lat, long) pairs into two flat vectors.
        pairs = ensure_numeric(data_points)
        latitudes = numpy.ravel(pairs[:,0:1])
        longitudes = numpy.ravel(pairs[:,1:])

    # Both vectors must now be available.
    if latitudes is None and longitudes is None:
        msg = """Latitudes and Longitudes are not specified."""
        raise ValueError(msg)

    if latitudes is None:
        msg = """Longitudes are specified yet latitudes aren't."""
        raise ValueError(msg)

    if longitudes is None:
        msg = """Latitudes are specified yet longitudes aren't."""
        raise ValueError(msg)

    utm_points, zone = convert_from_latlon_to_utm(latitudes=latitudes,
                                                  longitudes=longitudes)
    return utm_points, Geo_reference(zone=zone)
944
945   
946def _read_pts_file(file_name, verbose=False):
947    """Read .pts NetCDF file
948   
949    Return a dic of array of points, and dic of array of attribute
950    eg
951    dic['points'] = [[1.0,2.0],[3.0,5.0]]
952    dic['attributelist']['elevation'] = [[7.0,5.0]
953    """   
954
955    from Scientific.IO.NetCDF import NetCDFFile
956   
957    if verbose: print 'Reading ', file_name
958   
959       
960    # See if the file is there.  Throw a QUIET IO error if it isn't
961    fd = open(file_name,'r')
962    fd.close()
963   
964    # Throws prints to screen if file not present
965    fid = NetCDFFile(file_name, 'r') 
966   
967    pointlist = numpy.array(fid.variables['points'])
968    keys = fid.variables.keys()
969    if verbose: print 'Got %d variables: %s' %(len(keys), keys)
970    try:
971        keys.remove('points')
972    except IOError, e:       
973        fid.close()   
974        msg = 'Expected keyword "points" but could not find it'
975        raise IOError, msg
976   
977    attributes = {}
978    for key in keys:
979        if verbose: print "reading attribute '%s'" %key
980           
981        attributes[key] = numpy.array(fid.variables[key])
982   
983   
984    try:
985        geo_reference = Geo_reference(NetCDFObject=fid)
986    except AttributeError, e:
987        geo_reference = None
988   
989    fid.close()
990   
991    return pointlist, attributes, geo_reference
992
993
def _read_csv_file(file_name, verbose=False):
    """Read a whole .csv/.txt point file in one call.

    Returns (pointlist, att_dict, geo_ref) - see
    _read_csv_file_blocking for the meaning of each item.
    """
    file_pointer = open(file_name)
    header, file_pointer = _read_csv_file_header(file_pointer)
    try:
        # A huge max_read_lines effectively disables blocking here.
        # FIXME (Ole) What's up here?
        result = _read_csv_file_blocking(file_pointer,
                                         header,
                                         max_read_lines=1e30)
    except ANUGAError:
        file_pointer.close()
        raise
    pointlist, att_dict, geo_ref, file_pointer = result
    file_pointer.close()
    return pointlist, att_dict, geo_ref
1017
CSV_DELIMITER = ','
def _read_csv_file_header(file_pointer,
                          delimiter=CSV_DELIMITER,
                          verbose=False):
    """Read the header line of a .csv point file.

    Returns (header, file_pointer) where header is the list of cleaned
    column names taken from the first line of the file.
    """
    first_line = file_pointer.readline()
    column_names = clean_line(first_line, delimiter)
    return column_names, file_pointer
1029
def _read_csv_file_blocking(file_pointer, header,
                            delimiter=CSV_DELIMITER,
                            max_read_lines=MAX_READ_LINES,
                            verbose=False):
    """Read (part of) the body of a .csv point file.

    file_pointer: open file positioned at the first unread data line.
    header: column names; the x and y labels must come first.
    max_read_lines: maximum number of data lines to consume this call.

    Returns (pointlist, att_dict, geo_ref, file_pointer).  If the two
    leading columns are really latitude/longitude, the points are
    converted to UTM and geo_ref carries the zone; otherwise geo_ref
    is None.

    Raises SyntaxError for malformed input and StopIteration when no
    data lines remain.
    """
    points = []
    att_dict = {}

    # Work on a copy so the caller's header list is not mutated when
    # the x and y column names are popped off the front.
    header = header[:]
    try:
        x_header = header.pop(0)
        y_header = header.pop(0)
    except IndexError:
        # if there are not two columns this will occur.
        # eg if it is a space seperated file
        raise SyntaxError

    read_lines = 0
    while read_lines < max_read_lines:
        line = file_pointer.readline()
        numbers = clean_line(line, delimiter)
        if len(numbers) <= 1:
            break                       # EOF or too few fields: stop block
        if line[0] == '#':
            continue                    # skip comment lines
        read_lines += 1
        try:
            x = float(numbers[0])
            y = float(numbers[1])
            points.append([x,y])
            numbers.pop(0)
            numbers.pop(0)
            if len(header) != len(numbers):
                file_pointer.close()
                msg = "File load error.  \
                There might be a problem with the file header"
                raise SyntaxError(msg)
            for i,num in enumerate(numbers):
                # Was a bare 'num.strip()' whose result was discarded
                # (strings are immutable) - bind the stripped value.
                num = num.strip()
                if num != '\n' and num != '':
                    att_dict.setdefault(header[i],[]).append(float(num))
        except ValueError:
            # non-numeric data where a number was expected
            raise SyntaxError
    if points == []:
        raise StopIteration

    pointlist = numpy.array(points).astype(float)
    for key in att_dict.keys():
        att_dict[key] = numpy.array(att_dict[key]).astype(float)

    # If the two leading columns are lat/long, convert the points to
    # UTM and construct a matching georeference.
    geo_ref = None
    x_header = x_header[:3].lower()
    y_header = y_header[:3].lower()
    if (x_header == 'lon' or x_header == 'lat') and \
       (y_header == 'lon' or y_header == 'lat'):
        if x_header == 'lon':
            longitudes = numpy.ravel(pointlist[:,0:1])
            latitudes = numpy.ravel(pointlist[:,1:])
        else:
            latitudes = numpy.ravel(pointlist[:,0:1])
            longitudes = numpy.ravel(pointlist[:,1:])

        pointlist, geo_ref = _set_using_lat_long(latitudes,
                                                 longitudes,
                                                 geo_reference=None,
                                                 data_points=None,
                                                 points_are_lats_longs=False)
    return pointlist, att_dict, geo_ref, file_pointer
1110
1111def _read_pts_file_header(fid, verbose=False):
1112
1113    """Read the geo_reference and number_of_points from a .pts file
1114    """
1115   
1116    keys = fid.variables.keys()
1117    try:
1118        keys.remove('points')
1119    except IOError, e:       
1120        fid.close()   
1121        msg = 'Expected keyword "points" but could not find it'
1122        raise IOError, msg
1123    if verbose: print 'Got %d variables: %s' %(len(keys), keys)
1124   
1125    try:
1126        geo_reference = Geo_reference(NetCDFObject=fid)
1127    except AttributeError, e:
1128        geo_reference = None
1129
1130    return geo_reference, keys, fid.dimensions['number_of_points']
1131
def _read_pts_file_blocking(fid, start_row, fin_row, keys):
    """Read rows [start_row:fin_row) of an open .pts NetCDF file.

    Returns (pointlist, attributes) where pointlist holds the point
    coordinates for the requested rows and attributes maps each name
    in 'keys' to the corresponding slice of attribute values.
    """
    pointlist = numpy.array(fid.variables['points'][start_row:fin_row])

    attributes = dict([(name,
                        numpy.array(fid.variables[name][start_row:fin_row]))
                       for name in keys])

    return pointlist, attributes
1148   
def _write_pts_file(file_name,
                    write_data_points,
                    write_attributes=None, 
                    write_geo_reference=None):
    """
    Write .pts NetCDF file   

    file_name: path of the NetCDF file to create.
    write_data_points: Nx2 array of point coordinates.
    write_attributes: dict mapping attribute name -> N values (optional).
    write_geo_reference: georeference to embed in the file (optional).

    NOTE: Below might not be valid ask Duncan : NB 5/2006
    
    WARNING: This function mangles the point_atts data structure
    #F??ME: (DSG)This format has issues.
    # There can't be an attribute called points
    # consider format change
    # method changed by NB not sure if above statement is correct
    
    should create new test for this
    legal_keys = ['pointlist', 'attributelist', 'geo_reference']
    for key in point_atts.keys():
        msg = 'Key %s is illegal. Valid keys are %s' %(key, legal_keys)
        assert key in legal_keys, msg
    """   
    from Scientific.IO.NetCDF import NetCDFFile
    # NetCDF file definition
    outfile = NetCDFFile(file_name, 'w')
    
    # Create new file: global metadata
    outfile.institution = 'Geoscience Australia'
    outfile.description = 'NetCDF format for compact and portable storage ' +\
                          'of spatial point data'
    
    # Dimension definitions (NetCDF requires these before variables)
    shape = write_data_points.shape[0]
    outfile.createDimension('number_of_points', shape) 
    outfile.createDimension('number_of_dimensions', 2) #This is 2d data
    
    # Variable definition
    outfile.createVariable('points', Float, ('number_of_points',
                                             'number_of_dimensions'))

    #create variables 
    outfile.variables['points'][:] = write_data_points #.astype(Float32)

    if write_attributes is not None:
        # One NetCDF variable per attribute, indexed by point number
        for key in write_attributes.keys():
            outfile.createVariable(key, Float, ('number_of_points',))
            outfile.variables[key][:] = write_attributes[key] #.astype(Float32)
        
    if write_geo_reference is not None:
        write_NetCDF_georeference(write_geo_reference, outfile)
        
    outfile.close() 
1200 
1201def _write_csv_file(file_name,
1202                    write_data_points,
1203                    write_attributes=None,
1204                    as_lat_long=False,
1205                    delimiter=','):
1206    """
1207    export a file, file_name, with the csv format
1208   
1209    """
1210    points = write_data_points
1211    pointattributes = write_attributes
1212   
1213    fd = open(file_name,'w')
1214    if as_lat_long:
1215        titlelist = "latitude" + delimiter + "longitude"  + delimiter
1216    else:
1217        titlelist = "x" + delimiter + "y"  + delimiter
1218    if pointattributes is not None:   
1219        for title in pointattributes.keys():
1220            titlelist = titlelist + title + delimiter
1221        titlelist = titlelist[0:-len(delimiter)] # remove the last delimiter
1222    fd.write(titlelist+"\n")
1223   
1224    # <x/lat> <y/long> [attributes]
1225    for i, vert in enumerate( points):
1226
1227
1228        if pointattributes is not None:           
1229            attlist = ","
1230            for att in pointattributes.keys():
1231                attlist = attlist + str(pointattributes[att][i])+ delimiter
1232            attlist = attlist[0:-len(delimiter)] # remove the last delimiter
1233            attlist.strip()
1234        else:
1235            attlist = ''
1236
1237        fd.write(str(vert[0]) + delimiter +
1238                 str(vert[1]) + attlist + "\n")
1239
1240    fd.close()
1241 
def _write_urs_file(file_name,
                    points,
                    delimiter=' '):
    """Export points in urs format (the coordinates are lats/longs).

    The first line holds the number of points; each subsequent line is
    <lat> <long> <id#>, with coordinates rounded to 7 decimal places
    and id# being the zero-based point index.
    """
    fd = open(file_name,'w')
    fd.write(str(len(points)) + "\n")
    for index, point in enumerate(points):
        lat = str(round(point[0], 7))
        lon = str(round(point[1], 7))
        fd.write(lat + delimiter + lon + delimiter + str(index) + "\n")
    fd.close()
1257   
def _point_atts2array(point_atts):
    """Convert the 'pointlist' and every 'attributelist' entry of a
    point-attributes dictionary to float numpy arrays, in place.

    Returns the (mutated) dictionary for convenience.
    """
    # Plain 'float' replaces the deprecated (and since removed)
    # numpy.float alias; both denote the same float64 dtype.
    point_atts['pointlist'] = numpy.array(point_atts['pointlist']).astype(float)

    for key in point_atts['attributelist'].keys():
        point_atts['attributelist'][key] = \
                numpy.array(point_atts['attributelist'][key]).astype(float)
    return point_atts
1265
1266   
1267
1268
def geospatial_data2points_dictionary(geospatial_data):
    """Convert a Geospatial_data object to a points dictionary with
    keys 'pointlist', 'attributelist' and 'geo_reference'.
    """
    # Copy each attribute across under the same name.
    attributes = {}
    for name in geospatial_data.attributes.keys():
        attributes[name] = geospatial_data.attributes[name]

    points_dictionary = {'pointlist': geospatial_data.data_points,
                         'attributelist': attributes,
                         'geo_reference': geospatial_data.geo_reference}

    return points_dictionary
1285
1286   
def points_dictionary2geospatial_data(points_dictionary):
    """Convert a points dictionary (keys 'pointlist', 'attributelist'
    and, optionally, 'geo_reference') to a Geospatial_data object.
    """
    msg = 'Points dictionary must have key pointlist'
    assert 'pointlist' in points_dictionary, msg

    msg = 'Points dictionary must have key attributelist'
    assert 'attributelist' in points_dictionary, msg

    # The georeference is optional.
    geo = points_dictionary.get('geo_reference', None)

    return Geospatial_data(points_dictionary['pointlist'],
                           points_dictionary['attributelist'],
                           geo_reference=geo)
1305
def clean_line(line, delimiter):
    """Split 'line' on 'delimiter', strip whitespace from each field
    and drop fields that were empty before stripping.
    """
    fields = line.strip().split(delimiter)
    # Note the pre-strip emptiness test: a field of only whitespace is
    # retained (as '') while a genuinely empty field is dropped.
    return [field.strip() for field in fields if field != '']
1324           
def ensure_absolute(points, geo_reference=None):
    """Ensure that points are in absolute coordinates.

    points: list/numeric array of [x, y] pairs, a Geospatial_data
            object, or the name of a points file.
    geo_reference: Geo_reference object or (zone, easting, northing)
            3-tuple; if given, 'points' are taken as relative to it.
            May not be combined with a Geospatial_data object.

    Returns a numeric array of absolute points.
    """
    # A string is taken to be the name of a point file.
    if isinstance(points, basestring):
        points = Geospatial_data(file_name=points)

    if isinstance(points, Geospatial_data):
        points = points.get_data_points(absolute=True)
        msg = 'Use a Geospatial_data object or a mesh origin. Not both.'
        assert geo_reference == None, msg
        return points

    # List or numeric array of points
    points = ensure_numeric(points, numpy.float)

    if geo_reference is None:
        # Already absolute
        return points

    # Normalise the georeference and shift the points by it.
    if isinstance(geo_reference, Geo_reference):
        geo = geo_reference
    else:
        geo = Geo_reference(geo_reference[0],
                            geo_reference[1],
                            geo_reference[2])
    return geo.get_absolute(points)
1367     
1368
def ensure_geospatial(points, geo_reference=None):
    """Return 'points' as a Geospatial_data instance.

    points: list/numeric array of [x, y] pairs, or an existing
            Geospatial_data object (returned unchanged).
    geo_reference: Geo_reference object or (zone, easting, northing)
            3-tuple; may not be combined with a Geospatial_data object.
    """
    if isinstance(points, Geospatial_data):
        msg = "Use a Geospatial_data object or a mesh origin. Not both."
        assert geo_reference is None, msg
        return points

    # List or numeric array of absolute points
    points = ensure_numeric(points, numpy.float)

    # Normalise the georeference.
    if geo_reference is None:
        geo = None
    elif isinstance(geo_reference, Geo_reference):
        geo = geo_reference
    else:
        geo = Geo_reference(geo_reference[0],
                            geo_reference[1],
                            geo_reference[2])

    # Create Geospatial_data object with appropriate geo reference
    return Geospatial_data(data_points=points, geo_reference=geo)
1407
1408
1409
1410def find_optimal_smoothing_parameter(data_file, 
1411                                     alpha_list=None,
1412                                     mesh_file=None,
1413                                     boundary_poly=None,
1414                                     mesh_resolution=100000,
1415                                     north_boundary=None,
1416                                     south_boundary=None,
1417                                     east_boundary=None,
1418                                     west_boundary=None,
1419                                     plot_name='all_alphas',
1420                                     split_factor=0.1,
1421                                     seed_num=None,
1422                                     cache=False,
1423                                     verbose=False
1424                                     ):
1425   
1426    """
1427    Removes a small random sample of points from 'data_file'. Then creates
1428    models with different alpha values from 'alpha_list' and cross validates
1429    the predicted value to the previously removed point data. Returns the
1430    alpha value which has the smallest covariance.
1431   
1432    data_file: must not contain points outside the boundaries defined
1433           and it either a pts, txt or csv file.
1434   
1435    alpha_list: the alpha values to test in a single list
1436   
1437    mesh_file: name of the created mesh file or if passed in will read it.
1438            NOTE, if there is a mesh file mesh_resolution,
1439            north_boundary, south... etc will be ignored.
1440   
1441    mesh_resolution: the maximum area size for a triangle
1442   
1443    north_boundary... west_boundary: the value of the boundary
1444   
1445    plot_name: the name for the plot contain the results
1446   
1447    seed_num: the seed to the random number generator
1448   
1449    USAGE:
1450        value, alpha = find_optimal_smoothing_parameter(data_file=fileName,
1451                                             alpha_list=[0.0001, 0.01, 1],
1452                                             mesh_file=None,
1453                                             mesh_resolution=3,
1454                                             north_boundary=5,
1455                                             south_boundary=-5,
1456                                             east_boundary=5,
1457                                             west_boundary=-5,
1458                                             plot_name='all_alphas',
1459                                             seed_num=100000,
1460                                             verbose=False)
1461   
1462    OUTPUT: returns the minumum normalised covalance calculate AND the
1463           alpha that created it. PLUS writes a plot of the results
1464           
1465    NOTE: code will not work if the data_file extent is greater than the
1466    boundary_polygon or any of the boundaries, eg north_boundary...west_boundary
1467       
1468    """
1469
1470    from anuga.shallow_water import Domain
1471    from anuga.geospatial_data.geospatial_data import Geospatial_data
1472    from anuga.pmesh.mesh_interface import create_mesh_from_regions
1473   
1474    from anuga.utilities.numerical_tools import cov
1475    from anuga.utilities.polygon import is_inside_polygon
1476    from anuga.fit_interpolate.benchmark_least_squares import mem_usage
1477   
1478
1479    attribute_smoothed='elevation'
1480
1481    if mesh_file is None:
1482        if verbose: print "building mesh"
1483        mesh_file='temp.msh'
1484
1485        if north_boundary is None or south_boundary is None or \
1486           east_boundary is None or west_boundary is None:
1487            no_boundary=True
1488        else:
1489            no_boundary=False
1490       
1491        if no_boundary is True:
1492            msg= 'All boundaries must be defined'
1493            raise msg
1494
1495        poly_topo = [[east_boundary,south_boundary],
1496                     [east_boundary,north_boundary],
1497                     [west_boundary,north_boundary],
1498                     [west_boundary,south_boundary]]
1499                     
1500        create_mesh_from_regions(poly_topo,
1501                                 boundary_tags={'back': [2],
1502                                                'side': [1,3],
1503                                                'ocean': [0]},
1504                             maximum_triangle_area=mesh_resolution,
1505                             filename=mesh_file,
1506                             use_cache=cache,
1507                             verbose=verbose)
1508
1509    else: # if mesh file provided
1510        #test mesh file exists?
1511        if verbose: "reading from file: %s" %mesh_file
1512        if access(mesh_file,F_OK) == 0:
1513            msg="file %s doesn't exist!" %mesh_file
1514            raise IOError, msg
1515
1516    #split topo data
1517    if verbose: print 'Reading elevation file: %s' %data_file
1518    G = Geospatial_data(file_name = data_file)
1519    if verbose: print 'Start split'
1520    G_small, G_other = G.split(split_factor,seed_num, verbose=verbose)
1521    if verbose: print 'Finish split'
1522    points=G_small.get_data_points()
1523
1524    if verbose: print "Number of points in sample to compare: ", len(points)
1525   
1526    if alpha_list==None:
1527        alphas = [0.001,0.01,100]
1528        #alphas = [0.000001, 0.00001, 0.0001, 0.001, 0.01,\
1529         #         0.1, 1.0, 10.0, 100.0,1000.0,10000.0]
1530       
1531    else:
1532        alphas=alpha_list
1533
1534    #creates array with columns 1 and 2 are x, y. column 3 is elevation
1535    #4 onwards is the elevation_predicted using the alpha, which will
1536    #be compared later against the real removed data
1537    data=numpy.array([],typecode=numpy.float)
1538
1539    data=numpy.resize(data,(len(points),3+len(alphas)))
1540
1541    #gets relative point from sample
1542    data[:,0]=points[:,0]
1543    data[:,1]=points[:,1]
1544    elevation_sample=G_small.get_attributes(attribute_name=attribute_smoothed)
1545    data[:,2]=elevation_sample
1546
1547    normal_cov=numpy.array(numpy.zeros([len(alphas),2]),typecode=numpy.float)
1548
1549    if verbose: print 'Setup computational domains with different alphas'
1550
1551    #print 'memory usage before domains',mem_usage()
1552
1553    for i,alpha in enumerate(alphas):
1554        #add G_other data to domains with different alphas
1555        if verbose:print '\n Calculating domain and mesh for Alpha = ',alpha,'\n'
1556        domain = Domain(mesh_file, use_cache=cache, verbose=verbose)
1557        if verbose:print domain.statistics()
1558        domain.set_quantity(attribute_smoothed, 
1559                    geospatial_data = G_other,
1560                    use_cache = cache,
1561                    verbose = verbose,
1562                    alpha = alpha)
1563
1564       
1565        # Convert points to geospatial data for use with get_values below
1566        points_geo = Geospatial_data(points, domain.geo_reference)
1567       
1568        #returns the predicted elevation of the points that were "split" out
1569        #of the original data set for one particular alpha
1570        if verbose: print 'Get predicted elevation for location to be compared'
1571        elevation_predicted=domain.quantities[attribute_smoothed].\
1572                            get_values(interpolation_points=points_geo)
1573 
1574        #add predicted elevation to array that starts with x, y, z...
1575        data[:,i+3]=elevation_predicted
1576
1577        sample_cov= cov(elevation_sample)
1578        #print elevation_predicted
1579        ele_cov= cov(elevation_sample-elevation_predicted)
1580        normal_cov[i,:]= [alpha,ele_cov/sample_cov]
1581        #print 'memory usage during compare',mem_usage()
1582       
1583        if verbose: print'Covariance for alpha ',normal_cov[i][0],'= ',normal_cov[i][1]
1584        if verbose: print'-------------------------------------------- \n'
1585
1586    normal_cov0=normal_cov[:,0]
1587    normal_cov_new=numpy.take(normal_cov,numpy.argsort(normal_cov0))        ##FIX?
1588
1589    if plot_name is not None:
1590        from pylab import savefig,semilogx,loglog
1591        semilogx(normal_cov_new[:,0],normal_cov_new[:,1])
1592        loglog(normal_cov_new[:,0],normal_cov_new[:,1])
1593        savefig(plot_name,dpi=300)
1594    if mesh_file == 'temp.msh':
1595        remove(mesh_file)
1596   
1597    if verbose: 
1598        print 'Final results:'
1599        for i,alpha in enumerate(alphas):
1600            print'covariance for alpha %s = %s ' %(normal_cov[i][0],normal_cov[i][1])
1601        print '\n Optimal alpha is: %s ' % normal_cov_new[(numpy.argmin(normal_cov_new,axis=0))[1],0]
1602
1603    # covariance and optimal alpha
1604    return min(normal_cov_new[:,1]) , normal_cov_new[(numpy.argmin(normal_cov_new,axis=0))[1],0]
1605
1606def old_find_optimal_smoothing_parameter(data_file, 
1607                                     alpha_list=None,
1608                                     mesh_file=None,
1609                                     boundary_poly=None,
1610                                     mesh_resolution=100000,
1611                                     north_boundary=None,
1612                                     south_boundary=None,
1613                                     east_boundary=None,
1614                                     west_boundary=None,
1615                                     plot_name='all_alphas',
1616                                     split_factor=0.1,
1617                                     seed_num=None,
1618                                     cache=False,
1619                                     verbose=False
1620                                     ):
1621   
1622    """
1623    data_file: must not contain points outside the boundaries defined
1624           and it either a pts, txt or csv file.
1625   
1626    alpha_list: the alpha values to test in a single list
1627   
1628    mesh_file: name of the created mesh file or if passed in will read it.
1629            NOTE, if there is a mesh file mesh_resolution,
1630            north_boundary, south... etc will be ignored.
1631   
1632    mesh_resolution: the maximum area size for a triangle
1633   
1634    north_boundary... west_boundary: the value of the boundary
1635   
1636    plot_name: the name for the plot contain the results
1637   
1638    seed_num: the seed to the random number generator
1639   
1640    USAGE:
1641        value, alpha = find_optimal_smoothing_parameter(data_file=fileName,
1642                                             alpha_list=[0.0001, 0.01, 1],
1643                                             mesh_file=None,
1644                                             mesh_resolution=3,
1645                                             north_boundary=5,
1646                                             south_boundary=-5,
1647                                             east_boundary=5,
1648                                             west_boundary=-5,
1649                                             plot_name='all_alphas',
1650                                             seed_num=100000,
1651                                             verbose=False)
1652   
1653    OUTPUT: returns the minumum normalised covalance calculate AND the
1654           alpha that created it. PLUS writes a plot of the results
1655           
1656    NOTE: code will not work if the data_file extend is greater than the
1657    boundary_polygon or the north_boundary...west_boundary
1658       
1659    """
1660
1661    from anuga.shallow_water import Domain
1662    from anuga.geospatial_data.geospatial_data import Geospatial_data
1663    from anuga.pmesh.mesh_interface import create_mesh_from_regions
1664   
1665    from anuga.utilities.numerical_tools import cov
1666    from anuga.utilities.polygon import is_inside_polygon
1667    from anuga.fit_interpolate.benchmark_least_squares import mem_usage
1668   
1669
1670    attribute_smoothed='elevation'
1671
1672    if mesh_file is None:
1673        mesh_file='temp.msh'
1674
1675        if north_boundary is None or south_boundary is None or \
1676           east_boundary is None or west_boundary is None:
1677            no_boundary=True
1678        else:
1679            no_boundary=False
1680       
1681        if no_boundary is True:
1682            msg= 'All boundaries must be defined'
1683            raise msg
1684
1685        poly_topo = [[east_boundary,south_boundary],
1686                     [east_boundary,north_boundary],
1687                     [west_boundary,north_boundary],
1688                     [west_boundary,south_boundary]]
1689                     
1690        create_mesh_from_regions(poly_topo,
1691                                 boundary_tags={'back': [2],
1692                                                'side': [1,3],
1693                                                'ocean': [0]},
1694                             maximum_triangle_area=mesh_resolution,
1695                             filename=mesh_file,
1696                             use_cache=cache,
1697                             verbose=verbose)
1698
1699    else: # if mesh file provided
1700        #test mesh file exists?
1701        if access(mesh_file,F_OK) == 0:
1702            msg="file %s doesn't exist!" %mesh_file
1703            raise IOError, msg
1704
1705    #split topo data
1706    G = Geospatial_data(file_name = data_file)
1707    if verbose: print 'start split'
1708    G_small, G_other = G.split(split_factor,seed_num, verbose=verbose)
1709    if verbose: print 'finish split'
1710    points=G_small.get_data_points()
1711
1712    #FIXME: Remove points outside boundary polygon
1713#    print 'new point',len(points)
1714#   
1715#    new_points=[]
1716#    new_points=array([],typecode=Float)
1717#    new_points=resize(new_points,(len(points),2))
1718#    print "BOUNDARY", boundary_poly
1719#    for i,point in enumerate(points):
1720#        if is_inside_polygon(point,boundary_poly, verbose=True):
1721#            new_points[i] = point
1722#            print"WOW",i,new_points[i]
1723#    points = new_points
1724
1725   
1726    if verbose: print "Number of points in sample to compare: ", len(points)
1727   
1728    if alpha_list==None:
1729        alphas = [0.001,0.01,100]
1730        #alphas = [0.000001, 0.00001, 0.0001, 0.001, 0.01,\
1731         #         0.1, 1.0, 10.0, 100.0,1000.0,10000.0]
1732       
1733    else:
1734        alphas=alpha_list
1735    domains = {}
1736
1737    if verbose: print 'Setup computational domains with different alphas'
1738
1739    print 'memory usage before domains',mem_usage()
1740
1741    for alpha in alphas:
1742        #add G_other data to domains with different alphas
1743        if verbose:print '\n Calculating domain and mesh for Alpha = ',alpha,'\n'
1744        domain = Domain(mesh_file, use_cache=cache, verbose=verbose)
1745        if verbose:print domain.statistics()
1746        domain.set_quantity(attribute_smoothed, 
1747                    geospatial_data = G_other,
1748                    use_cache = cache,
1749                    verbose = verbose,
1750                    alpha = alpha)
1751        domains[alpha]=domain
1752
1753    print 'memory usage after domains',mem_usage()
1754
1755    #creates array with columns 1 and 2 are x, y. column 3 is elevation
1756    #4 onwards is the elevation_predicted using the alpha, which will
1757    #be compared later against the real removed data
1758    data=numpy.array([],typecode=numpy.float)
1759
1760    data=numpy.resize(data,(len(points),3+len(alphas)))
1761
1762    #gets relative point from sample
1763    data[:,0]=points[:,0]
1764    data[:,1]=points[:,1]
1765    elevation_sample=G_small.get_attributes(attribute_name=attribute_smoothed)
1766    data[:,2]=elevation_sample
1767
1768    normal_cov=numpy.array(numpy.zeros([len(alphas),2]),typecode=numpy.float)
1769
1770    if verbose: print 'Determine difference between predicted results and actual data'
1771    for i,alpha in enumerate(domains):
1772        if verbose: print'Alpha =',alpha
1773       
1774        points_geo=domains[alpha].geo_reference.change_points_geo_ref(points)
1775        #returns the predicted elevation of the points that were "split" out
1776        #of the original data set for one particular alpha
1777        elevation_predicted=domains[alpha].quantities[attribute_smoothed].\
1778                            get_values(interpolation_points=points_geo)
1779 
1780        #add predicted elevation to array that starts with x, y, z...
1781        data[:,i+3]=elevation_predicted
1782
1783        sample_cov= cov(elevation_sample)
1784        #print elevation_predicted
1785        ele_cov= cov(elevation_sample-elevation_predicted)
1786        normal_cov[i,:]= [alpha,ele_cov/sample_cov]
1787        print 'memory usage during compare',mem_usage()
1788       
1789
1790        if verbose: print'cov',normal_cov[i][0],'= ',normal_cov[i][1]
1791
1792    normal_cov0=normal_cov[:,0]
1793    normal_cov_new=numpy.take(normal_cov,numpy.argsort(normal_cov0))        ##FIX?
1794
1795    if plot_name is not None:
1796        from pylab import savefig,semilogx,loglog
1797        semilogx(normal_cov_new[:,0],normal_cov_new[:,1])
1798        loglog(normal_cov_new[:,0],normal_cov_new[:,1])
1799        savefig(plot_name,dpi=300)
1800    if mesh_file == 'temp.msh':
1801        remove(mesh_file)
1802   
1803    return min(normal_cov_new[:,1]) , normal_cov_new[(numpy.argmin(normal_cov_new,axis=0))[1],0]
1804
1805         
if __name__ == "__main__":
    # Library module: nothing runs when executed as a script.
    pass
1808   
Note: See TracBrowser for help on using the repository browser.