source: anuga_core/source/anuga/geospatial_data/geospatial_data.py @ 5079

Last change on this file since 5079 was 5010, checked in by nick, 17 years ago

update 'clip' and 'clip_outside' with verbose and added some comment statements if verbose

File size: 65.7 KB
Line 
1"""Class Geospatial_data - Manipulation of locations on the planet and
2associated attributes.
3
4"""
5from sys import maxint
6from os import access, F_OK, R_OK,remove
7from types import DictType
8from warnings import warn
9from string import lower
10from Numeric import concatenate, array, Float, shape, reshape, ravel, take, \
11                        size, shape
12#from Array import tolist
13from RandomArray import randint, seed, get_seed
14from copy import deepcopy
15
16#from MA import tolist
17
18from Scientific.IO.NetCDF import NetCDFFile   
19from anuga.coordinate_transforms.lat_long_UTM_conversion import UTMtoLL   
20from anuga.utilities.numerical_tools import ensure_numeric
21from anuga.coordinate_transforms.geo_reference import Geo_reference, \
22     TitleError, DEFAULT_ZONE, ensure_geo_reference, write_NetCDF_georeference
23from anuga.coordinate_transforms.redfearn import convert_from_latlon_to_utm
24from anuga.utilities.anuga_exceptions import ANUGAError
25from anuga.config import points_file_block_line_size as MAX_READ_LINES 
26#from anuga.fit_interpolate.benchmark_least_squares import mem_usage
27 
28
# Attribute name used when a single, unnamed attribute vector is supplied
DEFAULT_ATTRIBUTE = 'elevation'
30
31class Geospatial_data:
32
    def __init__(self,
                 data_points=None, # this can also be a points file name
                 attributes=None,
                 geo_reference=None,
                 default_attribute_name=None,
                 file_name=None,
                 latitudes=None,
                 longitudes=None,
                 points_are_lats_longs=False,
                 max_read_lines=None,
                 load_file_now=True,
                 verbose=False):

        """Create instance from data points and associated attributes.

        data_points: x,y coordinates in meters. Type must be either a
        sequence of 2-tuples or an Mx2 Numeric array of floats.  A file name
        with extension .txt, .csv or .pts can also be passed in here
        (it is then treated as file_name).

        attributes: Associated values for each data point. The type
        must be either a list or an array of length M or a dictionary
        of lists (or arrays) of length M. In the latter case the keys
        in the dictionary represent the attribute names, in the former
        the attribute will get the default name "elevation".

        geo_reference: Object representing the origin of the data
        points. It contains UTM zone, easting and northing and data
        points are assumed to be relative to this origin.
        If geo_reference is None, the default geo ref object is used.

        default_attribute_name: Name of default attribute to be used with
        get_attribute_values. The idea is that the dataset can be
        equipped with information about which attribute to return.
        If None, the default is the "first".

        latitudes, longitudes: Vectors of latitudes and longitudes,
        used to specify location instead of points.

        points_are_lats_longs: Set this as true if the points are actually
        lats and longs, not UTM.

        max_read_lines: The number of rows read into memory when using
        blocking to read a file.

        load_file_now: If true the file is automatically loaded
        into the geospatial instance. Used when blocking.

        file_name: Name of input netCDF file or .txt/.csv file. netCDF file
        must have dimensions "points" etc.  A .txt/.csv file is comma
        separated with x, y and attribute data; its first line holds the
        column titles.  If the first two column titles start with lat or
        long (not case sensitive) the data is assumed to be latitude and
        longitude, in decimal format, and is converted to UTM. Otherwise
        the first two columns are taken as x and y and the titles ignored.

        The format for a .txt file is:
            1st line:     [column names]
            other lines:  x y [attributes]

            for example:
            x, y, elevation, friction
            0.6, 0.7, 4.9, 0.3
            1.9, 2.8, 5, 0.3
            2.7, 2.4, 5.2, 0.3

        The format for a Points dictionary is:

          ['pointlist'] a 2 column array describing points. 1st column x,
          2nd column y.
          ['attributelist'], a dictionary of 1D arrays, representing
          attribute values at the point.  The dictionary key is the attribute
          header.
          ['geo_reference'] a Geo_reference object. Use if the point
          information is relative. This is optional.
            eg
            dic['pointlist'] = [[1.0,2.0],[3.0,5.0]]
            dic['attributelist']['elevation'] = [[7.0,5.0]

        verbose: If True progress information is printed.
        """

        if isinstance(data_points, basestring):
            # assume data point is really a file name
            file_name = data_points

        self.set_verbose(verbose)
        self.geo_reference=None #create the attribute
        self.file_name = file_name

        if max_read_lines is None:
            self.max_read_lines = MAX_READ_LINES
        else:
            self.max_read_lines = max_read_lines

        if file_name is None:
            if latitudes is not None or longitudes is not None or \
                   points_are_lats_longs:
                # Convert lat/long input into UTM points + georeference
                data_points, geo_reference =  \
                             _set_using_lat_long(latitudes=latitudes,
                                  longitudes=longitudes,
                                  geo_reference=geo_reference,
                                  data_points=data_points,
                                  points_are_lats_longs=points_are_lats_longs)
            self.check_data_points(data_points)
            self.set_attributes(attributes)
            self.set_geo_reference(geo_reference)
            self.set_default_attribute_name(default_attribute_name)

        elif load_file_now is True:
            # watch for case where file name and points,
            # attributes etc are provided!!
            # if file name then all provided info will be removed!

            if verbose is True:
                if file_name is not None:
                    print 'Loading Geospatial data from file: %s' %file_name

            self.import_points_file(file_name, verbose=verbose)

            self.check_data_points(self.data_points)
            self.set_attributes(self.attributes)
            self.set_geo_reference(self.geo_reference)
            self.set_default_attribute_name(default_attribute_name)

        if verbose is True:
            if file_name is not None:
                print 'Geospatial data created from file: %s' %file_name
                if load_file_now is False:
                    print 'Data will be loaded blockwise on demand'

                    if file_name.endswith('csv') or file_name.endswith('txt'):
                        print 'ASCII formats are not that great for '
                        print 'blockwise reading. Consider storing this'
                        print 'data as a pts NetCDF format'
    def __len__(self):
        """Return the number of data points held by this object."""
        return len(self.data_points)
    def __repr__(self):
        """String form: the absolute coordinates of all data points."""
        return str(self.get_data_points(absolute=True))
188    def check_data_points(self, data_points):
189        """Checks data points
190        """
191       
192        if data_points is None:
193            self.data_points = None
194            msg = 'There is no data or file provided!'
195            raise ValueError, msg
196           
197        else:
198            self.data_points = ensure_numeric(data_points)
199            #print "self.data_points.shape",self.data_points.shape
200            if not (0,) == self.data_points.shape:
201                assert len(self.data_points.shape) == 2
202                assert self.data_points.shape[1] == 2
203
204    def set_attributes(self, attributes):
205        """Check and assign attributes dictionary
206        """
207       
208        if attributes is None:
209            self.attributes = None
210            return
211       
212        if not isinstance(attributes, DictType):
213            #Convert single attribute into dictionary
214            attributes = {DEFAULT_ATTRIBUTE: attributes}
215
216        #Check input attributes   
217        for key in attributes.keys():
218            try:
219                attributes[key] = ensure_numeric(attributes[key])
220            except:
221                msg = 'Attribute %s could not be converted' %key
222                msg += 'to a numeric vector'
223                raise msg
224
225        self.attributes = attributes   
226
227
228    def set_geo_reference(self, geo_reference):
229        """
230        Set's the georeference of geospatial.
231        It can also be used to change the georeference
232        """
233        from anuga.coordinate_transforms.geo_reference import Geo_reference
234
235        if geo_reference is None:
236            geo_reference = Geo_reference() # Use default
237        geo_reference = ensure_geo_reference(geo_reference)
238        if not isinstance(geo_reference, Geo_reference):
239            msg = 'Argument geo_reference must be a valid Geo_reference \n'
240            msg += 'object or None.'
241            raise msg
242
243        # if a geo ref already exists, change the point data to
244        # represent the new geo-ref
245        if  self.geo_reference is not None:
246            #FIXME: Maybe put out a warning here...
247            self.data_points = self.get_data_points \
248                               (geo_reference=geo_reference)
249           
250        self.geo_reference = geo_reference
251
252
    def set_default_attribute_name(self, default_attribute_name):
        """Record which attribute get_attributes() returns by default."""
        self.default_attribute_name = default_attribute_name
256    def set_verbose(self, verbose=False):
257        if verbose in [False, True]:
258            self.verbose = verbose
259        else:
260            msg = 'Illegal value: %s' %str(verbose)
261            raise Exception, msg
262
263    def clip(self, polygon, closed=True, verbose=False):
264        """Clip geospatial data by a polygon
265
266        Input
267          polygon - Either a list of points, an Nx2 array or
268                    a Geospatial data object.
269          closed - (optional) determine whether points on boundary should be
270          regarded as belonging to the polygon (closed = True)
271          or not (closed = False). Default is True.
272           
273        Output
274          New geospatial data object representing points inside
275          specified polygon.
276       
277        """
278
279        from anuga.utilities.polygon import inside_polygon
280
281        if isinstance(polygon, Geospatial_data):
282            # Polygon is an object - extract points
283            polygon = polygon.get_data_points()
284            #print 'polygon',polygon
285
286        points = self.get_data_points()   
287        #print '%s points:%s' %(verbose,points)
288        inside_indices = inside_polygon(points, polygon, closed, verbose)
289
290        clipped_G = self.get_sample(inside_indices)
291#        clipped_points = take(points, inside_indices)
292
293        # Clip all attributes
294#        attributes = self.get_all_attributes()
295
296#        clipped_attributes = {}
297#        if attributes is not None:
298#            for key, att in attributes.items():
299#                clipped_attributes[key] = take(att, inside_indices)
300
301#        return Geospatial_data(clipped_points, clipped_attributes)
302        return clipped_G
303       
304       
305    def clip_outside(self, polygon, closed=True,verbose=False):
306        """Clip geospatial date by a polygon, keeping data OUTSIDE of polygon
307
308        Input
309          polygon - Either a list of points, an Nx2 array or
310                    a Geospatial data object.
311          closed - (optional) determine whether points on boundary should be
312          regarded as belonging to the polygon (closed = True)
313          or not (closed = False). Default is True.
314         
315        Output
316          Geospatial data object representing point OUTSIDE specified polygon
317       
318        """
319
320        from anuga.utilities.polygon import outside_polygon
321
322        if isinstance(polygon, Geospatial_data):
323            # Polygon is an object - extract points
324            polygon = polygon.get_data_points()
325
326        points = self.get_data_points()   
327        outside_indices = outside_polygon(points, polygon, closed,verbose)
328
329        clipped_G = self.get_sample(outside_indices)
330
331#        clipped_points = take(points, outside_indices)
332
333        # Clip all attributes
334#        attributes = self.get_all_attributes()
335
336#        clipped_attributes = {}
337#        if attributes is not None:
338#            for key, att in attributes.items():
339#                clipped_attributes[key] = take(att, outside_indices)
340
341#        return Geospatial_data(clipped_points, clipped_attributes)
342        return clipped_G
343
344   
    def get_geo_reference(self):
        """Return the Geo_reference object associated with this data."""
        return self.geo_reference
348    def get_data_points(self, absolute=True, geo_reference=None,
349                        as_lat_long=False,isSouthHemisphere=True):
350        """Get coordinates for all data points as an Nx2 array
351
352        If absolute is False returned coordinates are relative to the
353        internal georeference's xll and yll corners, otherwise
354        absolute UTM coordinates are returned.
355
356        If a geo_reference is passed the points are returned relative
357        to that geo_reference.
358       
359        isSH (isSouthHemisphere) is only used when getting data
360        points "as_lat_long" is True and if FALSE will return lats and
361        longs valid for the Northern Hemisphere.
362
363        Default: absolute is True.
364        """
365        if as_lat_long is True:
366            msg = "Points need a zone to be converted into lats and longs"
367            assert self.geo_reference is not None, msg
368            zone = self.geo_reference.get_zone()
369            assert self.geo_reference.get_zone() is not DEFAULT_ZONE, msg
370            lats_longs = []
371            for point in self.get_data_points(True):
372                ### UTMtoLL(northing, easting, zone,
373                lat_calced, long_calced = UTMtoLL(point[1],point[0], 
374                                                  zone, isSouthHemisphere)
375                lats_longs.append((lat_calced, long_calced)) # to hash
376            return lats_longs
377           
378        if absolute is True and geo_reference is None:
379            return self.geo_reference.get_absolute(self.data_points)
380        elif geo_reference is not None:
381            return geo_reference.change_points_geo_ref \
382                               (self.data_points, 
383                                self.geo_reference)
384        else:
385            return self.data_points
386       
387   
388    def get_attributes(self, attribute_name=None):
389        """Return values for one named attribute.
390
391        If attribute_name is None, default_attribute_name is used
392        """
393
394        if attribute_name is None:
395            if self.default_attribute_name is not None:
396                attribute_name = self.default_attribute_name
397            else:
398                attribute_name = self.attributes.keys()[0] 
399                # above line takes the first one from keys
400       
401        if self.verbose is True:
402            print 'Using attribute %s' %attribute_name
403            print 'Available attributes: %s' %(self.attributes.keys())       
404
405        msg = 'Attribute name %s does not exist in data set' %attribute_name
406        assert self.attributes.has_key(attribute_name), msg
407
408        return self.attributes[attribute_name]
409
    def get_all_attributes(self):
        """Return values for all attributes.

        The return value is either None or a dictionary (possibly empty).
        """
        return self.attributes
    def __add__(self, other):
        """Return the concatenation of two Geospatial_data objects.

        The points of 'other' are appended after the points of self,
        likewise for each attribute vector.

        NOTE: raises Exception if the two objects do not carry the same
        set of attribute names.

        Always returns absolute points!  This also means that if you add
        None to the object, it will be turned into absolute coordinates.

        other can be None, in which case nothing is added to self.
        """


        # find objects zone and checks if the same
        geo_ref1 = self.get_geo_reference()
        zone1 = geo_ref1.get_zone()


        if other is not None:

            geo_ref2 = other.get_geo_reference()
            zone2 = geo_ref2.get_zone()

            # Raises an exception if the zones cannot be reconciled
            geo_ref1.reconcile_zones(geo_ref2)


            new_points = concatenate((self.get_data_points(absolute=True),
                                      other.get_data_points(absolute=True)),
                                      axis = 0)



            # Concatenate attributes if any
            if self.attributes is None:
                if other.attributes is not None:
                    msg = 'Geospatial data must have the same \n'
                    msg += 'attributes to allow addition.'
                    raise Exception, msg

                new_attributes = None
            else:
                new_attributes = {}
                for x in self.attributes.keys():
                    if other.attributes.has_key(x):

                        attrib1 = self.attributes[x]
                        attrib2 = other.attributes[x]
                        new_attributes[x] = concatenate((attrib1, attrib2))

                    else:
                        msg = 'Geospatial data must have the same \n'
                        msg += 'attributes to allow addition.'
                        raise Exception, msg


        else:
            # other is None: keep self's (absolute) points and attributes
            new_points = self.get_data_points(absolute=True)
            new_attributes = self.attributes



        # Instantiate new data object and return absolute coordinates
        # (xll/yll set to 0 since the points are already absolute)
        new_geo_ref = Geo_reference(geo_ref1.get_zone(), 0.0, 0.0)
        return Geospatial_data(new_points,
                               new_attributes,
                               new_geo_ref)
491    def __radd__(self, other):
492        """Handle cases like None + Geospatial_data(...)
493        """
494
495        return self + other
496
497   
498   
499    ###
500    #  IMPORT/EXPORT POINTS FILES
501    ###
502
    def import_points_file(self, file_name, delimiter=None, verbose=False):
        """Load a .txt, .csv or .pts file into this object.

        Note: will throw an IOError/SyntaxError if it can't load the file.
        Catch these!

        delimiter appears unused here; presumably retained for backward
        compatibility.

        Post condition: self.data_points, self.attributes and
        self.geo_reference have been set from the file contents.
        """

        if access(file_name, F_OK) == 0 :
            msg = 'File %s does not exist or is not accessible' %file_name
            raise IOError, msg

        attributes = {}
        if file_name[-4:]== ".pts":
            try:
                data_points, attributes, geo_reference =\
                             _read_pts_file(file_name, verbose)
            except IOError, e:
                msg = 'Could not open file %s ' %file_name
                raise IOError, msg

        elif file_name[-4:]== ".txt" or file_name[-4:]== ".csv":
            try:
                data_points, attributes, geo_reference =\
                             _read_csv_file(file_name, verbose)
            except IOError, e:
                # This should only be if a file is not found
                msg = 'Could not open file %s. ' %file_name
                msg += 'Check the file location.'
                raise IOError, msg
            except SyntaxError, e:
                # This should only be if there is a format error
                msg = 'Could not open file %s. \n' %file_name
                msg += Error_message['IOError']
                # NOTE(review): Error_message is not defined in this part of
                # the file - presumably a module-level dict defined further
                # down; confirm it exists or this line raises NameError.
                raise SyntaxError, msg
        else:
            msg = 'Extension %s is unknown' %file_name[-4:]
            raise IOError, msg

        self.data_points = data_points
        self.attributes = attributes
        self.geo_reference = geo_reference
    def export_points_file(self, file_name, absolute=True,
                           as_lat_long=False, isSouthHemisphere=True):

        """Write a points file as text (.csv/.txt), binary (.pts) or .urs.

        file_name is the file name, including the extension, which selects
        the output format.

        If absolute is True the xll and yll are added to the point values
        and the xll and yll of the written geo_reference are set to 0.

        If absolute is False data points are written relative to the xll
        and yll and the geo_reference remains unaffected (only valid for
        .pts output; text and .urs formats require absolute).

        isSouthHemisphere is only used when as_lat_long is True; if False
        the written lats and longs are valid for the Northern Hemisphere.

        Raises IOError for an unknown file extension.
        """

        if (file_name[-4:] == ".pts"):
            if absolute is True:
                # Zero the offsets in a copy so the stored points + georef
                # remain consistent
                geo_ref = deepcopy(self.geo_reference)
                geo_ref.xllcorner = 0
                geo_ref.yllcorner = 0
                _write_pts_file(file_name,
                                self.get_data_points(absolute),
                                self.get_all_attributes(),
                                geo_ref)
            else:
                _write_pts_file(file_name,
                                self.get_data_points(absolute),
                                self.get_all_attributes(),
                                self.get_geo_reference())

        elif file_name[-4:] == ".txt" or file_name[-4:] == ".csv":
            msg = "ERROR: trying to write a .txt file with relative data."
            assert absolute, msg
            _write_csv_file(file_name,
                            self.get_data_points(absolute=True,
                                                 as_lat_long=as_lat_long,
                                  isSouthHemisphere=isSouthHemisphere),
                            self.get_all_attributes(),
                            as_lat_long=as_lat_long)

        elif file_name[-4:] == ".urs" :
            msg = "ERROR: Can not write a .urs file as a relative file."
            assert absolute, msg
            # .urs files are always written as lat/long
            _write_urs_file(file_name,
                            self.get_data_points(as_lat_long=True,
                                  isSouthHemisphere=isSouthHemisphere))

        else:
            msg = 'Unknown file type %s ' %file_name
            raise IOError, msg
607    def get_sample(self, indices):
608        """ Returns a object which is a subset of the original
609        and the data points and attributes in this new object refer to
610        the indices provided
611       
612        Input
613            indices- a list of integers that represent the new object
614        Output
615            New geospatial data object representing points specified by
616            the indices
617            """
618        #FIXME: add the geo_reference to this
619       
620        points = self.get_data_points()
621        sampled_points = take(points, indices)
622
623        attributes = self.get_all_attributes()
624
625        sampled_attributes = {}
626        if attributes is not None:
627            for key, att in attributes.items():
628                sampled_attributes[key] = take(att, indices)
629
630        return Geospatial_data(sampled_points, sampled_attributes)
631   
632   
    def split(self, factor=0.5, seed_num=None, verbose=False):
        """Randomly partition this object into two disjoint subsets.

        Returns two Geospatial_data objects: the first holds roughly
        'factor' of the points, the second holds the remainder.  The two
        new objects are disjoint sets of each other.

        Points for the two new objects are selected RANDOMLY.

        This method creates two lists of indices which are passed into
        get_sample.  The lists are created using random numbers, and
        they are unique sets eg.  total_list(1,2,3,4,5,6,7,8,9)
        random_list(1,3,6,7,9) and remainder_list(0,2,4,5,8)

        Input - factor: fraction of points to place in the first object
                        (if 0.1 then ~10% of the points are returned first)
                seed_num: optional random seed, mainly for unit tests

        Output - two Geospatial_data objects that are disjoint sets of the
            original
        """

        i=0
        self_size = len(self)
        random_list = []
        remainder_list = []
        new_size = round(factor*self_size)

        # Find unique random numbers
        if verbose: print "make unique random number list and get indices"

        total=array(range(self_size))
        total_list = total.tolist()
        if verbose: print "total list len",len(total_list)

        # There will be repeated random numbers however will not be a
        # problem as they are being 'pop'ed out of array so if there
        # are two numbers the same they will pop different indicies,
        # still basically random
        ## create list of non-unique random numbers
        if verbose: print "create random numbers list %s long" %new_size

        # Set seed if provided, mainly important for unit test!
        # plus recalculate seed when no seed provided.
        if seed_num != None:
            seed(seed_num,seed_num)
        else:
            seed()
        if verbose: print "seed:", get_seed()

        random_num = randint(0,self_size-1,(int(new_size),))
        random_num = random_num.tolist()

        # need to sort and reverse so the pop() works correctly:
        # popping from highest index downwards keeps lower indices valid
        random_num.sort()
        random_num.reverse()

        if verbose: print "make random number list and get indices"
        j=0
        k=1
        remainder_list = total_list[:]
        # pops array index (random_num) from remainder_list
        # (which starts as the
        # total_list and appends to random_list
        random_num_len = len(random_num)
        for i in random_num:
            random_list.append(remainder_list.pop(i))
            j+=1
            # prints progress roughly every 10%
            if verbose and round(random_num_len/10*k)==j:
                print '(%s/%s)' %(j, random_num_len)
                k+=1

        # FIXME: move to tests, it might take a long time
        # then create an array of random length between 500 and 1000,
        # and use a random factor between 0 and 1
        # setup for assertion
        test_total = random_list[:]
        test_total.extend(remainder_list)
        test_total.sort()
        msg = 'The two random lists made from the original list when added '\
         'together DO NOT equal the original list'
        assert (total_list==test_total),msg

        # Get new samples
        if verbose: print "get values of indices for random list"
        G1 = self.get_sample(random_list)
        if verbose: print "get values of indices for opposite of random list"
        G2 = self.get_sample(remainder_list)

        return G1, G2
    def __iter__(self):
        """Prepare blockwise iteration over the underlying points file.

        Reads the header and number_of_points and saves the file pointer
        position so that next() can deliver one block at a time.
        """

        from Scientific.IO.NetCDF import NetCDFFile

        #FIXME - what to do if the file isn't there

        # FIXME (Ole): Shouldn't this go into the constructor?
        # This method acts like the constructor when blocking.
        # ... and shouldn't it be called block_size?
        #
        if self.max_read_lines is None:
            self.max_read_lines = MAX_READ_LINES

        if self.file_name[-4:] == ".pts":

            # See if the file is there.  Throw a QUIET IO error if it isn't
            fd = open(self.file_name,'r')
            fd.close()

            # Throws prints to screen if file not present
            self.fid = NetCDFFile(self.file_name, 'r')

            self.blocking_georef, self.blocking_keys, self.number_of_points =\
                                  _read_pts_file_header(self.fid,
                                                        self.verbose)
            self.start_row = 0
            self.last_row = self.number_of_points
            self.show_verbose = 0
            self.verbose_block_size = (self.last_row + 10)/10
            self.block_number = 0
            self.number_of_blocks = self.number_of_points/self.max_read_lines
            # This computes the number of full blocks. The last block may be
            # smaller and won't be included in this estimate.

            if self.verbose is True:
                print 'Reading %d points (in ~%d blocks) from file %s. '\
                      %(self.number_of_points,
                        self.number_of_blocks,
                        self.file_name),
                print 'Each block consists of %d data points'\
                      %self.max_read_lines

        else:
            # Assume the file is a csv file
            file_pointer = open(self.file_name)
            self.header, self.file_pointer = \
                         _read_csv_file_header(file_pointer)
            self.blocking_georef = None # Used for reconciling zones

        return self
    def next(self):
        """ read a block, instanciate a new geospatial and return it"""
        
        # Python 2 iterator protocol: return the next block of points as a
        # new Geospatial_data instance, or raise StopIteration when the
        # file prepared by the iterator setup is exhausted.  Blocking state
        # (fid/file_pointer, row counters, georef) lives on self and is
        # deleted again on the final iteration.
        if self.file_name[-4:] == ".pts":
            if self.start_row == self.last_row:
                # Read the end of the file last iteration
                # Remove blocking attributes
                self.fid.close()
                del self.max_read_lines
                del self.blocking_georef
                del self.last_row
                del self.start_row
                del self.blocking_keys
                del self.fid
                raise StopIteration
            # Clamp the final (possibly partial) block to the end of file
            fin_row = self.start_row + self.max_read_lines
            if fin_row > self.last_row:
                fin_row = self.last_row

               
            
            if self.verbose is True:
                # Report progress only about every verbose_block_size
                # points, so huge files don't flood the screen
                if self.show_verbose >= self.start_row and \
                       self.show_verbose < fin_row:

                    print 'Reading block %d (points %d to %d) out of %d'\
                          %(self.block_number,
                            self.start_row,
                            fin_row,
                            self.number_of_blocks)

                    
                    self.show_verbose += max(self.max_read_lines,
                                             self.verbose_block_size)

                 
            # Read next block
            pointlist, att_dict, = \
                       _read_pts_file_blocking(self.fid,
                                               self.start_row,
                                               fin_row,
                                               self.blocking_keys)
            
            geo = Geospatial_data(pointlist, att_dict, self.blocking_georef)
            self.start_row = fin_row
            
            self.block_number += 1            
            
        else:
            # Assume the file is a csv file
            try:
                pointlist, att_dict, geo_ref, self.file_pointer = \
                   _read_csv_file_blocking( self.file_pointer,
                                            self.header[:],
                                            max_read_lines=self.max_read_lines,
                                            verbose=self.verbose)

                # Check that the zones haven't changed.
                if geo_ref is not None:
                    geo_ref.reconcile_zones(self.blocking_georef)
                    self.blocking_georef = geo_ref
                elif self.blocking_georef is not None:
                    
                    msg = 'Geo reference given, then not given.'
                    msg += ' This should not happen.' 
                    raise ValueError, msg
                geo = Geospatial_data(pointlist, att_dict, geo_ref)
            except StopIteration:
                # End of file: close and drop blocking state before
                # propagating StopIteration to the caller
                self.file_pointer.close()
                del self.header
                del self.file_pointer
                raise StopIteration
            except ANUGAError:
                # e.g. irreconcilable UTM zones; close cleanly and re-raise
                self.file_pointer.close()
                del self.header
                del self.file_pointer
                raise
            except SyntaxError:
                self.file_pointer.close()
                del self.header
                del self.file_pointer
                # This should only be if there is a format error
                msg = 'Could not open file %s. \n' %self.file_name
                msg += Error_message['IOError']
                raise SyntaxError, msg
        return geo
869
870   
##################### Error messages ###########
# Human readable description of the expected csv/txt point file layout.
# It is appended to format errors raised while reading such files.
Error_message = {}
Em = Error_message  # short alias, kept for code elsewhere that uses it
Em['IOError'] = "\n".join([
    "NOTE: The format for a comma separated .txt/.csv file is:",
    "        1st line:     [column names]",
    "        other lines:  [x value], [y value], [attributes]",
    "",
    "           for example:",
    "           x, y, elevation, friction",
    "           0.6, 0.7, 4.9, 0.3",
    "           1.9, 2.8, 5, 0.3",
    "           2.7, 2.4, 5.2, 0.3",
    "",
    "The first two columns are assumed to be x, y coordinates.",
    "The attribute values must be numeric.",
    ""])
886
887def _set_using_lat_long(latitudes,
888                        longitudes,
889                        geo_reference,
890                        data_points,
891                        points_are_lats_longs):
892    """
893    if the points has lat long info, assume it is in (lat, long) order.
894    """
895   
896    if geo_reference is not None:
897        msg = """A georeference is specified yet latitude and longitude
898        are also specified!"""
899        raise ValueError, msg
900   
901    if data_points is not None and not points_are_lats_longs:
902        msg = """Data points are specified yet latitude and
903        longitude are also specified!"""
904        raise ValueError, msg
905   
906    if points_are_lats_longs:
907        if data_points is None:
908            msg = """Data points are not specified !"""
909            raise ValueError, msg
910        lats_longs = ensure_numeric(data_points)
911        latitudes = ravel(lats_longs[:,0:1])
912        longitudes = ravel(lats_longs[:,1:])
913       
914    if latitudes is None and longitudes is None:
915        msg = """Latitudes and Longitudes are not."""
916        raise ValueError, msg
917   
918    if latitudes is None:
919        msg = """Longitudes are specified yet latitudes aren't!"""
920        raise ValueError, msg
921   
922    if longitudes is None:
923        msg = """Latitudes are specified yet longitudes aren't!"""
924        raise ValueError, msg
925   
926    data_points, zone  = convert_from_latlon_to_utm(latitudes=latitudes,
927                                                    longitudes=longitudes)
928    return data_points, Geo_reference(zone=zone)
929
930   
931def _read_pts_file(file_name, verbose=False):
932    """Read .pts NetCDF file
933   
934    Return a dic of array of points, and dic of array of attribute
935    eg
936    dic['points'] = [[1.0,2.0],[3.0,5.0]]
937    dic['attributelist']['elevation'] = [[7.0,5.0]
938    """   
939
940    from Scientific.IO.NetCDF import NetCDFFile
941   
942    if verbose: print 'Reading ', file_name
943   
944       
945    # See if the file is there.  Throw a QUIET IO error if it isn't
946    fd = open(file_name,'r')
947    fd.close()
948   
949    # Throws prints to screen if file not present
950    fid = NetCDFFile(file_name, 'r') 
951   
952    pointlist = array(fid.variables['points'])
953    keys = fid.variables.keys()
954    if verbose: print 'Got %d variables: %s' %(len(keys), keys)
955    try:
956        keys.remove('points')
957    except IOError, e:       
958        fid.close()   
959        msg = 'Expected keyword "points" but could not find it'
960        raise IOError, msg
961   
962    attributes = {}
963    for key in keys:
964        if verbose: print "reading attribute '%s'" %key
965           
966        attributes[key] = array(fid.variables[key])
967   
968   
969    try:
970        geo_reference = Geo_reference(NetCDFObject=fid)
971    except AttributeError, e:
972        geo_reference = None
973   
974    fid.close()
975   
976    return pointlist, attributes, geo_reference
977
978
def _read_csv_file(file_name, verbose=False):
    """Read a .csv point file in one go.

    Returns (pointlist, attributes, geo_reference); the enormous
    max_read_lines value effectively disables blocking, so the whole
    file is read as a single block.

    Propagates StopIteration (empty file), SyntaxError (bad format)
    and ANUGAError from the blocking reader.
    """
    
    file_pointer = open(file_name)
    header, file_pointer = _read_csv_file_header(file_pointer)
    try:
        pointlist, att_dict, geo_ref, file_pointer = \
                   _read_csv_file_blocking( \
                file_pointer,
                header,
                max_read_lines=1e30) #If the file is bigger that this, block..
    finally:
        # BUG FIX: the original only closed the file on success or on
        # ANUGAError, leaking the handle when StopIteration or
        # SyntaxError was raised.  finally covers every exit path
        # (closing an already-closed file is harmless).
        file_pointer.close()
    return pointlist, att_dict, geo_ref    
1001
CSV_DELIMITER = ','

def _read_csv_file_header(file_pointer,
                          delimiter=CSV_DELIMITER,
                          verbose=False):

    """Consume and parse the first line of an open csv file.

    Returns (header, file_pointer) where header is the list of
    cleaned-up column names.
    """
    first_line = file_pointer.readline()
    column_names = clean_line(first_line, delimiter)
    return column_names, file_pointer
1013
def _read_csv_file_blocking(file_pointer, header,
                            delimiter=CSV_DELIMITER,
                            max_read_lines=MAX_READ_LINES,
                            verbose=False):
    

    """
    Read the body of a .csv file.
    header: The list header of the csv file, with the x and y labels.

    Reads at most max_read_lines data rows from file_pointer and
    returns (pointlist, att_dict, geo_ref, file_pointer).  Raises
    StopIteration when no points were read (end of file) and
    SyntaxError for malformed rows or a header/row column mismatch.
    If the two leading columns are named lat/long (first three header
    characters 'lat'/'lon'), the points are converted to UTM and a
    Geo_reference is returned; otherwise geo_ref is None.
    """
    points = []
    pointattributes = []
    att_dict = {}

    # This is to remove the x and y headers.
    header = header[:]  # work on a copy so the caller's header survives
    try:
        x_header = header.pop(0)
        y_header = header.pop(0)
    except IndexError:
        # if there are not two columns this will occur.
        # eg if it is a space seperated file
        raise SyntaxError
    
    read_lines = 0
    while read_lines<max_read_lines:
        line = file_pointer.readline()
        numbers = clean_line(line,delimiter)
        if len(numbers) <= 1:
            # blank line or end of file terminates this block
            break
        if line[0] == '#':
            # comment line: skip without counting it against the block
            continue
        read_lines += 1
        try:
            x = float(numbers[0])
            y = float(numbers[1])
            points.append([x,y])
            # drop the two coordinates; the rest are attribute values
            numbers.pop(0)
            numbers.pop(0)
            if len(header) != len(numbers):
                file_pointer.close() 
                msg = "File load error.  \
                There might be a problem with the file header"
                raise SyntaxError, msg
            for i,num in enumerate(numbers):
                # NOTE(review): the strip() result below is discarded;
                # clean_line has already stripped each field, so this
                # line is a harmless no-op.
                num.strip()
                if num != '\n' and num != '':
                    att_dict.setdefault(header[i],[]).append(float(num))
        except ValueError:
            # non-numeric coordinate or attribute value
            raise SyntaxError
    if points == []:
        # nothing read: signal end of iteration to the caller
        raise StopIteration
        
    
    pointlist = array(points).astype(Float)
    for key in att_dict.keys():
        att_dict[key] = array(att_dict[key]).astype(Float)

    # Do stuff here so the info is in lat's and longs
    geo_ref = None
    x_header = lower(x_header[:3])
    y_header = lower(y_header[:3])
    if (x_header == 'lon' or  x_header == 'lat') and \
       (y_header == 'lon' or  y_header == 'lat'):
        if x_header == 'lon':
            longitudes = ravel(pointlist[:,0:1])
            latitudes = ravel(pointlist[:,1:])
        else:
            latitudes = ravel(pointlist[:,0:1])
            longitudes = ravel(pointlist[:,1:])
        
        pointlist, geo_ref = _set_using_lat_long(latitudes,
                                                 longitudes,
                                                 geo_reference=None,
                                                 data_points=None,
                                                 points_are_lats_longs=False)
    return pointlist, att_dict, geo_ref, file_pointer
1094
1095def _read_pts_file_header(fid, verbose=False):
1096
1097    """Read the geo_reference and number_of_points from a .pts file
1098    """
1099   
1100    keys = fid.variables.keys()
1101    try:
1102        keys.remove('points')
1103    except IOError, e:       
1104        fid.close()   
1105        msg = 'Expected keyword "points" but could not find it'
1106        raise IOError, msg
1107    if verbose: print 'Got %d variables: %s' %(len(keys), keys)
1108   
1109    try:
1110        geo_reference = Geo_reference(NetCDFObject=fid)
1111    except AttributeError, e:
1112        geo_reference = None
1113
1114    return geo_reference, keys, fid.dimensions['number_of_points']
1115
def _read_pts_file_blocking(fid, start_row, fin_row, keys):
    """Read rows [start_row, fin_row) from an open .pts NetCDF file.

    fid: open NetCDF file object
    keys: names of the attribute variables to read

    Returns (pointlist, attributes) covering just that slice of rows.
    """
    
    read_slice = lambda name: array(fid.variables[name][start_row:fin_row])

    pointlist = read_slice('points')
    attributes = {}
    for name in keys:
        attributes[name] = read_slice(name)

    return pointlist, attributes
1132   
def _write_pts_file(file_name,
                    write_data_points,
                    write_attributes=None, 
                    write_geo_reference=None):
    """
    Write a .pts NetCDF file.

    write_data_points: Nx2 array of point coordinates.
    write_attributes: optional dict mapping attribute name to N values.
                      No attribute may be called 'points' (that name is
                      taken by the coordinate variable).
    write_geo_reference: optional Geo_reference stored with the file.

    NOTE: Below might not be valid ask Duncan : NB 5/2006
    
    WARNING: This function mangles the point_atts data structure
    #F??ME: (DSG)This format has issues.
    # There can't be an attribute called points
    # consider format change
    # method changed by NB not sure if above statement is correct
    """   
    from Scientific.IO.NetCDF import NetCDFFile
    # NetCDF file definition
    outfile = NetCDFFile(file_name, 'w')
    
    # Create new file
    outfile.institution = 'Geoscience Australia'
    outfile.description = 'NetCDF format for compact and portable storage ' +\
                          'of spatial point data'
    
    # Dimension definitions.
    # Renamed from 'shape', which shadowed Numeric's shape() imported
    # at the top of this module.
    number_of_points = write_data_points.shape[0]
    outfile.createDimension('number_of_points', number_of_points) 
    outfile.createDimension('number_of_dimensions', 2) #This is 2d data
    
    # Variable definition
    outfile.createVariable('points', Float, ('number_of_points',
                                             'number_of_dimensions'))

    #create variables 
    outfile.variables['points'][:] = write_data_points #.astype(Float32)

    if write_attributes is not None:
        # One variable per attribute, one value per point
        for key in write_attributes.keys():
            outfile.createVariable(key, Float, ('number_of_points',))
            outfile.variables[key][:] = write_attributes[key] #.astype(Float32)
        
    if write_geo_reference is not None:
        write_NetCDF_georeference(write_geo_reference, outfile)
        
    outfile.close() 
1184 
1185def _write_csv_file(file_name,
1186                    write_data_points,
1187                    write_attributes=None,
1188                    as_lat_long=False,
1189                    delimiter=','):
1190    """
1191    export a file, file_name, with the csv format
1192   
1193    """
1194    points = write_data_points
1195    pointattributes = write_attributes
1196   
1197    fd = open(file_name,'w')
1198    if as_lat_long:
1199        titlelist = "latitude" + delimiter + "longitude"  + delimiter
1200    else:
1201        titlelist = "x" + delimiter + "y"  + delimiter
1202    if pointattributes is not None:   
1203        for title in pointattributes.keys():
1204            titlelist = titlelist + title + delimiter
1205        titlelist = titlelist[0:-len(delimiter)] # remove the last delimiter
1206    fd.write(titlelist+"\n")
1207   
1208    # <x/lat> <y/long> [attributes]
1209    for i, vert in enumerate( points):
1210
1211
1212        if pointattributes is not None:           
1213            attlist = ","
1214            for att in pointattributes.keys():
1215                attlist = attlist + str(pointattributes[att][i])+ delimiter
1216            attlist = attlist[0:-len(delimiter)] # remove the last delimiter
1217            attlist.strip()
1218        else:
1219            attlist = ''
1220
1221        fd.write(str(vert[0]) + delimiter +
1222                 str(vert[1]) + attlist + "\n")
1223
1224    fd.close()
1225 
def _write_urs_file(file_name,
                    points,
                    delimiter=' '):
    """
    Export points to file_name in urs format.

    points are (lat, long) pairs; the first line holds the number of
    points and each following line is '<lat> <long> <index>' with the
    coordinates rounded to 7 decimal places.
    """    
    out = open(file_name,'w')    
    out.write("%d\n" % len(points))
    # <lat> <long> <id#>
    for index, vert in enumerate(points):
        lat = str(round(vert[0], 7))
        lon = str(round(vert[1], 7))
        out.write(lat + delimiter + lon + delimiter + str(index) + "\n")
    out.close()
1241   
def _point_atts2array(point_atts):
    """Coerce 'pointlist' and every entry of 'attributelist' in
    point_atts to Float arrays, in place, and return the dictionary.
    """
    point_atts['pointlist'] = array(point_atts['pointlist']).astype(Float)
    
    attributes = point_atts['attributelist']
    for name in attributes.keys():
        attributes[name] = array(attributes[name]).astype(Float)
    return point_atts
1249
1250   
1251
1252
def geospatial_data2points_dictionary(geospatial_data):
    """Convert a Geospatial_data object to a points dictionary with the
    keys 'pointlist', 'attributelist' and 'geo_reference'.
    """

    attributes = {}
    for attribute_name in geospatial_data.attributes.keys():
        attributes[attribute_name] = geospatial_data.attributes[attribute_name]

    points_dictionary = {'pointlist': geospatial_data.data_points,
                         'attributelist': attributes,
                         'geo_reference': geospatial_data.geo_reference}

    return points_dictionary
1269
1270   
def points_dictionary2geospatial_data(points_dictionary):
    """Build a Geospatial_data object from a points dictionary.

    The dictionary must contain 'pointlist' and 'attributelist';
    'geo_reference' is optional (None when absent).
    """

    assert 'pointlist' in points_dictionary, \
           'Points dictionary must have key pointlist'
    assert 'attributelist' in points_dictionary, \
           'Points dictionary must have key attributelist'

    geo = points_dictionary.get('geo_reference', None)

    return Geospatial_data(points_dictionary['pointlist'],
                           points_dictionary['attributelist'],
                           geo_reference=geo)
1289
def clean_line(line,delimiter):      
    """Split line on delimiter, strip surrounding whitespace from each
    field and drop fields that were empty before stripping.

    Note: a field containing only whitespace is kept (as ''), matching
    the historical behaviour of this helper.
    """
    return [field.strip()
            for field in line.strip().split(delimiter)
            if field != '']
1308           
def ensure_absolute(points, geo_reference=None):
    """
    Accept points in several formats and return one: a numeric array
    of absolute coordinates.

    points: list/array of [x, y] pairs, a Geospatial_data object, or
            the name of a points file.
    geo_reference: Geo_reference object or (zone, easting, northing)
                   3-tuple; when given, the input coordinates are taken
                   to be relative to that origin.  Must not be combined
                   with a Geospatial_data input.
    """
    if isinstance(points, str):
        # Treat a string as the name of a point file
        points = Geospatial_data(file_name = points)
        
    if isinstance(points, Geospatial_data):
        points = points.get_data_points( \
                absolute = True)
        msg = "Use a Geospatial_data object or a mesh origin. Not both."
        assert geo_reference == None, msg
    else:
        points = ensure_numeric(points, Float)

    if geo_reference is not None:
        if isinstance(geo_reference, Geo_reference):
            origin = geo_reference
        else:
            origin = Geo_reference(geo_reference[0],
                                   geo_reference[1],
                                   geo_reference[2])
        points = origin.get_absolute(points)

    return points
1347     
1348
def ensure_geospatial(points, geo_reference=None):
    """
    Accept points in several formats and return one: a
    Geospatial_data instance.

    points: list/array of [x, y] pairs or an existing Geospatial_data
            object (returned unchanged).
    geo_reference: Geo_reference object or (zone, easting, northing)
                   3-tuple for relative coordinates.  Must not be
                   combined with a Geospatial_data input.
    """
    if isinstance(points, Geospatial_data):
        msg = "Use a Geospatial_data object or a mesh origin. Not both."
        assert geo_reference == None, msg
        return points

    points = ensure_numeric(points, Float)

    if geo_reference is None:
        geo = None
    elif isinstance(geo_reference, Geo_reference):
        geo = geo_reference
    else:
        geo = Geo_reference(geo_reference[0],
                            geo_reference[1],
                            geo_reference[2])

    return Geospatial_data(data_points=points, geo_reference=geo)
1380
1381
1382
1383def find_optimal_smoothing_parameter(data_file, 
1384                                     alpha_list=None,
1385                                     mesh_file=None,
1386                                     boundary_poly=None,
1387                                     mesh_resolution=100000,
1388                                     north_boundary=None,
1389                                     south_boundary=None,
1390                                     east_boundary=None,
1391                                     west_boundary=None,
1392                                     plot_name='all_alphas',
1393                                     split_factor=0.1,
1394                                     seed_num=None,
1395                                     cache=False,
1396                                     verbose=False
1397                                     ):
1398   
1399    """
1400    data_file: must not contain points outside the boundaries defined
1401           and it either a pts, txt or csv file.
1402   
1403    alpha_list: the alpha values to test in a single list
1404   
1405    mesh_file: name of the created mesh file or if passed in will read it.
1406            NOTE, if there is a mesh file mesh_resolution,
1407            north_boundary, south... etc will be ignored.
1408   
1409    mesh_resolution: the maximum area size for a triangle
1410   
1411    north_boundary... west_boundary: the value of the boundary
1412   
1413    plot_name: the name for the plot contain the results
1414   
1415    seed_num: the seed to the random number generator
1416   
1417    USAGE:
1418        value, alpha = find_optimal_smoothing_parameter(data_file=fileName,
1419                                             alpha_list=[0.0001, 0.01, 1],
1420                                             mesh_file=None,
1421                                             mesh_resolution=3,
1422                                             north_boundary=5,
1423                                             south_boundary=-5,
1424                                             east_boundary=5,
1425                                             west_boundary=-5,
1426                                             plot_name='all_alphas',
1427                                             seed_num=100000,
1428                                             verbose=False)
1429   
1430    OUTPUT: returns the minumum normalised covalance calculate AND the
1431           alpha that created it. PLUS writes a plot of the results
1432           
1433    NOTE: code will not work if the data_file extend is greater than the
1434    boundary_polygon or the north_boundary...west_boundary
1435       
1436    """
1437
1438    from anuga.shallow_water import Domain
1439    from anuga.geospatial_data.geospatial_data import Geospatial_data
1440    from anuga.pmesh.mesh_interface import create_mesh_from_regions
1441   
1442    from anuga.utilities.numerical_tools import cov
1443    from Numeric import array, resize,shape,Float,zeros,take,argsort,argmin
1444    from anuga.utilities.polygon import is_inside_polygon
1445    from anuga.fit_interpolate.benchmark_least_squares import mem_usage
1446   
1447
1448    attribute_smoothed='elevation'
1449
1450    if mesh_file is None:
1451        if verbose: print "building mesh"
1452        mesh_file='temp.msh'
1453
1454        if north_boundary is None or south_boundary is None or \
1455           east_boundary is None or west_boundary is None:
1456            no_boundary=True
1457        else:
1458            no_boundary=False
1459       
1460        if no_boundary is True:
1461            msg= 'All boundaries must be defined'
1462            raise msg
1463
1464        poly_topo = [[east_boundary,south_boundary],
1465                     [east_boundary,north_boundary],
1466                     [west_boundary,north_boundary],
1467                     [west_boundary,south_boundary]]
1468                     
1469        create_mesh_from_regions(poly_topo,
1470                                 boundary_tags={'back': [2],
1471                                                'side': [1,3],
1472                                                'ocean': [0]},
1473                             maximum_triangle_area=mesh_resolution,
1474                             filename=mesh_file,
1475                             use_cache=cache,
1476                             verbose=verbose)
1477
1478    else: # if mesh file provided
1479        #test mesh file exists?
1480        if verbose: "reading from file: %s" %mesh_file
1481        if access(mesh_file,F_OK) == 0:
1482            msg="file %s doesn't exist!" %mesh_file
1483            raise IOError, msg
1484
1485    #split topo data
1486    if verbose: print 'Reading elevation file: %s' %data_file
1487    G = Geospatial_data(file_name = data_file)
1488    if verbose: print 'Start split'
1489    G_small, G_other = G.split(split_factor,seed_num, verbose=verbose)
1490    if verbose: print 'Finish split'
1491    points=G_small.get_data_points()
1492
1493
1494
1495
1496    #FIXME: Remove points outside boundary polygon
1497#    print 'new point',len(points)
1498#   
1499#    new_points=[]
1500#    new_points=array([],typecode=Float)
1501#    new_points=resize(new_points,(len(points),2))
1502#    print "BOUNDARY", boundary_poly
1503#    for i,point in enumerate(points):
1504#        if is_inside_polygon(point,boundary_poly, verbose=True):
1505#            new_points[i] = point
1506#            print"WOW",i,new_points[i]
1507#    points = new_points
1508
1509   
1510    if verbose: print "Number of points in sample to compare: ", len(points)
1511   
1512    if alpha_list==None:
1513        alphas = [0.001,0.01,100]
1514        #alphas = [0.000001, 0.00001, 0.0001, 0.001, 0.01,\
1515         #         0.1, 1.0, 10.0, 100.0,1000.0,10000.0]
1516       
1517    else:
1518        alphas=alpha_list
1519#    domains = {}
1520
1521    #creates array with columns 1 and 2 are x, y. column 3 is elevation
1522    #4 onwards is the elevation_predicted using the alpha, which will
1523    #be compared later against the real removed data
1524    data=array([],typecode=Float)
1525
1526    data=resize(data,(len(points),3+len(alphas)))
1527
1528    #gets relative point from sample
1529    data[:,0]=points[:,0]
1530    data[:,1]=points[:,1]
1531    elevation_sample=G_small.get_attributes(attribute_name=attribute_smoothed)
1532    data[:,2]=elevation_sample
1533
1534    normal_cov=array(zeros([len(alphas),2]),typecode=Float)
1535
1536
1537
1538
1539
1540    if verbose: print 'Setup computational domains with different alphas'
1541
1542    #print 'memory usage before domains',mem_usage()
1543
1544    for i,alpha in enumerate(alphas):
1545        #add G_other data to domains with different alphas
1546        if verbose:print '\n Calculating domain and mesh for Alpha = ',alpha,'\n'
1547        domain = Domain(mesh_file, use_cache=cache, verbose=verbose)
1548        if verbose:print domain.statistics()
1549        domain.set_quantity(attribute_smoothed, 
1550                    geospatial_data = G_other,
1551                    use_cache = cache,
1552                    verbose = verbose,
1553                    alpha = alpha)
1554#        domains[alpha]=domain
1555
1556        points_geo=domain.geo_reference.change_points_geo_ref(points)
1557        #returns the predicted elevation of the points that were "split" out
1558        #of the original data set for one particular alpha
1559        if verbose: print 'Get predicted elevation for location to be compared'
1560        elevation_predicted=domain.quantities[attribute_smoothed].\
1561                            get_values(interpolation_points=points_geo)
1562 
1563        #add predicted elevation to array that starts with x, y, z...
1564        data[:,i+3]=elevation_predicted
1565
1566        sample_cov= cov(elevation_sample)
1567        #print elevation_predicted
1568        ele_cov= cov(elevation_sample-elevation_predicted)
1569        normal_cov[i,:]= [alpha,ele_cov/sample_cov]
1570        #print 'memory usage during compare',mem_usage()
1571       
1572        if verbose: print'Covariance for alpha ',normal_cov[i][0],'= ',normal_cov[i][1]
1573        if verbose: print'-------------------------------------------- \n'
1574#    if verbose: print 'Determine difference between predicted results and actual data'
1575#    for i,alpha in enumerate(domains):
1576#        if verbose: print'Alpha =',alpha
1577#       
1578#        points_geo=domains[alpha].geo_reference.change_points_geo_ref(points)
1579#        #returns the predicted elevation of the points that were "split" out
1580#        #of the original data set for one particular alpha
1581#        elevation_predicted=domains[alpha].quantities[attribute_smoothed].\
1582#                            get_values(interpolation_points=points_geo)
1583#
1584#        #add predicted elevation to array that starts with x, y, z...
1585#        data[:,i+3]=elevation_predicted
1586#
1587#        sample_cov= cov(elevation_sample)
1588#        #print elevation_predicted
1589#        ele_cov= cov(elevation_sample-elevation_predicted)
1590#        normal_cov[i,:]= [alpha,ele_cov/sample_cov]
1591#        print 'memory usage during compare',mem_usage()
1592#       
1593#
1594#        if verbose: print'cov',normal_cov[i][0],'= ',normal_cov[i][1]
1595
1596    normal_cov0=normal_cov[:,0]
1597    normal_cov_new=take(normal_cov,argsort(normal_cov0))
1598
1599    if plot_name is not None:
1600        from pylab import savefig,semilogx,loglog
1601        semilogx(normal_cov_new[:,0],normal_cov_new[:,1])
1602        loglog(normal_cov_new[:,0],normal_cov_new[:,1])
1603        savefig(plot_name,dpi=300)
1604    if mesh_file == 'temp.msh':
1605        remove(mesh_file)
1606   
1607    if verbose: 
1608        print 'Final results:'
1609        for i,alpha in enumerate(alphas):
1610            print'covariance for alpha %s = %s ' %(normal_cov[i][0],normal_cov[i][1])
1611        print '\n Optimal alpha is: %s ' % normal_cov_new[(argmin(normal_cov_new,axis=0))[1],0]
1612
1613    return min(normal_cov_new[:,1]) , normal_cov_new[(argmin(normal_cov_new,axis=0))[1],0]
1614
1615def old_find_optimal_smoothing_parameter(data_file, 
1616                                     alpha_list=None,
1617                                     mesh_file=None,
1618                                     boundary_poly=None,
1619                                     mesh_resolution=100000,
1620                                     north_boundary=None,
1621                                     south_boundary=None,
1622                                     east_boundary=None,
1623                                     west_boundary=None,
1624                                     plot_name='all_alphas',
1625                                     split_factor=0.1,
1626                                     seed_num=None,
1627                                     cache=False,
1628                                     verbose=False
1629                                     ):
1630   
1631    """
1632    data_file: must not contain points outside the boundaries defined
1633           and it either a pts, txt or csv file.
1634   
1635    alpha_list: the alpha values to test in a single list
1636   
1637    mesh_file: name of the created mesh file or if passed in will read it.
1638            NOTE, if there is a mesh file mesh_resolution,
1639            north_boundary, south... etc will be ignored.
1640   
1641    mesh_resolution: the maximum area size for a triangle
1642   
1643    north_boundary... west_boundary: the value of the boundary
1644   
1645    plot_name: the name for the plot contain the results
1646   
1647    seed_num: the seed to the random number generator
1648   
1649    USAGE:
1650        value, alpha = find_optimal_smoothing_parameter(data_file=fileName,
1651                                             alpha_list=[0.0001, 0.01, 1],
1652                                             mesh_file=None,
1653                                             mesh_resolution=3,
1654                                             north_boundary=5,
1655                                             south_boundary=-5,
1656                                             east_boundary=5,
1657                                             west_boundary=-5,
1658                                             plot_name='all_alphas',
1659                                             seed_num=100000,
1660                                             verbose=False)
1661   
1662    OUTPUT: returns the minumum normalised covalance calculate AND the
1663           alpha that created it. PLUS writes a plot of the results
1664           
1665    NOTE: code will not work if the data_file extend is greater than the
1666    boundary_polygon or the north_boundary...west_boundary
1667       
1668    """
1669
1670    from anuga.shallow_water import Domain
1671    from anuga.geospatial_data.geospatial_data import Geospatial_data
1672    from anuga.pmesh.mesh_interface import create_mesh_from_regions
1673   
1674    from anuga.utilities.numerical_tools import cov
1675    from Numeric import array, resize,shape,Float,zeros,take,argsort,argmin
1676    from anuga.utilities.polygon import is_inside_polygon
1677    from anuga.fit_interpolate.benchmark_least_squares import mem_usage
1678   
1679
1680    attribute_smoothed='elevation'
1681
1682    if mesh_file is None:
1683        mesh_file='temp.msh'
1684
1685        if north_boundary is None or south_boundary is None or \
1686           east_boundary is None or west_boundary is None:
1687            no_boundary=True
1688        else:
1689            no_boundary=False
1690       
1691        if no_boundary is True:
1692            msg= 'All boundaries must be defined'
1693            raise msg
1694
1695        poly_topo = [[east_boundary,south_boundary],
1696                     [east_boundary,north_boundary],
1697                     [west_boundary,north_boundary],
1698                     [west_boundary,south_boundary]]
1699                     
1700        create_mesh_from_regions(poly_topo,
1701                                 boundary_tags={'back': [2],
1702                                                'side': [1,3],
1703                                                'ocean': [0]},
1704                             maximum_triangle_area=mesh_resolution,
1705                             filename=mesh_file,
1706                             use_cache=cache,
1707                             verbose=verbose)
1708
1709    else: # if mesh file provided
1710        #test mesh file exists?
1711        if access(mesh_file,F_OK) == 0:
1712            msg="file %s doesn't exist!" %mesh_file
1713            raise IOError, msg
1714
1715    #split topo data
1716    G = Geospatial_data(file_name = data_file)
1717    if verbose: print 'start split'
1718    G_small, G_other = G.split(split_factor,seed_num, verbose=verbose)
1719    if verbose: print 'finish split'
1720    points=G_small.get_data_points()
1721
1722    #FIXME: Remove points outside boundary polygon
1723#    print 'new point',len(points)
1724#   
1725#    new_points=[]
1726#    new_points=array([],typecode=Float)
1727#    new_points=resize(new_points,(len(points),2))
1728#    print "BOUNDARY", boundary_poly
1729#    for i,point in enumerate(points):
1730#        if is_inside_polygon(point,boundary_poly, verbose=True):
1731#            new_points[i] = point
1732#            print"WOW",i,new_points[i]
1733#    points = new_points
1734
1735   
1736    if verbose: print "Number of points in sample to compare: ", len(points)
1737   
1738    if alpha_list==None:
1739        alphas = [0.001,0.01,100]
1740        #alphas = [0.000001, 0.00001, 0.0001, 0.001, 0.01,\
1741         #         0.1, 1.0, 10.0, 100.0,1000.0,10000.0]
1742       
1743    else:
1744        alphas=alpha_list
1745    domains = {}
1746
1747    if verbose: print 'Setup computational domains with different alphas'
1748
1749    print 'memory usage before domains',mem_usage()
1750
1751    for alpha in alphas:
1752        #add G_other data to domains with different alphas
1753        if verbose:print '\n Calculating domain and mesh for Alpha = ',alpha,'\n'
1754        domain = Domain(mesh_file, use_cache=cache, verbose=verbose)
1755        if verbose:print domain.statistics()
1756        domain.set_quantity(attribute_smoothed, 
1757                    geospatial_data = G_other,
1758                    use_cache = cache,
1759                    verbose = verbose,
1760                    alpha = alpha)
1761        domains[alpha]=domain
1762
1763    print 'memory usage after domains',mem_usage()
1764
1765    #creates array with columns 1 and 2 are x, y. column 3 is elevation
1766    #4 onwards is the elevation_predicted using the alpha, which will
1767    #be compared later against the real removed data
1768    data=array([],typecode=Float)
1769
1770    data=resize(data,(len(points),3+len(alphas)))
1771
1772    #gets relative point from sample
1773    data[:,0]=points[:,0]
1774    data[:,1]=points[:,1]
1775    elevation_sample=G_small.get_attributes(attribute_name=attribute_smoothed)
1776    data[:,2]=elevation_sample
1777
1778    normal_cov=array(zeros([len(alphas),2]),typecode=Float)
1779
1780    if verbose: print 'Determine difference between predicted results and actual data'
1781    for i,alpha in enumerate(domains):
1782        if verbose: print'Alpha =',alpha
1783       
1784        points_geo=domains[alpha].geo_reference.change_points_geo_ref(points)
1785        #returns the predicted elevation of the points that were "split" out
1786        #of the original data set for one particular alpha
1787        elevation_predicted=domains[alpha].quantities[attribute_smoothed].\
1788                            get_values(interpolation_points=points_geo)
1789 
1790        #add predicted elevation to array that starts with x, y, z...
1791        data[:,i+3]=elevation_predicted
1792
1793        sample_cov= cov(elevation_sample)
1794        #print elevation_predicted
1795        ele_cov= cov(elevation_sample-elevation_predicted)
1796        normal_cov[i,:]= [alpha,ele_cov/sample_cov]
1797        print 'memory usage during compare',mem_usage()
1798       
1799
1800        if verbose: print'cov',normal_cov[i][0],'= ',normal_cov[i][1]
1801
1802    normal_cov0=normal_cov[:,0]
1803    normal_cov_new=take(normal_cov,argsort(normal_cov0))
1804
1805    if plot_name is not None:
1806        from pylab import savefig,semilogx,loglog
1807        semilogx(normal_cov_new[:,0],normal_cov_new[:,1])
1808        loglog(normal_cov_new[:,0],normal_cov_new[:,1])
1809        savefig(plot_name,dpi=300)
1810    if mesh_file == 'temp.msh':
1811        remove(mesh_file)
1812   
1813    return min(normal_cov_new[:,1]) , normal_cov_new[(argmin(normal_cov_new,axis=0))[1],0]
1814
1815         
# This module is a library; it provides no command-line behaviour.
if __name__ == "__main__":
    pass
   
Note: See TracBrowser for help on using the repository browser.