Context Navigation

geospatial_data.py @ 7207

Last change on this file since 7207 was 6902, checked in by rwilson, 16 years ago
Back-merge from Numeric trunk to numpy branch.
File size: 69.4 KB

Rev	Line
[6428]	1	"""Class Geospatial_data
[4126]	2
[6428]	3	Manipulation of locations on the planet and associated attributes.
[4126]	4	"""
[6081]	5
[4178]	6	from sys import maxint
[4744]	7	from os import access, F_OK, R_OK,remove
[4126]	8	from types import DictType
[4150]	9	from warnings import warn
[4180]	10	from string import lower
[4452]	11	from copy import deepcopy
[6689]	12	import copy
[6428]	13
[6081]	14	from Scientific.IO.NetCDF import NetCDFFile
[6304]	15	import numpy as num
[6768]	16	from numpy.random import randint, seed
[6153]	17
[6081]	18	from anuga.coordinate_transforms.lat_long_UTM_conversion import UTMtoLL
[4126]	19	from anuga.utilities.numerical_tools import ensure_numeric
[4223]	20	from anuga.coordinate_transforms.geo_reference import Geo_reference, \
[4452]	21	TitleError, DEFAULT_ZONE, ensure_geo_reference, write_NetCDF_georeference
[4126]	22	from anuga.coordinate_transforms.redfearn import convert_from_latlon_to_utm
[6360]	23	from anuga.utilities.system_tools import clean_line
[4180]	24	from anuga.utilities.anuga_exceptions import ANUGAError
[6081]	25	from anuga.config import points_file_block_line_size as MAX_READ_LINES
[6086]	26	from anuga.config import netcdf_mode_r, netcdf_mode_w, netcdf_mode_a
[6304]	27	from anuga.config import netcdf_float
[4126]	28
[6428]	29
[4535]	30	DEFAULT_ATTRIBUTE = 'elevation'
	31
[6081]	32
	33	##
	34	# @brief ??
[4126]	35	class Geospatial_data:
	36
[6081]	37	##
	38	# @brief
	39	# @param data_points Mx2 iterable of tuples or array of x,y coordinates.
	40	# @param attributes Associated values for each data point.
	41	# @param geo_reference ??
	42	# @param default_attribute_name ??
	43	# @param file_name
	44	# @param latitudes ??
	45	# @param longitudes ??
	46	# @param points_are_lats_longs True if points are lat/long, not UTM.
	47	# @param max_read_lines Size of block to read, if blocking.
	48	# @param load_file_now True if blocking but we eant to read file now.
	49	# @param verbose True if this class instance is verbose.
[4126]	50	def __init__(self,
	51	data_points=None, # this can also be a points file name
	52	attributes=None,
	53	geo_reference=None,
	54	default_attribute_name=None,
	55	file_name=None,
	56	latitudes=None,
	57	longitudes=None,
	58	points_are_lats_longs=False,
[4129]	59	max_read_lines=None,
	60	load_file_now=True,
[4126]	61	verbose=False):
[6304]	62	"""Create instance from data points and associated attributes
[4126]	63
	64	data_points: x,y coordinates in meters. Type must be either a
[6304]	65	sequence of 2-tuples or an Mx2 numeric array of floats. A file name
[4569]	66	with extension .txt, .cvs or .pts can also be passed in here.
[4126]	67
	68	attributes: Associated values for each data point. The type
	69	must be either a list or an array of length M or a dictionary
	70	of lists (or arrays) of length M. In the latter case the keys
	71	in the dictionary represent the attribute names, in the former
[4535]	72	the attribute will get the default name "elevation".
[6081]	73
[4126]	74	geo_reference: Object representing the origin of the data
	75	points. It contains UTM zone, easting and northing and data
	76	points are assumed to be relative to this origin.
[4179]	77	If geo_reference is None, the default geo ref object is used.
[4126]	78
	79	default_attribute_name: Name of default attribute to be used with
	80	get_attribute_values. The idea is that the dataset can be
	81	equipped with information about which attribute to return.
	82	If None, the default is the "first"
[4179]	83
	84	latitudes, longitudes: Vectors of latitudes and longitudes,
	85	used to specify location instead of points.
[6081]	86
[4179]	87	points_are_lats_longs: Set this as true if the points are actually
	88	lats and longs, not UTM
	89
	90	max_read_lines: The number of rows read into memory when using
	91	blocking to read a file.
	92
	93	load_file_now: If true the file is automatically loaded
	94	into the geospatial instance. Used when blocking.
[6081]	95
	96	file_name: Name of input netCDF file or .txt file. netCDF file must
[4126]	97	have dimensions "points" etc.
[4179]	98	.txt file is a comma seperated file with x, y and attribute
[4180]	99	data.
[6081]	100
[4180]	101	The first line has the titles of the columns. The first two
	102	column titles are checked to see if they start with lat or
	103	long (not case sensitive). If so the data is assumed to be
	104	latitude and longitude, in decimal format and converted to
	105	UTM. Otherwise the first two columns are assumed to be the x
	106	and y, and the title names acually used are ignored.
[6081]	107
	108
[4179]	109	The format for a .txt file is:
	110	1st line: [column names]
[4126]	111	other lines: x y [attributes]
	112
	113	for example:
[4179]	114	x, y, elevation, friction
[4126]	115	0.6, 0.7, 4.9, 0.3
	116	1.9, 2.8, 5, 0.3
	117	2.7, 2.4, 5.2, 0.3
	118
[4659]	119	The first two columns have to be x, y or lat, long
[4126]	120	coordinates.
[6081]	121
	122
[4126]	123	The format for a Points dictionary is:
	124	['pointlist'] a 2 column array describing points. 1st column x,
	125	2nd column y.
	126	['attributelist'], a dictionary of 1D arrays, representing
	127	attribute values at the point. The dictionary key is the attribute
	128	header.
	129	['geo_reference'] a Geo_refernece object. Use if the point
	130	information is relative. This is optional.
	131	eg
	132	dic['pointlist'] = [[1.0,2.0],[3.0,5.0]]
	133	dic['attributelist']['elevation'] = [[7.0,5.0]
[6081]	134
[4126]	135	verbose:
	136	"""
	137
	138	if isinstance(data_points, basestring):
[6304]	139	# assume data_points is really a file name
[4126]	140	file_name = data_points
	141
	142	self.set_verbose(verbose)
[6428]	143	self.geo_reference = None
[4126]	144	self.file_name = file_name
[6081]	145
[4663]	146	if max_read_lines is None:
	147	self.max_read_lines = MAX_READ_LINES
	148	else:
	149	self.max_read_lines = max_read_lines
[4179]	150
[4126]	151	if file_name is None:
[6428]	152	if (latitudes is not None or longitudes is not None
	153	or points_are_lats_longs):
[6081]	154	data_points, geo_reference = \
	155	_set_using_lat_long(latitudes=latitudes,
	156	longitudes=longitudes,
	157	geo_reference=geo_reference,
	158	data_points=data_points,
[6428]	159	points_are_lats_longs=
	160	points_are_lats_longs)
[4126]	161	self.check_data_points(data_points)
	162	self.set_attributes(attributes)
	163	self.set_geo_reference(geo_reference)
	164	self.set_default_attribute_name(default_attribute_name)
[4129]	165	elif load_file_now is True:
[4126]	166	# watch for case where file name and points,
	167	# attributes etc are provided!!
	168	# if file name then all provided info will be removed!
[4576]	169
[4633]	170	if verbose is True:
	171	if file_name is not None:
[6081]	172	print 'Loading Geospatial data from file: %s' % file_name
	173
[4660]	174	self.import_points_file(file_name, verbose=verbose)
[6081]	175
[4126]	176	self.check_data_points(self.data_points)
[6081]	177	self.set_attributes(self.attributes)
[4126]	178	self.set_geo_reference(self.geo_reference)
	179	self.set_default_attribute_name(default_attribute_name)
	180
[4576]	181	if verbose is True:
	182	if file_name is not None:
[6081]	183	print 'Geospatial data created from file: %s' % file_name
[4576]	184	if load_file_now is False:
	185	print 'Data will be loaded blockwise on demand'
	186
	187	if file_name.endswith('csv') or file_name.endswith('txt'):
[5351]	188	pass
	189	# This message was misleading.
	190	# FIXME (Ole): Are we blocking here or not?
[6304]	191	# print 'ASCII formats are not that great for '
	192	# print 'blockwise reading. Consider storing this'
	193	# print 'data as a pts NetCDF format'
[4576]	194
[6081]	195	##
	196	# @brief Return length of the points set.
[4126]	197	def __len__(self):
	198	return len(self.data_points)
	199
[6081]	200	##
	201	# @brief Return a string representation of the points set.
[4126]	202	def __repr__(self):
	203	return str(self.get_data_points(absolute=True))
[6081]	204
	205	##
[6304]	206	# @brief Check data points.
	207	# @param data_points Points data to check and store in instance.
[6081]	208	# @note Throws ValueError exception if no data.
[4126]	209	def check_data_points(self, data_points):
[6304]	210	"""Checks data points"""
[6081]	211
[4126]	212	if data_points is None:
	213	self.data_points = None
	214	msg = 'There is no data or file provided!'
	215	raise ValueError, msg
	216	else:
	217	self.data_points = ensure_numeric(data_points)
[4165]	218	if not (0,) == self.data_points.shape:
	219	assert len(self.data_points.shape) == 2
	220	assert self.data_points.shape[1] == 2
[4126]	221
[6081]	222	##
	223	# @brief Check and assign attributes data.
	224	# @param attributes Dictionary or scalar to save as .attributes.
	225	# @note Throws exception if unable to convert dict keys to numeric.
[4126]	226	def set_attributes(self, attributes):
[6304]	227	"""Check and assign attributes dictionary"""
[6081]	228
[4126]	229	if attributes is None:
	230	self.attributes = None
	231	return
[6081]	232
[4126]	233	if not isinstance(attributes, DictType):
[6304]	234	# Convert single attribute into dictionary
[4535]	235	attributes = {DEFAULT_ATTRIBUTE: attributes}
[4126]	236
[6304]	237	# Check input attributes
[4126]	238	for key in attributes.keys():
	239	try:
	240	attributes[key] = ensure_numeric(attributes[key])
	241	except:
[6304]	242	msg = ("Attribute '%s' (%s) could not be converted to a"
	243	"numeric vector" % (str(key), str(attributes[key])))
	244	raise Exception, msg
[4126]	245
[6081]	246	self.attributes = attributes
[4126]	247
[6081]	248	##
	249	# @brief Set the georeference of geospatial data.
[6304]	250	# @param geo_reference The georeference data to set.
[6081]	251	# @note Will raise exception if param not instance of Geo_reference.
[4126]	252	def set_geo_reference(self, geo_reference):
[5730]	253	"""Set the georeference of geospatial data.
[6081]	254
[5730]	255	It can also be used to change the georeference and will ensure that
	256	the absolute coordinate values are unchanged.
[4452]	257	"""
[6081]	258
[4126]	259	if geo_reference is None:
[5730]	260	# Use default - points are in absolute coordinates
	261	geo_reference = Geo_reference()
[6081]	262
	263	# Allow for tuple (zone, xllcorner, yllcorner)
[4452]	264	geo_reference = ensure_geo_reference(geo_reference)
[6081]	265
[4126]	266	if not isinstance(geo_reference, Geo_reference):
[6081]	267	# FIXME (Ole): This exception will be raised even
[5739]	268	# if geo_reference is None. Is that the intent Duncan?
[6428]	269	msg = ('Argument geo_reference must be a valid Geo_reference '
	270	'object or None.')
[6304]	271	raise Expection, msg
[4126]	272
[5730]	273	# If a geo_reference already exists, change the point data according to
	274	# the new geo reference
[4126]	275	if self.geo_reference is not None:
[5730]	276	self.data_points = self.get_data_points(geo_reference=geo_reference)
[6081]	277
[4126]	278	self.geo_reference = geo_reference
	279
[6081]	280	##
	281	# @brief Set default attribute name.
	282	# @param default_attribute_name The default to save.
[4126]	283	def set_default_attribute_name(self, default_attribute_name):
	284	self.default_attribute_name = default_attribute_name
	285
[6081]	286	##
	287	# @brief Set the instance verbose flag.
	288	# @param verbose The value to save.
	289	# @note Will raise exception if param is not True or False.
[4126]	290	def set_verbose(self, verbose=False):
	291	if verbose in [False, True]:
	292	self.verbose = verbose
	293	else:
[6081]	294	msg = 'Illegal value: %s' % str(verbose)
[4255]	295	raise Exception, msg
[4126]	296
[6081]	297	##
	298	# @brief Clip geospatial data by a given polygon.
	299	# @param polygon The polygon to clip with.
	300	# @param closed True if points on clip boundary are not included in result.
	301	# @param verbose True if this function is verbose.
[5010]	302	def clip(self, polygon, closed=True, verbose=False):
[4126]	303	"""Clip geospatial data by a polygon
	304
	305	Input
	306	polygon - Either a list of points, an Nx2 array or
	307	a Geospatial data object.
	308	closed - (optional) determine whether points on boundary should be
	309	regarded as belonging to the polygon (closed = True)
	310	or not (closed = False). Default is True.
[6081]	311
[4126]	312	Output
	313	New geospatial data object representing points inside
	314	specified polygon.
[6081]	315
	316
	317	Note - this method is non-destructive and leaves the data in 'self'
[5421]	318	unchanged
[4126]	319	"""
	320
	321	from anuga.utilities.polygon import inside_polygon
	322
	323	if isinstance(polygon, Geospatial_data):
	324	# Polygon is an object - extract points
	325	polygon = polygon.get_data_points()
	326
[6081]	327	points = self.get_data_points()
[5010]	328	inside_indices = inside_polygon(points, polygon, closed, verbose)
[4126]	329
	330	clipped_G = self.get_sample(inside_indices)
[5146]	331
[6081]	332	return clipped_G
[4126]	333
[6081]	334	##
	335	# @brief Clip points data by polygon, return points outside polygon.
	336	# @param polygon The polygon to clip with.
	337	# @param closed True if points on clip boundary are not included in result.
	338	# @param verbose True if this function is verbose.
	339	def clip_outside(self, polygon, closed=True, verbose=False):
[4126]	340	"""Clip geospatial date by a polygon, keeping data OUTSIDE of polygon
	341
	342	Input
	343	polygon - Either a list of points, an Nx2 array or
	344	a Geospatial data object.
	345	closed - (optional) determine whether points on boundary should be
	346	regarded as belonging to the polygon (closed = True)
	347	or not (closed = False). Default is True.
[6081]	348
[4126]	349	Output
	350	Geospatial data object representing point OUTSIDE specified polygon
	351	"""
	352
	353	from anuga.utilities.polygon import outside_polygon
	354
	355	if isinstance(polygon, Geospatial_data):
	356	# Polygon is an object - extract points
	357	polygon = polygon.get_data_points()
	358
[6081]	359	points = self.get_data_points()
[5010]	360	outside_indices = outside_polygon(points, polygon, closed,verbose)
[4126]	361
	362	clipped_G = self.get_sample(outside_indices)
	363
	364	return clipped_G
	365
[6081]	366	##
	367	# @brief Get instance geo_reference data.
[4126]	368	def get_geo_reference(self):
	369	return self.geo_reference
[6081]	370
	371	##
	372	# @brief Get coordinates for all data points as an Nx2 array.
	373	# @param absolute If True, return UTM, else relative to xll/yll corners.
	374	# @param geo_reference If supplied, points are relative to it.
	375	# @param as_lat_long If True, return points as lat/lon.
	376	# @param isSouthHemisphere If True, return lat/lon points in S.Hemi.
	377	# @return A set of data points, in appropriate form.
[6428]	378	def get_data_points(self,
	379	absolute=True,
	380	geo_reference=None,
	381	as_lat_long=False,
	382	isSouthHemisphere=True):
[4202]	383	"""Get coordinates for all data points as an Nx2 array
[4126]	384
[4194]	385	If absolute is False returned coordinates are relative to the
	386	internal georeference's xll and yll corners, otherwise
	387	absolute UTM coordinates are returned.
[4126]	388
	389	If a geo_reference is passed the points are returned relative
	390	to that geo_reference.
[6081]	391
	392	isSH (isSouthHemisphere) is only used when getting data
	393	points "as_lat_long" is True and if FALSE will return lats and
[4640]	394	longs valid for the Northern Hemisphere.
[4126]	395
	396	Default: absolute is True.
	397	"""
[6081]	398
[4223]	399	if as_lat_long is True:
	400	msg = "Points need a zone to be converted into lats and longs"
	401	assert self.geo_reference is not None, msg
	402	zone = self.geo_reference.get_zone()
	403	assert self.geo_reference.get_zone() is not DEFAULT_ZONE, msg
	404	lats_longs = []
	405	for point in self.get_data_points(True):
[5730]	406	# UTMtoLL(northing, easting, zone,
[6081]	407	lat_calced, long_calced = UTMtoLL(point[1], point[0],
[4641]	408	zone, isSouthHemisphere)
[4225]	409	lats_longs.append((lat_calced, long_calced)) # to hash
[4223]	410	return lats_longs
[6081]	411
[4126]	412	if absolute is True and geo_reference is None:
	413	return self.geo_reference.get_absolute(self.data_points)
	414	elif geo_reference is not None:
[6081]	415	return geo_reference.change_points_geo_ref(self.data_points,
[5739]	416	self.geo_reference)
[4126]	417	else:
[6081]	418	# If absolute is False
[4126]	419	return self.data_points
[5146]	420
[6081]	421	##
	422	# @brief Get value for attribute name.
	423	# @param attribute_name Name to get value for.
	424	# @note If name passed is None, return default attribute value.
[4126]	425	def get_attributes(self, attribute_name=None):
	426	"""Return values for one named attribute.
	427
	428	If attribute_name is None, default_attribute_name is used
	429	"""
	430
	431	if attribute_name is None:
	432	if self.default_attribute_name is not None:
	433	attribute_name = self.default_attribute_name
	434	else:
[6081]	435	attribute_name = self.attributes.keys()[0]
[4126]	436	# above line takes the first one from keys
[6081]	437
[4255]	438	if self.verbose is True:
	439	print 'Using attribute %s' %attribute_name
[6081]	440	print 'Available attributes: %s' %(self.attributes.keys())
[4126]	441
[6081]	442	msg = 'Attribute name %s does not exist in data set' % attribute_name
[4126]	443	assert self.attributes.has_key(attribute_name), msg
	444
	445	return self.attributes[attribute_name]
	446
[6081]	447	##
	448	# @brief Get all instance attributes.
	449	# @return The instance attribute dictionary, or None if no attributes.
[4126]	450	def get_all_attributes(self):
[6081]	451	"""Return values for all attributes.
[4126]	452	The return value is either None or a dictionary (possibly empty).
	453	"""
	454
	455	return self.attributes
	456
[6081]	457	##
	458	# @brief Override __add__() to allow addition of geospatial objects.
	459	# @param self This object.
	460	# @param other The second object.
	461	# @return The new geospatial object.
[4126]	462	def __add__(self, other):
[6428]	463	"""Returns the addition of 2 geospatial objects,
[4126]	464	objects are concatencated to the end of each other
[6081]	465
	466	NOTE: doesn't add if objects contain different
	467	attributes
	468
[4484]	469	Always return absolute points!
[6081]	470	This also means, that if you add None to the object,
[4549]	471	it will be turned into absolute coordinates
	472
	473	other can be None in which case nothing is added to self.
[4126]	474	"""
	475
	476	# find objects zone and checks if the same
	477	geo_ref1 = self.get_geo_reference()
	478	zone1 = geo_ref1.get_zone()
	479
[4549]	480	if other is not None:
	481	geo_ref2 = other.get_geo_reference()
	482	zone2 = geo_ref2.get_zone()
	483	geo_ref1.reconcile_zones(geo_ref2)
[6153]	484	new_points = num.concatenate((self.get_data_points(absolute=True),
	485	other.get_data_points(absolute=True)),
	486	axis = 0)
[4126]	487
[4549]	488	# Concatenate attributes if any
	489	if self.attributes is None:
	490	if other.attributes is not None:
[6428]	491	msg = ('Geospatial data must have the same '
	492	'attributes to allow addition.')
[4549]	493	raise Exception, msg
[6081]	494
[4549]	495	new_attributes = None
[6081]	496	else:
[4549]	497	new_attributes = {}
	498	for x in self.attributes.keys():
	499	if other.attributes.has_key(x):
	500	attrib1 = self.attributes[x]
	501	attrib2 = other.attributes[x]
[6428]	502	new_attributes[x] = num.concatenate((attrib1, attrib2),
	503	axis=0) #??default#
[4549]	504	else:
[6428]	505	msg = ('Geospatial data must have the same '
	506	'attributes to allow addition.')
[4549]	507	raise Exception, msg
	508	else:
[6304]	509	# other is None:
[4549]	510	new_points = self.get_data_points(absolute=True)
	511	new_attributes = self.attributes
[4126]	512
[4484]	513	# Instantiate new data object and return absolute coordinates
	514	new_geo_ref = Geo_reference(geo_ref1.get_zone(), 0.0, 0.0)
[6081]	515	return Geospatial_data(new_points, new_attributes, new_geo_ref)
[4549]	516
[6081]	517	##
	518	# @brief Override the addition case where LHS isn't geospatial object.
	519	# @param self This object.
	520	# @param other The second object.
	521	# @return The new geospatial object.
[4549]	522	def __radd__(self, other):
[6304]	523	"""Handle cases like None + Geospatial_data(...)"""
[4549]	524
	525	return self + other
	526
[6428]	527	################################################################################
	528	# IMPORT/EXPORT POINTS FILES
	529	################################################################################
[4126]	530
[6081]	531	##
	532	# @brief Import a .txt, .csv or .pts points data file.
	533	# @param file_name
	534	# @param delimiter
	535	# @param verbose True if this function is to be verbose.
	536	# @note Will throw IOError or SyntaxError if there is a problem.
[4126]	537	def import_points_file(self, file_name, delimiter=None, verbose=False):
[4659]	538	""" load an .txt, .csv or .pts file
[6304]	539
[4659]	540	Note: will throw an IOError/SyntaxError if it can't load the file.
[4126]	541	Catch these!
	542
	543	Post condition: self.attributes dictionary has been set
	544	"""
[6081]	545
[4126]	546	if access(file_name, F_OK) == 0 :
[6081]	547	msg = 'File %s does not exist or is not accessible' % file_name
[4126]	548	raise IOError, msg
[6081]	549
[4126]	550	attributes = {}
[6410]	551	if file_name[-4:] == ".pts":
[4126]	552	try:
[6081]	553	data_points, attributes, geo_reference = \
[4126]	554	_read_pts_file(file_name, verbose)
[6081]	555	except IOError, e:
	556	msg = 'Could not open file %s ' % file_name
	557	raise IOError, msg
[6410]	558	elif file_name[-4:] == ".txt" or file_name[-4:]== ".csv":
[4126]	559	try:
[6081]	560	data_points, attributes, geo_reference = \
[4126]	561	_read_csv_file(file_name, verbose)
[4470]	562	except IOError, e:
	563	# This should only be if a file is not found
[6428]	564	msg = ('Could not open file %s. Check the file location.'
	565	% file_name)
[4470]	566	raise IOError, msg
	567	except SyntaxError, e:
	568	# This should only be if there is a format error
[6428]	569	msg = ('Problem with format of file %s.\n%s'
	570	% (file_name, Error_message['IOError']))
[4470]	571	raise SyntaxError, msg
[6081]	572	else:
[6428]	573	msg = 'Extension %s is unknown' % file_name[-4:]
[4126]	574	raise IOError, msg
[6081]	575
[4126]	576	self.data_points = data_points
	577	self.attributes = attributes
	578	self.geo_reference = geo_reference
[6081]	579
	580	##
	581	# @brief Write points data to a file (.csv or .pts).
	582	# @param file_name Path to file to write.
	583	# @param absolute ??
	584	# @param as_lat_long ??
	585	# @param isSouthHemisphere ??
	586	def export_points_file(self, file_name, absolute=True,
[4641]	587	as_lat_long=False, isSouthHemisphere=True):
[6304]	588	"""write a points file as a text (.csv) or binary (.pts) file
	589
[4126]	590	file_name is the file name, including the extension
	591	The point_dict is defined at the top of this file.
[6081]	592
	593	If absolute is True data the xll and yll are added to the points value
[4452]	594	and the xll and yll of the geo_reference are set to 0.
[6081]	595
	596	If absolute is False data points at returned as relative to the xll
[4126]	597	and yll and geo_reference remains uneffected
[6081]	598
	599	isSouthHemisphere: is only used when getting data
	600	points "as_lat_long" is True and if FALSE will return lats and
[4641]	601	longs valid for the Northern Hemisphere.
[4126]	602	"""
[4150]	603
[4659]	604	if (file_name[-4:] == ".pts"):
[4126]	605	if absolute is True:
[4452]	606	geo_ref = deepcopy(self.geo_reference)
	607	geo_ref.xllcorner = 0
	608	geo_ref.yllcorner = 0
[4126]	609	_write_pts_file(file_name,
[6081]	610	self.get_data_points(absolute),
[4452]	611	self.get_all_attributes(),
	612	geo_ref)
[4126]	613	else:
	614	_write_pts_file(file_name,
[6081]	615	self.get_data_points(absolute),
[4126]	616	self.get_all_attributes(),
	617	self.get_geo_reference())
[4165]	618	elif file_name[-4:] == ".txt" or file_name[-4:] == ".csv":
[4154]	619	msg = "ERROR: trying to write a .txt file with relative data."
	620	assert absolute, msg
	621	_write_csv_file(file_name,
[4225]	622	self.get_data_points(absolute=True,
[4641]	623	as_lat_long=as_lat_long,
[6428]	624	isSouthHemisphere=isSouthHemisphere),
[4225]	625	self.get_all_attributes(),
	626	as_lat_long=as_lat_long)
[4238]	627	elif file_name[-4:] == ".urs" :
	628	msg = "ERROR: Can not write a .urs file as a relative file."
	629	assert absolute, msg
	630	_write_urs_file(file_name,
[4641]	631	self.get_data_points(as_lat_long=True,
[6428]	632	isSouthHemisphere=isSouthHemisphere))
[4126]	633	else:
	634	msg = 'Unknown file type %s ' %file_name
[6081]	635	raise IOError, msg
	636
	637	##
	638	# @brief Get a subset of data that is referred to by 'indices'.
	639	# @param indices A list of indices to select data subset with.
	640	# @return A geospatial object containing data subset.
[4126]	641	def get_sample(self, indices):
	642	""" Returns a object which is a subset of the original
	643	and the data points and attributes in this new object refer to
	644	the indices provided
[6081]	645
[4126]	646	Input
	647	indices- a list of integers that represent the new object
	648	Output
[6081]	649	New geospatial data object representing points specified by
	650	the indices
	651	"""
	652
[6304]	653	# FIXME: add the geo_reference to this
[4126]	654	points = self.get_data_points()
[6410]	655	sampled_points = num.take(points, indices, axis=0)
[4126]	656
	657	attributes = self.get_all_attributes()
	658
	659	sampled_attributes = {}
	660	if attributes is not None:
	661	for key, att in attributes.items():
[6410]	662	sampled_attributes[key] = num.take(att, indices, axis=0)
[4126]	663
[6081]	664	return Geospatial_data(sampled_points, sampled_attributes)
[5146]	665
[6081]	666	##
	667	# @brief Split one geospatial object into two.
	668	# @param factor Relative size to make first result object.
	669	# @param seed_num Random 'seed' - used only for unit test.
	670	# @param verbose True if this function is to be verbose.
	671	# @note Points in each result object are selected randomly.
	672	def split(self, factor=0.5, seed_num=None, verbose=False):
[6304]	673	"""Returns two geospatial_data object, first is the size of the 'factor'
[4659]	674	smaller the original and the second is the remainder. The two
[6304]	675	new objects are disjoint sets of each other.
[6081]	676
	677	Points of the two new object have selected RANDOMLY.
	678
[4659]	679	This method create two lists of indices which are passed into
	680	get_sample. The lists are created using random numbers, and
	681	they are unique sets eg. total_list(1,2,3,4,5,6,7,8,9)
	682	random_list(1,3,6,7,9) and remainder_list(0,2,4,5,8)
[6081]	683
	684	Input - the factor which to split the object, if 0.1 then 10% of the
	685	together object will be returned
	686
	687	Output - two geospatial_data objects that are disjoint sets of the
	688	original
	689	"""
	690
[6410]	691	i = 0
[4126]	692	self_size = len(self)
	693	random_list = []
	694	remainder_list = []
[6428]	695	new_size = round(factor * self_size)
[6081]	696
[4776]	697	# Find unique random numbers
[4195]	698	if verbose: print "make unique random number list and get indices"
[4126]	699
[6410]	700	total = num.array(range(self_size), num.int) #array default#
[4501]	701	total_list = total.tolist()
[6081]	702
	703	if verbose: print "total list len", len(total_list)
	704
	705	# There will be repeated random numbers however will not be a
[4776]	706	# problem as they are being 'pop'ed out of array so if there
[6081]	707	# are two numbers the same they will pop different indicies,
[4776]	708	# still basically random
[4501]	709	## create list of non-unquie random numbers
	710	if verbose: print "create random numbers list %s long" %new_size
[6081]	711
[4776]	712	# Set seed if provided, mainly important for unit test!
[4816]	713	# plus recalcule seed when no seed provided.
[6428]	714	if seed_num is not None:
[6768]	715	seed(seed_num)
[4816]	716	else:
	717	seed()
[6081]	718
[6768]	719	#if verbose: print "seed:", get_seed()
[6081]	720
	721	random_num = randint(0, self_size-1, (int(new_size),))
[4501]	722	random_num = random_num.tolist()
[4126]	723
[6304]	724	# need to sort and reverse so the pop() works correctly
[4501]	725	random_num.sort()
[6081]	726	random_num.reverse()
	727
[4501]	728	if verbose: print "make random number list and get indices"
[6081]	729
[6410]	730	j = 0
	731	k = 1
[4501]	732	remainder_list = total_list[:]
[6081]	733
[4776]	734	# pops array index (random_num) from remainder_list
[6428]	735	# (which starts as the total_list and appends to random_list)
[4501]	736	random_num_len = len(random_num)
	737	for i in random_num:
	738	random_list.append(remainder_list.pop(i))
[6081]	739	j += 1
[6304]	740	# prints progress
[6081]	741	if verbose and round(random_num_len/10*k) == j:
	742	print '(%s/%s)' % (j, random_num_len)
	743	k += 1
	744
[4776]	745	# FIXME: move to tests, it might take a long time
[6428]	746	# then create an array of random length between 500 and 1000,
[6081]	747	# and use a random factor between 0 and 1
[4776]	748	# setup for assertion
[4501]	749	test_total = random_list[:]
	750	test_total.extend(remainder_list)
[6081]	751	test_total.sort()
[6428]	752	msg = ('The two random lists made from the original list when added '
	753	'together DO NOT equal the original list')
[6081]	754	assert total_list == test_total, msg
[4501]	755
[4776]	756	# Get new samples
[4195]	757	if verbose: print "get values of indices for random list"
[4126]	758	G1 = self.get_sample(random_list)
[4195]	759	if verbose: print "get values of indices for opposite of random list"
[4126]	760	G2 = self.get_sample(remainder_list)
	761
	762	return G1, G2
	763
[6081]	764	##
	765	# @brief Allow iteration over this object.
[4126]	766	def __iter__(self):
[4569]	767	"""Read in the header, number_of_points and save the
	768	file pointer position
[4175]	769	"""
[6428]	770
[6304]	771	# FIXME - what to do if the file isn't there
[4129]	772
[4576]	773	# FIXME (Ole): Shouldn't this go into the constructor?
[6081]	774	# This method acts like the constructor when blocking.
[4576]	775	# ... and shouldn't it be called block_size?
[6081]	776	#
[4576]	777	if self.max_read_lines is None:
	778	self.max_read_lines = MAX_READ_LINES
[6081]	779
[4659]	780	if self.file_name[-4:] == ".pts":
[4776]	781	# See if the file is there. Throw a QUIET IO error if it isn't
[4175]	782	fd = open(self.file_name,'r')
	783	fd.close()
[6081]	784
[4776]	785	# Throws prints to screen if file not present
[6086]	786	self.fid = NetCDFFile(self.file_name, netcdf_mode_r)
[6081]	787
	788	(self.blocking_georef,
	789	self.blocking_keys,
	790	self.number_of_points) = _read_pts_file_header(self.fid,
	791	self.verbose)
[4569]	792	self.start_row = 0
[6081]	793	self.last_row = self.number_of_points
[4252]	794	self.show_verbose = 0
[4569]	795	self.verbose_block_size = (self.last_row + 10)/10
[4576]	796	self.block_number = 0
	797	self.number_of_blocks = self.number_of_points/self.max_read_lines
	798	# This computes the number of full blocks. The last block may be
[6428]	799	# smaller and won't be included in this estimate.
[6081]	800
[4569]	801	if self.verbose is True:
[6428]	802	print ('Reading %d points (in ~%d blocks) from file %s. '
	803	% (self.number_of_points, self.number_of_blocks,
	804	self.file_name)),
	805	print ('Each block consists of %d data points'
	806	% self.max_read_lines)
[4174]	807	else:
[4776]	808	# Assume the file is a csv file
[4174]	809	file_pointer = open(self.file_name)
[6081]	810	self.header, self.file_pointer = _read_csv_file_header(file_pointer)
[4180]	811	self.blocking_georef = None # Used for reconciling zones
[4576]	812
[4126]	813	return self
[6081]	814
	815	##
	816	# @brief Read another block into the instance.
[4126]	817	def next(self):
[6081]	818	"""read a block, instanciate a new geospatial and return it"""
	819
[4659]	820	if self.file_name[-4:] == ".pts":
[4175]	821	if self.start_row == self.last_row:
[4776]	822	# Read the end of the file last iteration
	823	# Remove blocking attributes
[4175]	824	self.fid.close()
[4179]	825	del self.max_read_lines
	826	del self.blocking_georef
	827	del self.last_row
	828	del self.start_row
	829	del self.blocking_keys
	830	del self.fid
[4175]	831	raise StopIteration
	832	fin_row = self.start_row + self.max_read_lines
	833	if fin_row > self.last_row:
	834	fin_row = self.last_row
	835
[4252]	836	if self.verbose is True:
[6428]	837	if (self.show_verbose >= self.start_row
	838	and self.show_verbose < fin_row):
	839	print ('Reading block %d (points %d to %d) out of %d'
	840	% (self.block_number, self.start_row,
	841	fin_row, self.number_of_blocks))
[4576]	842
	843	self.show_verbose += max(self.max_read_lines,
	844	self.verbose_block_size)
	845
	846	# Read next block
[6081]	847	pointlist, att_dict, = _read_pts_file_blocking(self.fid,
	848	self.start_row,
	849	fin_row,
	850	self.blocking_keys)
	851
[4175]	852	geo = Geospatial_data(pointlist, att_dict, self.blocking_georef)
	853	self.start_row = fin_row
[6081]	854
	855	self.block_number += 1
[4174]	856	else:
[4776]	857	# Assume the file is a csv file
[4174]	858	try:
[6081]	859	(pointlist,
	860	att_dict,
	861	geo_ref,
[6428]	862	self.file_pointer) = _read_csv_file_blocking(self.file_pointer,
	863	self.header[:],
[6902]	864	max_read_lines=
[6428]	865	self.max_read_lines,
[6902]	866	verbose=
[6428]	867	self.verbose)
[4180]	868
	869	# Check that the zones haven't changed.
	870	if geo_ref is not None:
	871	geo_ref.reconcile_zones(self.blocking_georef)
	872	self.blocking_georef = geo_ref
	873	elif self.blocking_georef is not None:
[6428]	874	msg = ('Geo reference given, then not given.'
	875	' This should not happen.')
[4180]	876	raise ValueError, msg
	877	geo = Geospatial_data(pointlist, att_dict, geo_ref)
[4174]	878	except StopIteration:
	879	self.file_pointer.close()
[4179]	880	del self.header
	881	del self.file_pointer
[4174]	882	raise StopIteration
[4180]	883	except ANUGAError:
	884	self.file_pointer.close()
	885	del self.header
	886	del self.file_pointer
	887	raise
[4470]	888	except SyntaxError:
	889	self.file_pointer.close()
	890	del self.header
	891	del self.file_pointer
	892	# This should only be if there is a format error
[6428]	893	msg = ('Could not open file %s.\n%s'
	894	% (self.file_name, Error_message['IOError']))
[4470]	895	raise SyntaxError, msg
[4174]	896	return geo
[4776]	897
[4470]	898	##################### Error messages ###########
	899	Error_message = {}
	900	Em = Error_message
[6428]	901	Em['IOError'] = ('NOTE: The format for a comma separated .txt/.csv file is:\n'
	902	' 1st line: [column names]\n'
	903	' other lines: [x value], [y value], [attributes]\n'
	904	'\n'
	905	' for example:\n'
	906	' x, y, elevation, friction\n'
	907	' 0.6, 0.7, 4.9, 0.3\n'
	908	' 1.9, 2.8, 5, 0.3\n'
	909	' 2.7, 2.4, 5.2, 0.3\n'
	910	'\n'
	911	'The first two columns are assumed to be x, y coordinates.\n'
	912	'The attribute values must be numeric.\n')
[4126]	913
[6081]	914	##
	915	# @brief ??
	916	# @param latitudes ??
	917	# @param longitudes ??
	918	# @param geo_reference ??
	919	# @param data_points ??
	920	# @param points_are_lats_longs ??
[6441]	921	# @note IS THIS USED???
[4180]	922	def _set_using_lat_long(latitudes,
	923	longitudes,
	924	geo_reference,
	925	data_points,
	926	points_are_lats_longs):
[6304]	927	"""If the points has lat long info, assume it is in (lat, long) order."""
[6081]	928
[4180]	929	if geo_reference is not None:
[6428]	930	msg = ('A georeference is specified yet latitude and longitude '
	931	'are also specified!')
[4180]	932	raise ValueError, msg
[6081]	933
[4180]	934	if data_points is not None and not points_are_lats_longs:
[6428]	935	msg = ('Data points are specified yet latitude and longitude are '
	936	'also specified.')
[4180]	937	raise ValueError, msg
[6081]	938
[4180]	939	if points_are_lats_longs:
	940	if data_points is None:
[6081]	941	msg = "Data points are not specified."
[4180]	942	raise ValueError, msg
	943	lats_longs = ensure_numeric(data_points)
[6153]	944	latitudes = num.ravel(lats_longs[:,0:1])
	945	longitudes = num.ravel(lats_longs[:,1:])
[6081]	946
[4180]	947	if latitudes is None and longitudes is None:
[6081]	948	msg = "Latitudes and Longitudes are not specified."
[4180]	949	raise ValueError, msg
[6081]	950
[4180]	951	if latitudes is None:
[6081]	952	msg = "Longitudes are specified yet latitudes aren't."
[4180]	953	raise ValueError, msg
[6081]	954
[4180]	955	if longitudes is None:
[6081]	956	msg = "Latitudes are specified yet longitudes aren't."
[4180]	957	raise ValueError, msg
[6081]	958
[4180]	959	data_points, zone = convert_from_latlon_to_utm(latitudes=latitudes,
	960	longitudes=longitudes)
	961	return data_points, Geo_reference(zone=zone)
[4569]	962
[6081]	963
	964	##
	965	# @brief Read a .pts data file.
	966	# @param file_name Path to file to read.
	967	# @param verbose True if this function is to be verbose.
	968	# @return (pointlist, attributes, geo_reference)
[4126]	969	def _read_pts_file(file_name, verbose=False):
	970	"""Read .pts NetCDF file
[6081]	971
[6441]	972	Return a (dict_points, dict_attribute, geo_ref)
[4126]	973	eg
[6441]	974	dict['points'] = [[1.0,2.0],[3.0,5.0]]
	975	dict['attributelist']['elevation'] = [[7.0,5.0]]
[6081]	976	"""
[4126]	977
	978	if verbose: print 'Reading ', file_name
[6081]	979
[4776]	980	# See if the file is there. Throw a QUIET IO error if it isn't
[4126]	981	fd = open(file_name,'r')
	982	fd.close()
[6081]	983
[4776]	984	# Throws prints to screen if file not present
[6086]	985	fid = NetCDFFile(file_name, netcdf_mode_r)
[6081]	986
[6153]	987	pointlist = num.array(fid.variables['points'])
[4126]	988	keys = fid.variables.keys()
[6081]	989
	990	if verbose: print 'Got %d variables: %s' % (len(keys), keys)
	991
[4126]	992	try:
	993	keys.remove('points')
[6081]	994	except IOError, e:
	995	fid.close()
	996	msg = "Expected keyword 'points' but could not find it"
[4126]	997	raise IOError, msg
[6081]	998
[4126]	999	attributes = {}
	1000	for key in keys:
[6081]	1001	if verbose: print "reading attribute '%s'" % key
	1002
[6153]	1003	attributes[key] = num.array(fid.variables[key])
[6081]	1004
[4126]	1005	try:
	1006	geo_reference = Geo_reference(NetCDFObject=fid)
	1007	except AttributeError, e:
	1008	geo_reference = None
[6081]	1009
[4126]	1010	fid.close()
[6081]	1011
[4126]	1012	return pointlist, attributes, geo_reference
	1013
	1014
[6081]	1015	##
	1016	# @brief Read a .csv data file.
	1017	# @param file_name Path to the .csv file to read.
	1018	# @param verbose True if this function is to be verbose.
[4126]	1019	def _read_csv_file(file_name, verbose=False):
	1020	"""Read .csv file
[6081]	1021
[4126]	1022	Return a dic of array of points, and dic of array of attribute
	1023	eg
	1024	dic['points'] = [[1.0,2.0],[3.0,5.0]]
[6081]	1025	dic['attributelist']['elevation'] = [[7.0,5.0]]
[4126]	1026	"""
[6081]	1027
[4126]	1028	file_pointer = open(file_name)
	1029	header, file_pointer = _read_csv_file_header(file_pointer)
[4180]	1030	try:
[6081]	1031	(pointlist,
	1032	att_dict,
	1033	geo_ref,
	1034	file_pointer) = _read_csv_file_blocking(file_pointer,
	1035	header,
	1036	max_read_lines=1e30)
[6304]	1037	# If the file is bigger that this, block..
[6081]	1038	# FIXME (Ole) What's up here?
[4180]	1039	except ANUGAError:
	1040	file_pointer.close()
[6081]	1041	raise
	1042
[4126]	1043	file_pointer.close()
	1044
[6081]	1045	return pointlist, att_dict, geo_ref
	1046
	1047
	1048	##
	1049	# @brief Read a .csv file header.
	1050	# @param file_pointer Open descriptor of the file to read.
	1051	# @param delimiter Header line delimiter string, split on this string.
	1052	# @param verbose True if this function is to be verbose.
	1053	# @return A tuple of (<cleaned header string>, <input file_pointer>)
	1054
[4126]	1055	CSV_DELIMITER = ','
[6081]	1056
[4126]	1057	def _read_csv_file_header(file_pointer,
	1058	delimiter=CSV_DELIMITER,
	1059	verbose=False):
	1060	"""Read the header of a .csv file
	1061	Return a list of the header names
	1062	"""
[6081]	1063
[6410]	1064	line = file_pointer.readline()
[4126]	1065	header = clean_line(line, delimiter)
[6081]	1066
[4126]	1067	return header, file_pointer
	1068
[6081]	1069	##
	1070	# @brief Read a .csv file, with blocking.
	1071	# @param file_pointer Open descriptor of the file to read.
	1072	# @param header List of already read .csv header fields.
	1073	# @param delimiter Delimiter string header was split on.
	1074	# @param max_read_lines The max number of lines to read before blocking.
	1075	# @param verbose True if this function is to be verbose.
	1076	# @note Will throw IndexError, SyntaxError exceptions.
[6304]	1077	def _read_csv_file_blocking(file_pointer,
	1078	header,
[4126]	1079	delimiter=CSV_DELIMITER,
[4129]	1080	max_read_lines=MAX_READ_LINES,
[4126]	1081	verbose=False):
[6081]	1082	"""Read the body of a .csv file.
[4126]	1083	header: The list header of the csv file, with the x and y labels.
	1084	"""
[6081]	1085
[4126]	1086	points = []
	1087	pointattributes = []
	1088	att_dict = {}
	1089
[4776]	1090	# This is to remove the x and y headers.
[4129]	1091	header = header[:]
[4470]	1092	try:
	1093	x_header = header.pop(0)
	1094	y_header = header.pop(0)
	1095	except IndexError:
	1096	# if there are not two columns this will occur.
	1097	# eg if it is a space seperated file
	1098	raise SyntaxError
[6081]	1099
[4126]	1100	read_lines = 0
[6081]	1101	while read_lines < max_read_lines:
[4126]	1102	line = file_pointer.readline()
[6081]	1103	numbers = clean_line(line, delimiter)
[4126]	1104	if len(numbers) <= 1:
	1105	break
	1106	if line[0] == '#':
	1107	continue
[6081]	1108
[4126]	1109	read_lines += 1
[6081]	1110
[4470]	1111	try:
	1112	x = float(numbers[0])
	1113	y = float(numbers[1])
	1114	points.append([x,y])
	1115	numbers.pop(0)
	1116	numbers.pop(0)
	1117	if len(header) != len(numbers):
[6081]	1118	file_pointer.close()
[6428]	1119	msg = ('File load error. '
	1120	'There might be a problem with the file header.')
[4470]	1121	raise SyntaxError, msg
[6153]	1122	for i,n in enumerate(numbers):
	1123	n.strip()
	1124	if n != '\n' and n != '':
	1125	att_dict.setdefault(header[i],[]).append(float(n))
[4470]	1126	except ValueError:
	1127	raise SyntaxError
[6081]	1128
[4126]	1129	if points == []:
	1130	raise StopIteration
[6081]	1131
[6304]	1132	pointlist = num.array(points, num.float)
[4126]	1133	for key in att_dict.keys():
[6304]	1134	att_dict[key] = num.array(att_dict[key], num.float)
[4180]	1135
[4776]	1136	# Do stuff here so the info is in lat's and longs
[4180]	1137	geo_ref = None
	1138	x_header = lower(x_header[:3])
	1139	y_header = lower(y_header[:3])
[6081]	1140	if (x_header == 'lon' or x_header == 'lat') \
	1141	and (y_header == 'lon' or y_header == 'lat'):
[4180]	1142	if x_header == 'lon':
[6153]	1143	longitudes = num.ravel(pointlist[:,0:1])
	1144	latitudes = num.ravel(pointlist[:,1:])
[4180]	1145	else:
[6153]	1146	latitudes = num.ravel(pointlist[:,0:1])
	1147	longitudes = num.ravel(pointlist[:,1:])
[6081]	1148
[4180]	1149	pointlist, geo_ref = _set_using_lat_long(latitudes,
	1150	longitudes,
	1151	geo_reference=None,
	1152	data_points=None,
	1153	points_are_lats_longs=False)
[4126]	1154
[6081]	1155	return pointlist, att_dict, geo_ref, file_pointer
	1156
	1157
	1158	##
	1159	# @brief Read a .pts file header.
	1160	# @param fid Handle to the open .pts file.
	1161	# @param verbose True if the function is to be verbose.
	1162	# @return (geo_reference, keys, fid.dimensions['number_of_points'])
	1163	# @note Will throw IOError and AttributeError exceptions.
[4175]	1164	def _read_pts_file_header(fid, verbose=False):
[6428]	1165	'''Read the geo_reference and number_of_points from a .pts file'''
[6081]	1166
[4175]	1167	keys = fid.variables.keys()
	1168	try:
	1169	keys.remove('points')
[6081]	1170	except IOError, e:
	1171	fid.close()
	1172	msg = "Expected keyword 'points' but could not find it."
[4175]	1173	raise IOError, msg
[6081]	1174
	1175	if verbose: print 'Got %d variables: %s' % (len(keys), keys)
	1176
[4175]	1177	try:
	1178	geo_reference = Geo_reference(NetCDFObject=fid)
	1179	except AttributeError, e:
	1180	geo_reference = None
	1181
	1182	return geo_reference, keys, fid.dimensions['number_of_points']
	1183
[6081]	1184
	1185	##
[6441]	1186	# @brief Read the body of a .pts file, with blocking.
[6081]	1187	# @param fid Handle to already open file.
	1188	# @param start_row Start row index of points to return.
	1189	# @param fin_row End row index of points to return.
	1190	# @param keys Iterable of keys to return.
	1191	# @return Tuple of (pointlist, attributes).
[4175]	1192	def _read_pts_file_blocking(fid, start_row, fin_row, keys):
[6441]	1193	'''Read the body of a .pts file.'''
[4175]	1194
[6153]	1195	pointlist = num.array(fid.variables['points'][start_row:fin_row])
[6081]	1196
[4175]	1197	attributes = {}
	1198	for key in keys:
[6153]	1199	attributes[key] = num.array(fid.variables[key][start_row:fin_row])
[4175]	1200
	1201	return pointlist, attributes
[6081]	1202
	1203
	1204	##
	1205	# @brief Write a .pts data file.
	1206	# @param file_name Path to the file to write.
	1207	# @param write_data_points Data points to write.
	1208	# @param write_attributes Attributes to write.
	1209	# @param write_geo_reference Georef to write.
[4126]	1210	def _write_pts_file(file_name,
	1211	write_data_points,
[6081]	1212	write_attributes=None,
[4126]	1213	write_geo_reference=None):
[6081]	1214	"""Write .pts NetCDF file
[4126]	1215
	1216	NOTE: Below might not be valid ask Duncan : NB 5/2006
[6081]	1217
[4126]	1218	WARNING: This function mangles the point_atts data structure
[6304]	1219	# F??ME: (DSG)This format has issues.
[6081]	1220	# There can't be an attribute called points
[4126]	1221	# consider format change
	1222	# method changed by NB not sure if above statement is correct
[6081]	1223
[4126]	1224	should create new test for this
	1225	legal_keys = ['pointlist', 'attributelist', 'geo_reference']
	1226	for key in point_atts.keys():
[6081]	1227	msg = 'Key %s is illegal. Valid keys are %s' %(key, legal_keys)
[4126]	1228	assert key in legal_keys, msg
[6081]	1229	"""
	1230
[4126]	1231	# NetCDF file definition
[6086]	1232	outfile = NetCDFFile(file_name, netcdf_mode_w)
[6081]	1233
[4776]	1234	# Create new file
[4126]	1235	outfile.institution = 'Geoscience Australia'
[6410]	1236	outfile.description = ('NetCDF format for compact and portable storage '
	1237	'of spatial point data')
[6081]	1238
[4776]	1239	# Dimension definitions
[4126]	1240	shape = write_data_points.shape[0]
[6081]	1241	outfile.createDimension('number_of_points', shape)
[6304]	1242	outfile.createDimension('number_of_dimensions', 2) # This is 2d data
[6081]	1243
[4776]	1244	# Variable definition
[6428]	1245	outfile.createVariable('points', netcdf_float,
	1246	('number_of_points', 'number_of_dimensions'))
[4126]	1247
[6304]	1248	# create variables
	1249	outfile.variables['points'][:] = write_data_points
[4126]	1250
	1251	if write_attributes is not None:
	1252	for key in write_attributes.keys():
[6304]	1253	outfile.createVariable(key, netcdf_float, ('number_of_points',))
	1254	outfile.variables[key][:] = write_attributes[key]
[6081]	1255
[4126]	1256	if write_geo_reference is not None:
[4452]	1257	write_NetCDF_georeference(write_geo_reference, outfile)
[6081]	1258
	1259	outfile.close()
	1260
	1261
	1262	##
	1263	# @brief Write a .csv data file.
	1264	# @param file_name Path to the file to write.
	1265	# @param write_data_points Data points to write.
	1266	# @param write_attributes Attributes to write.
	1267	# @param as_lat_long True if points are lat/lon, else x/y.
	1268	# @param delimiter The CSV delimiter to use.
[4154]	1269	def _write_csv_file(file_name,
	1270	write_data_points,
[4225]	1271	write_attributes=None,
	1272	as_lat_long=False,
[4154]	1273	delimiter=','):
[6304]	1274	"""Write a .csv file."""
[6081]	1275
	1276	points = write_data_points
[4154]	1277	pointattributes = write_attributes
[6081]	1278
	1279	fd = open(file_name, 'w')
	1280
[4225]	1281	if as_lat_long:
	1282	titlelist = "latitude" + delimiter + "longitude" + delimiter
	1283	else:
	1284	titlelist = "x" + delimiter + "y" + delimiter
[6081]	1285
	1286	if pointattributes is not None:
[4154]	1287	for title in pointattributes.keys():
	1288	titlelist = titlelist + title + delimiter
	1289	titlelist = titlelist[0:-len(delimiter)] # remove the last delimiter
[6081]	1290
	1291	fd.write(titlelist + "\n")
	1292
[4225]	1293	# <x/lat> <y/long> [attributes]
[4154]	1294	for i, vert in enumerate( points):
[6081]	1295	if pointattributes is not None:
[4154]	1296	attlist = ","
	1297	for att in pointattributes.keys():
[6081]	1298	attlist = attlist + str(pointattributes[att][i]) + delimiter
[4154]	1299	attlist = attlist[0:-len(delimiter)] # remove the last delimiter
	1300	attlist.strip()
	1301	else:
	1302	attlist = ''
	1303
[6081]	1304	fd.write(str(vert[0]) + delimiter + str(vert[1]) + attlist + "\n")
[4154]	1305
	1306	fd.close()
[6081]	1307
	1308
	1309	##
	1310	# @brief Write a URS file.
	1311	# @param file_name The path of the file to write.
	1312	# @param points
	1313	# @param delimiter
	1314	def _write_urs_file(file_name, points, delimiter=' '):
	1315	"""Write a URS format file.
[4238]	1316	export a file, file_name, with the urs format
	1317	the data points are in lats and longs
[6081]	1318	"""
	1319
	1320	fd = open(file_name, 'w')
	1321
	1322	# first line is # points
	1323	fd.write(str(len(points)) + "\n")
	1324
[4238]	1325	# <lat> <long> <id#>
	1326	for i, vert in enumerate( points):
[6081]	1327	fd.write(str(round(vert[0],7)) + delimiter +
	1328	str(round(vert[1],7)) + delimiter + str(i) + "\n")
	1329
[4238]	1330	fd.close()
[6081]	1331
	1332
	1333	##
	1334	# @brief ??
	1335	# @param point_atts ??
	1336	# @return ??
[4126]	1337	def _point_atts2array(point_atts):
[6304]	1338	point_atts['pointlist'] = num.array(point_atts['pointlist'], num.float)
[6081]	1339
[4126]	1340	for key in point_atts['attributelist'].keys():
[6081]	1341	point_atts['attributelist'][key] = \
[6304]	1342	num.array(point_atts['attributelist'][key], num.float)
[6081]	1343
[4126]	1344	return point_atts
	1345
	1346
[6081]	1347	##
	1348	# @brief Convert geospatial object to a points dictionary.
	1349	# @param geospatial_data The geospatial object to convert.
	1350	# @return A points dictionary.
[4126]	1351	def geospatial_data2points_dictionary(geospatial_data):
[6304]	1352	"""Convert geospatial data to points_dictionary"""
[4126]	1353
	1354	points_dictionary = {}
	1355	points_dictionary['pointlist'] = geospatial_data.data_points
	1356
	1357	points_dictionary['attributelist'] = {}
	1358
	1359	for attribute_name in geospatial_data.attributes.keys():
	1360	val = geospatial_data.attributes[attribute_name]
	1361	points_dictionary['attributelist'][attribute_name] = val
	1362
	1363	points_dictionary['geo_reference'] = geospatial_data.geo_reference
	1364
	1365	return points_dictionary
	1366
[6081]	1367
	1368	##
	1369	# @brief Convert a points dictionary to a geospatial object.
	1370	# @param points_dictionary A points dictionary to convert.
[4126]	1371	def points_dictionary2geospatial_data(points_dictionary):
[6304]	1372	"""Convert points_dictionary to geospatial data object"""
[4126]	1373
[6428]	1374	msg = "Points dictionary must have key 'pointlist'"
[4126]	1375	assert points_dictionary.has_key('pointlist'), msg
	1376
[6428]	1377	msg = "Points dictionary must have key 'attributelist'"
[6081]	1378	assert points_dictionary.has_key('attributelist'), msg
[4126]	1379
	1380	if points_dictionary.has_key('geo_reference'):
	1381	geo = points_dictionary['geo_reference']
	1382	else:
	1383	geo = None
[6081]	1384
[4126]	1385	return Geospatial_data(points_dictionary['pointlist'],
	1386	points_dictionary['attributelist'],
[6428]	1387	geo_reference=geo)
[4126]	1388
[6081]	1389
	1390	##
	1391	# @brief Ensure that points are in absolute coordinates.
	1392	# @param points A list or array of points to check, or geospatial object.
	1393	# @param geo_reference If supplied,
	1394	# @return ??
[4126]	1395	def ensure_absolute(points, geo_reference=None):
[5730]	1396	"""Ensure that points are in absolute coordinates.
[6081]	1397
[4126]	1398	This function inputs several formats and
	1399	outputs one format. - a numeric array of absolute points.
	1400
[5730]	1401	Input formats are;
	1402	points: List or numeric array of coordinate pairs [xi, eta] of
[4255]	1403	points or geospatial object or points file name
[4126]	1404
	1405	mesh_origin: A geo_reference object or 3-tuples consisting of
	1406	UTM zone, easting and northing.
	1407	If specified vertex coordinates are assumed to be
	1408	relative to their respective origins.
	1409	"""
[5730]	1410
	1411	# Input check
	1412	if isinstance(points, basestring):
[6304]	1413	# It's a string - assume it is a point file
[5730]	1414	points = Geospatial_data(file_name=points)
[6081]	1415
[5730]	1416	if isinstance(points, Geospatial_data):
	1417	points = points.get_data_points(absolute=True)
[6081]	1418	msg = 'Use a Geospatial_data object or a mesh origin, not both.'
[4126]	1419	assert geo_reference == None, msg
	1420	else:
[6689]	1421	points = ensure_numeric(copy.copy(points), num.float)
[6081]	1422
[5730]	1423	# Sort of geo_reference and convert points
[4126]	1424	if geo_reference is None:
[6428]	1425	geo = None # Geo_reference()
[4126]	1426	else:
	1427	if isinstance(geo_reference, Geo_reference):
	1428	geo = geo_reference
	1429	else:
	1430	geo = Geo_reference(geo_reference[0],
	1431	geo_reference[1],
	1432	geo_reference[2])
	1433	points = geo.get_absolute(points)
[6081]	1434
[4126]	1435	return points
	1436
[6081]	1437
	1438	##
	1439	# @brief
	1440	# @param points
	1441	# @param geo_reference
	1442	# @return A geospatial object.
[4126]	1443	def ensure_geospatial(points, geo_reference=None):
[6304]	1444	"""Convert various data formats to a geospatial_data instance.
[4126]	1445
	1446	Inputed formats are;
[6081]	1447	points: List or numeric array of coordinate pairs [xi, eta] of
	1448	points or geospatial object
[4126]	1449
	1450	mesh_origin: A geo_reference object or 3-tuples consisting of
	1451	UTM zone, easting and northing.
	1452	If specified vertex coordinates are assumed to be
	1453	relative to their respective origins.
	1454	"""
[6081]	1455
[5730]	1456	# Input check
	1457	if isinstance(points, Geospatial_data):
[6081]	1458	msg = "Use a Geospatial_data object or a mesh origin, not both."
[5730]	1459	assert geo_reference is None, msg
[6081]	1460	return points
[4126]	1461	else:
[5730]	1462	# List or numeric array of absolute points
[6304]	1463	points = ensure_numeric(points, num.float)
[5730]	1464
[6081]	1465	# Sort out geo reference
[4126]	1466	if geo_reference is None:
	1467	geo = None
	1468	else:
	1469	if isinstance(geo_reference, Geo_reference):
	1470	geo = geo_reference
	1471	else:
	1472	geo = Geo_reference(geo_reference[0],
	1473	geo_reference[1],
	1474	geo_reference[2])
[6081]	1475
	1476	# Create Geospatial_data object with appropriate geo reference and return
	1477	points = Geospatial_data(data_points=points, geo_reference=geo)
	1478
[4126]	1479	return points
	1480
[4642]	1481
[6081]	1482	##
	1483	# @brief
	1484	# @param data_file
	1485	# @param alpha_list
	1486	# @param mesh_file
	1487	# @param boundary_poly
	1488	# @param mesh_resolution
	1489	# @param north_boundary
	1490	# @param south_boundary
	1491	# @param east_boundary
	1492	# @param west_boundary
	1493	# @param plot_name
	1494	# @param split_factor
	1495	# @param seed_num
	1496	# @param cache
	1497	# @param verbose
	1498	def find_optimal_smoothing_parameter(data_file,
[4744]	1499	alpha_list=None,
[4642]	1500	mesh_file=None,
[4847]	1501	boundary_poly=None,
[4642]	1502	mesh_resolution=100000,
	1503	north_boundary=None,
	1504	south_boundary=None,
	1505	east_boundary=None,
	1506	west_boundary=None,
[4745]	1507	plot_name='all_alphas',
[4879]	1508	split_factor=0.1,
[4744]	1509	seed_num=None,
	1510	cache=False,
[6081]	1511	verbose=False):
[6304]	1512	"""Removes a small random sample of points from 'data_file'.
	1513	Then creates models with different alpha values from 'alpha_list' and
	1514	cross validates the predicted value to the previously removed point data.
	1515	Returns the alpha value which has the smallest covariance.
[6081]	1516
[4744]	1517	data_file: must not contain points outside the boundaries defined
[6441]	1518	and it must be either a pts, txt or csv file.
[6081]	1519
[4744]	1520	alpha_list: the alpha values to test in a single list
[6081]	1521
[4816]	1522	mesh_file: name of the created mesh file or if passed in will read it.
[6081]	1523	NOTE, if there is a mesh file mesh_resolution,
	1524	north_boundary, south... etc will be ignored.
	1525
[4744]	1526	mesh_resolution: the maximum area size for a triangle
[6081]	1527
[4744]	1528	north_boundary... west_boundary: the value of the boundary
[6081]	1529
[4744]	1530	plot_name: the name for the plot contain the results
[6081]	1531
[4744]	1532	seed_num: the seed to the random number generator
[6081]	1533
[4744]	1534	USAGE:
[6081]	1535	value, alpha = find_optimal_smoothing_parameter(data_file=fileName,
[4744]	1536	alpha_list=[0.0001, 0.01, 1],
	1537	mesh_file=None,
	1538	mesh_resolution=3,
	1539	north_boundary=5,
	1540	south_boundary=-5,
	1541	east_boundary=5,
	1542	west_boundary=-5,
	1543	plot_name='all_alphas',
	1544	seed_num=100000,
	1545	verbose=False)
[6081]	1546
	1547	OUTPUT: returns the minumum normalised covalance calculate AND the
[4744]	1548	alpha that created it. PLUS writes a plot of the results
[6081]	1549
[5146]	1550	NOTE: code will not work if the data_file extent is greater than the
	1551	boundary_polygon or any of the boundaries, eg north_boundary...west_boundary
[4744]	1552	"""
[4642]	1553
	1554	from anuga.shallow_water import Domain
	1555	from anuga.geospatial_data.geospatial_data import Geospatial_data
	1556	from anuga.pmesh.mesh_interface import create_mesh_from_regions
	1557	from anuga.utilities.numerical_tools import cov
[6081]	1558	from anuga.utilities.polygon import is_inside_polygon
	1559	from anuga.fit_interpolate.benchmark_least_squares import mem_usage
[4126]	1560
[6410]	1561	attribute_smoothed = 'elevation'
[4744]	1562
[4642]	1563	if mesh_file is None:
[5010]	1564	if verbose: print "building mesh"
[6081]	1565	mesh_file = 'temp.msh'
[4744]	1566
[6428]	1567	if (north_boundary is None or south_boundary is None
	1568	or east_boundary is None or west_boundary is None):
[6081]	1569	no_boundary = True
[4816]	1570	else:
[6081]	1571	no_boundary = False
	1572
[4816]	1573	if no_boundary is True:
[6081]	1574	msg = 'All boundaries must be defined'
[6304]	1575	raise Expection, msg
[4642]	1576
[6428]	1577	poly_topo = [[east_boundary, south_boundary],
	1578	[east_boundary, north_boundary],
	1579	[west_boundary, north_boundary],
	1580	[west_boundary, south_boundary]]
[6081]	1581
[4816]	1582	create_mesh_from_regions(poly_topo,
	1583	boundary_tags={'back': [2],
	1584	'side': [1,3],
	1585	'ocean': [0]},
[6081]	1586	maximum_triangle_area=mesh_resolution,
	1587	filename=mesh_file,
	1588	use_cache=cache,
	1589	verbose=verbose)
[4816]	1590
	1591	else: # if mesh file provided
[6304]	1592	# test mesh file exists?
[6081]	1593	if verbose: "reading from file: %s" % mesh_file
[4816]	1594	if access(mesh_file,F_OK) == 0:
[6081]	1595	msg = "file %s doesn't exist!" % mesh_file
[4816]	1596	raise IOError, msg
	1597
[6304]	1598	# split topo data
[6081]	1599	if verbose: print 'Reading elevation file: %s' % data_file
[4744]	1600	G = Geospatial_data(file_name = data_file)
[5010]	1601	if verbose: print 'Start split'
[6081]	1602	G_small, G_other = G.split(split_factor, seed_num, verbose=verbose)
[5010]	1603	if verbose: print 'Finish split'
[6081]	1604	points = G_small.get_data_points()
[4847]	1605
[4744]	1606	if verbose: print "Number of points in sample to compare: ", len(points)
[6081]	1607
	1608	if alpha_list == None:
[4744]	1609	alphas = [0.001,0.01,100]
[6428]	1610	#alphas = [0.000001, 0.00001, 0.0001, 0.001, 0.01,
	1611	# 0.1, 1.0, 10.0, 100.0,1000.0,10000.0]
[4744]	1612	else:
[6081]	1613	alphas = alpha_list
[5003]	1614
[6304]	1615	# creates array with columns 1 and 2 are x, y. column 3 is elevation
	1616	# 4 onwards is the elevation_predicted using the alpha, which will
	1617	# be compared later against the real removed data
	1618	data = num.array([], dtype=num.float)
[5003]	1619
[6410]	1620	data = num.resize(data, (len(points), 3+len(alphas)))
[5003]	1621
[6304]	1622	# gets relative point from sample
[6081]	1623	data[:,0] = points[:,0]
	1624	data[:,1] = points[:,1]
	1625	elevation_sample = G_small.get_attributes(attribute_name=attribute_smoothed)
	1626	data[:,2] = elevation_sample
[5003]	1627
[6410]	1628	normal_cov = num.array(num.zeros([len(alphas), 2]), dtype=num.float)
[5003]	1629
	1630	if verbose: print 'Setup computational domains with different alphas'
	1631
[6081]	1632	for i, alpha in enumerate(alphas):
[6304]	1633	# add G_other data to domains with different alphas
[6081]	1634	if verbose:
[6428]	1635	print '\nCalculating domain and mesh for Alpha =', alpha, '\n'
[5003]	1636	domain = Domain(mesh_file, use_cache=cache, verbose=verbose)
[6081]	1637	if verbose: print domain.statistics()
	1638	domain.set_quantity(attribute_smoothed,
	1639	geospatial_data=G_other,
	1640	use_cache=cache,
	1641	verbose=verbose,
	1642	alpha=alpha)
[5003]	1643
[5729]	1644	# Convert points to geospatial data for use with get_values below
	1645	points_geo = Geospatial_data(points, domain.geo_reference)
[6081]	1646
[6304]	1647	# returns the predicted elevation of the points that were "split" out
	1648	# of the original data set for one particular alpha
[5010]	1649	if verbose: print 'Get predicted elevation for location to be compared'
[6081]	1650	elevation_predicted = \
	1651	domain.quantities[attribute_smoothed].\
	1652	get_values(interpolation_points=points_geo)
	1653
[6304]	1654	# add predicted elevation to array that starts with x, y, z...
[6081]	1655	data[:,i+3] = elevation_predicted
[5003]	1656
[6081]	1657	sample_cov = cov(elevation_sample)
	1658	ele_cov = cov(elevation_sample - elevation_predicted)
[6428]	1659	normal_cov[i,:] = [alpha, ele_cov / sample_cov]
[5003]	1660
[6081]	1661	if verbose:
	1662	print 'Covariance for alpha ', normal_cov[i][0], '= ', \
	1663	normal_cov[i][1]
	1664	print '-------------------------------------------- \n'
[5003]	1665
[6081]	1666	normal_cov0 = normal_cov[:,0]
[6410]	1667	normal_cov_new = num.take(normal_cov, num.argsort(normal_cov0), axis=0)
[6081]	1668
[5003]	1669	if plot_name is not None:
[6081]	1670	from pylab import savefig, semilogx, loglog
	1671
	1672	semilogx(normal_cov_new[:,0], normal_cov_new[:,1])
	1673	loglog(normal_cov_new[:,0], normal_cov_new[:,1])
	1674	savefig(plot_name, dpi=300)
	1675
[5003]	1676	if mesh_file == 'temp.msh':
	1677	remove(mesh_file)
[6081]	1678
	1679	if verbose:
[5004]	1680	print 'Final results:'
[6081]	1681	for i, alpha in enumerate(alphas):
[6428]	1682	print ('covariance for alpha %s = %s '
	1683	% (normal_cov[i][0], normal_cov[i][1]))
	1684	print ('\nOptimal alpha is: %s '
	1685	% normal_cov_new[(num.argmin(normal_cov_new, axis=0))[1], 0])
[5004]	1686
[5146]	1687	# covariance and optimal alpha
[6081]	1688	return (min(normal_cov_new[:,1]),
[6153]	1689	normal_cov_new[(num.argmin(normal_cov_new,axis=0))[1],0])
[5003]	1690
[6081]	1691
	1692	##
	1693	# @brief
	1694	# @param data_file
	1695	# @param alpha_list
	1696	# @param mesh_file
	1697	# @param boundary_poly
	1698	# @param mesh_resolution
	1699	# @param north_boundary
	1700	# @param south_boundary
	1701	# @param east_boundary
	1702	# @param west_boundary
	1703	# @param plot_name
	1704	# @param split_factor
	1705	# @param seed_num
	1706	# @param cache
	1707	# @param verbose
	1708	def old_find_optimal_smoothing_parameter(data_file,
	1709	alpha_list=None,
	1710	mesh_file=None,
	1711	boundary_poly=None,
	1712	mesh_resolution=100000,
	1713	north_boundary=None,
	1714	south_boundary=None,
	1715	east_boundary=None,
	1716	west_boundary=None,
	1717	plot_name='all_alphas',
	1718	split_factor=0.1,
	1719	seed_num=None,
	1720	cache=False,
	1721	verbose=False):
[5003]	1722	"""
	1723	data_file: must not contain points outside the boundaries defined
[6081]	1724	and it either a pts, txt or csv file.
	1725
[5003]	1726	alpha_list: the alpha values to test in a single list
[6081]	1727
[5003]	1728	mesh_file: name of the created mesh file or if passed in will read it.
[6081]	1729	NOTE, if there is a mesh file mesh_resolution,
	1730	north_boundary, south... etc will be ignored.
	1731
[5003]	1732	mesh_resolution: the maximum area size for a triangle
[6081]	1733
[5003]	1734	north_boundary... west_boundary: the value of the boundary
[6081]	1735
[5003]	1736	plot_name: the name for the plot contain the results
[6081]	1737
[5003]	1738	seed_num: the seed to the random number generator
[6081]	1739
[5003]	1740	USAGE:
[6081]	1741	value, alpha = find_optimal_smoothing_parameter(data_file=fileName,
[5003]	1742	alpha_list=[0.0001, 0.01, 1],
	1743	mesh_file=None,
	1744	mesh_resolution=3,
	1745	north_boundary=5,
	1746	south_boundary=-5,
	1747	east_boundary=5,
	1748	west_boundary=-5,
	1749	plot_name='all_alphas',
	1750	seed_num=100000,
	1751	verbose=False)
[6081]	1752
	1753	OUTPUT: returns the minumum normalised covalance calculate AND the
	1754	alpha that created it. PLUS writes a plot of the results
	1755
[5003]	1756	NOTE: code will not work if the data_file extend is greater than the
[6081]	1757	boundary_polygon or the north_boundary...west_boundary
[5003]	1758	"""
	1759
	1760	from anuga.shallow_water import Domain
	1761	from anuga.geospatial_data.geospatial_data import Geospatial_data
	1762	from anuga.pmesh.mesh_interface import create_mesh_from_regions
	1763	from anuga.utilities.numerical_tools import cov
[6081]	1764	from anuga.utilities.polygon import is_inside_polygon
	1765	from anuga.fit_interpolate.benchmark_least_squares import mem_usage
[5003]	1766
[6081]	1767	attribute_smoothed = 'elevation'
	1768
[5003]	1769	if mesh_file is None:
[6081]	1770	mesh_file = 'temp.msh'
[5003]	1771
[6428]	1772	if (north_boundary is None or south_boundary is None
	1773	or east_boundary is None or west_boundary is None):
[6081]	1774	no_boundary = True
[5003]	1775	else:
[6081]	1776	no_boundary = False
	1777
[5003]	1778	if no_boundary is True:
[6081]	1779	msg = 'All boundaries must be defined'
[6304]	1780	raise Expection, msg
[5003]	1781
[6428]	1782	poly_topo = [[east_boundary, south_boundary],
	1783	[east_boundary, north_boundary],
	1784	[west_boundary, north_boundary],
	1785	[west_boundary, south_boundary]]
[6081]	1786
[5003]	1787	create_mesh_from_regions(poly_topo,
	1788	boundary_tags={'back': [2],
	1789	'side': [1,3],
	1790	'ocean': [0]},
[6081]	1791	maximum_triangle_area=mesh_resolution,
	1792	filename=mesh_file,
	1793	use_cache=cache,
	1794	verbose=verbose)
[5003]	1795
	1796	else: # if mesh file provided
[6304]	1797	# test mesh file exists?
[5003]	1798	if access(mesh_file,F_OK) == 0:
[6081]	1799	msg = "file %s doesn't exist!" % mesh_file
[5003]	1800	raise IOError, msg
	1801
[6304]	1802	# split topo data
[6081]	1803	G = Geospatial_data(file_name=data_file)
[5003]	1804	if verbose: print 'start split'
[6081]	1805	G_small, G_other = G.split(split_factor, seed_num, verbose=verbose)
[5003]	1806	if verbose: print 'finish split'
[6081]	1807	points = G_small.get_data_points()
[5003]	1808
	1809	if verbose: print "Number of points in sample to compare: ", len(points)
[6081]	1810
	1811	if alpha_list == None:
[5003]	1812	alphas = [0.001,0.01,100]
[6428]	1813	#alphas = [0.000001, 0.00001, 0.0001, 0.001, 0.01,
	1814	# 0.1, 1.0, 10.0, 100.0,1000.0,10000.0]
[5003]	1815	else:
[6081]	1816	alphas = alpha_list
	1817
[4642]	1818	domains = {}
	1819
[4744]	1820	if verbose: print 'Setup computational domains with different alphas'
[5003]	1821
[4642]	1822	for alpha in alphas:
[6304]	1823	# add G_other data to domains with different alphas
[6081]	1824	if verbose:
[6428]	1825	print '\nCalculating domain and mesh for Alpha =', alpha, '\n'
[4847]	1826	domain = Domain(mesh_file, use_cache=cache, verbose=verbose)
[6081]	1827	if verbose: print domain.statistics()
	1828	domain.set_quantity(attribute_smoothed,
	1829	geospatial_data=G_other,
	1830	use_cache=cache,
	1831	verbose=verbose,
	1832	alpha=alpha)
	1833	domains[alpha] = domain
[4642]	1834
[6304]	1835	# creates array with columns 1 and 2 are x, y. column 3 is elevation
	1836	# 4 onwards is the elevation_predicted using the alpha, which will
	1837	# be compared later against the real removed data
	1838	data = num.array([], dtype=num.float)
[4744]	1839
[6153]	1840	data = num.resize(data, (len(points), 3+len(alphas)))
[4744]	1841
[6304]	1842	# gets relative point from sample
[6081]	1843	data[:,0] = points[:,0]
	1844	data[:,1] = points[:,1]
	1845	elevation_sample = G_small.get_attributes(attribute_name=attribute_smoothed)
	1846	data[:,2] = elevation_sample
[4642]	1847
[6304]	1848	normal_cov = num.array(num.zeros([len(alphas), 2]), dtype=num.float)
[4642]	1849
[6081]	1850	if verbose:
	1851	print 'Determine difference between predicted results and actual data'
[6428]	1852
[6081]	1853	for i, alpha in enumerate(domains):
	1854	if verbose: print'Alpha =', alpha
	1855
	1856	points_geo = domains[alpha].geo_reference.change_points_geo_ref(points)
[6304]	1857	# returns the predicted elevation of the points that were "split" out
	1858	# of the original data set for one particular alpha
[6081]	1859	elevation_predicted = \
	1860	domains[alpha].quantities[attribute_smoothed].\
	1861	get_values(interpolation_points=points_geo)
	1862
[6304]	1863	# add predicted elevation to array that starts with x, y, z...
[6081]	1864	data[:,i+3] = elevation_predicted
[4642]	1865
[6081]	1866	sample_cov = cov(elevation_sample)
	1867	ele_cov = cov(elevation_sample - elevation_predicted)
	1868	normal_cov[i,:] = [alpha,ele_cov / sample_cov]
	1869	print 'memory usage during compare', mem_usage()
	1870	if verbose: print 'cov', normal_cov[i][0], '= ', normal_cov[i][1]
[4642]	1871
[6081]	1872	normal_cov0 = normal_cov[:,0]
[6410]	1873	normal_cov_new = num.take(normal_cov, num.argsort(normal_cov0), axis=0)
[4745]	1874
	1875	if plot_name is not None:
	1876	from pylab import savefig,semilogx,loglog
[6081]	1877
	1878	semilogx(normal_cov_new[:,0], normal_cov_new[:,1])
	1879	loglog(normal_cov_new[:,0], normal_cov_new[:,1])
	1880	savefig(plot_name, dpi=300)
[4816]	1881	if mesh_file == 'temp.msh':
	1882	remove(mesh_file)
[4642]	1883
[6081]	1884	return (min(normal_cov_new[:,1]),
[6153]	1885	normal_cov_new[(num.argmin(normal_cov_new, axis=0))[1],0])
[6081]	1886
	1887
[4126]	1888	if __name__ == "__main__":
[4178]	1889	pass
[6081]	1890

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: branches/numpy/anuga/geospatial_data/geospatial_data.py @ 7207

Download in other formats: