[4126] | 1 | """Class Geospatial_data - Manipulation of locations on the planet and |
---|
| 2 | associated attributes. |
---|
| 3 | |
---|
| 4 | """ |
---|
[4178] | 5 | from sys import maxint |
---|
[4744] | 6 | from os import access, F_OK, R_OK,remove |
---|
[4126] | 7 | from types import DictType |
---|
[4150] | 8 | from warnings import warn |
---|
[4180] | 9 | from string import lower |
---|
[4126] | 10 | from Numeric import concatenate, array, Float, shape, reshape, ravel, take, \ |
---|
| 11 | size, shape |
---|
[4501] | 12 | #from Array import tolist |
---|
[4816] | 13 | from RandomArray import randint, seed, get_seed |
---|
[4452] | 14 | from copy import deepcopy |
---|
| 15 | |
---|
[4126] | 16 | #from MA import tolist |
---|
| 17 | |
---|
[4175] | 18 | from Scientific.IO.NetCDF import NetCDFFile |
---|
[4223] | 19 | from anuga.coordinate_transforms.lat_long_UTM_conversion import UTMtoLL |
---|
[4126] | 20 | from anuga.utilities.numerical_tools import ensure_numeric |
---|
[4223] | 21 | from anuga.coordinate_transforms.geo_reference import Geo_reference, \ |
---|
[4452] | 22 | TitleError, DEFAULT_ZONE, ensure_geo_reference, write_NetCDF_georeference |
---|
[4126] | 23 | from anuga.coordinate_transforms.redfearn import convert_from_latlon_to_utm |
---|
[4180] | 24 | from anuga.utilities.anuga_exceptions import ANUGAError |
---|
[5003] | 25 | from anuga.config import points_file_block_line_size as MAX_READ_LINES |
---|
| 26 | #from anuga.fit_interpolate.benchmark_least_squares import mem_usage |
---|
| 27 | |
---|
[4126] | 28 | |
---|
[4535] | 29 | DEFAULT_ATTRIBUTE = 'elevation' |
---|
| 30 | |
---|
class Geospatial_data:

    def __init__(self,
                 data_points=None, # this can also be a points file name
                 attributes=None,
                 geo_reference=None,
                 default_attribute_name=None,
                 file_name=None,
                 latitudes=None,
                 longitudes=None,
                 points_are_lats_longs=False,
                 max_read_lines=None,
                 load_file_now=True,
                 verbose=False):


        """
        Create instance from data points and associated attributes

        data_points: x,y coordinates in meters. Type must be either a
        sequence of 2-tuples or an Mx2 Numeric array of floats. A file name
        with extension .txt, .csv or .pts can also be passed in here.

        attributes: Associated values for each data point. The type
        must be either a list or an array of length M or a dictionary
        of lists (or arrays) of length M. In the latter case the keys
        in the dictionary represent the attribute names, in the former
        the attribute will get the default name "elevation".

        geo_reference: Object representing the origin of the data
        points. It contains UTM zone, easting and northing and data
        points are assumed to be relative to this origin.
        If geo_reference is None, the default geo ref object is used.

        default_attribute_name: Name of default attribute to be used with
        get_attribute_values. The idea is that the dataset can be
        equipped with information about which attribute to return.
        If None, the default is the "first" one found in the
        attributes dictionary.

        latitudes, longitudes: Vectors of latitudes and longitudes,
        used to specify location instead of points.

        points_are_lats_longs: Set this as True if the points are actually
        lats and longs, not UTM.

        max_read_lines: The number of rows read into memory when using
        blocking to read a file.

        load_file_now: If True the file is automatically loaded
        into the geospatial instance. Used when blocking.

        file_name: Name of input netCDF file or .txt file. netCDF file must
        have dimensions "points" etc.
        .txt file is a comma separated file with x, y and attribute
        data.

        The first line has the titles of the columns. The first two
        column titles are checked to see if they start with lat or
        long (not case sensitive). If so the data is assumed to be
        latitude and longitude, in decimal format, and converted to
        UTM. Otherwise the first two columns are assumed to be the x
        and y, and the title names actually used are ignored.


        The format for a .txt file is:
            1st line:     [column names]
            other lines:  x y [attributes]

            for example:
            x, y, elevation, friction
            0.6, 0.7, 4.9, 0.3
            1.9, 2.8, 5, 0.3
            2.7, 2.4, 5.2, 0.3

        The first two columns have to be x, y or lat, long
        coordinates.


        The format for a Points dictionary is:

        ['pointlist'] a 2 column array describing points. 1st column x,
        2nd column y.
        ['attributelist'], a dictionary of 1D arrays, representing
        attribute values at the point. The dictionary key is the attribute
        header.
        ['geo_reference'] a Geo_reference object. Use if the point
        information is relative. This is optional.
        eg
        dic['pointlist'] = [[1.0, 2.0], [3.0, 5.0]]
        dic['attributelist']['elevation'] = [7.0, 5.0]

        verbose: If True, print diagnostic information while loading.

        """

        if isinstance(data_points, basestring):
            # assume data points is really a file name
            file_name = data_points

        self.set_verbose(verbose)
        self.geo_reference = None # create the attribute
        self.file_name = file_name

        if max_read_lines is None:
            self.max_read_lines = MAX_READ_LINES
        else:
            self.max_read_lines = max_read_lines

        if file_name is None:
            if latitudes is not None or longitudes is not None or \
               points_are_lats_longs:
                data_points, geo_reference = \
                    _set_using_lat_long(latitudes=latitudes,
                                        longitudes=longitudes,
                                        geo_reference=geo_reference,
                                        data_points=data_points,
                                        points_are_lats_longs=points_are_lats_longs)
            self.check_data_points(data_points)
            self.set_attributes(attributes)
            self.set_geo_reference(geo_reference)
            self.set_default_attribute_name(default_attribute_name)

        elif load_file_now is True:
            # watch for case where file name and points,
            # attributes etc are provided!!
            # if file name then all provided info will be removed!

            if verbose is True:
                if file_name is not None:
                    print 'Loading Geospatial data from file: %s' %file_name

            self.import_points_file(file_name, verbose=verbose)

            self.check_data_points(self.data_points)
            self.set_attributes(self.attributes)
            self.set_geo_reference(self.geo_reference)
            self.set_default_attribute_name(default_attribute_name)

        if verbose is True:
            if file_name is not None:
                print 'Geospatial data created from file: %s' %file_name
                if load_file_now is False:
                    print 'Data will be loaded blockwise on demand'

                    if file_name.endswith('csv') or file_name.endswith('txt'):
                        print 'ASCII formats are not that great for '
                        print 'blockwise reading. Consider storing this'
                        print 'data as a pts NetCDF format'

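    # A minimal usage sketch of the two construction routes (values and the
    # file name 'points.pts' are hypothetical, for illustration only):
    #
    #   G = Geospatial_data(data_points=[[1.0, 2.0], [3.0, 5.0]],
    #                       attributes={'elevation': [4.9, 5.2]})
    #   G = Geospatial_data('points.pts')  # a file name may also be passed
    #                                      # as the first argument
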
    def __len__(self):
        return len(self.data_points)

    def __repr__(self):
        return str(self.get_data_points(absolute=True))


    def check_data_points(self, data_points):
        """Checks data points
        """

        if data_points is None:
            self.data_points = None
            msg = 'There is no data or file provided!'
            raise ValueError, msg

        else:
            self.data_points = ensure_numeric(data_points)
            if not (0,) == self.data_points.shape:
                assert len(self.data_points.shape) == 2
                assert self.data_points.shape[1] == 2

    def set_attributes(self, attributes):
        """Check and assign attributes dictionary
        """

        if attributes is None:
            self.attributes = None
            return

        if not isinstance(attributes, DictType):
            # Convert single attribute into dictionary
            attributes = {DEFAULT_ATTRIBUTE: attributes}

        # Check input attributes
        for key in attributes.keys():
            try:
                attributes[key] = ensure_numeric(attributes[key])
            except:
                msg = 'Attribute %s could not be converted ' %key
                msg += 'to a numeric vector'
                raise Exception, msg

        self.attributes = attributes


    def set_geo_reference(self, geo_reference):
        """
        Sets the georeference of the geospatial data.
        It can also be used to change the georeference.
        """
        from anuga.coordinate_transforms.geo_reference import Geo_reference

        if geo_reference is None:
            geo_reference = Geo_reference() # Use default
        geo_reference = ensure_geo_reference(geo_reference)
        if not isinstance(geo_reference, Geo_reference):
            msg = 'Argument geo_reference must be a valid Geo_reference \n'
            msg += 'object or None.'
            raise Exception, msg

        # If a geo ref already exists, change the point data to
        # represent the new geo-ref
        if self.geo_reference is not None:
            # FIXME: Maybe put out a warning here...
            self.data_points = self.get_data_points \
                               (geo_reference=geo_reference)

        self.geo_reference = geo_reference


    def set_default_attribute_name(self, default_attribute_name):
        self.default_attribute_name = default_attribute_name

    def set_verbose(self, verbose=False):
        if verbose in [False, True]:
            self.verbose = verbose
        else:
            msg = 'Illegal value: %s' %str(verbose)
            raise Exception, msg

    def clip(self, polygon, closed=True, verbose=False):
        """Clip geospatial data by a polygon

        Input
          polygon - Either a list of points, an Nx2 array or
                    a Geospatial data object.
          closed - (optional) determine whether points on boundary should be
          regarded as belonging to the polygon (closed = True)
          or not (closed = False). Default is True.

        Output
          New geospatial data object representing points inside
          specified polygon.

        """

        from anuga.utilities.polygon import inside_polygon

        if isinstance(polygon, Geospatial_data):
            # Polygon is an object - extract points
            polygon = polygon.get_data_points()

        points = self.get_data_points()
        inside_indices = inside_polygon(points, polygon, closed, verbose)

        clipped_G = self.get_sample(inside_indices)

        return clipped_G


    def clip_outside(self, polygon, closed=True, verbose=False):
        """Clip geospatial data by a polygon, keeping data OUTSIDE of polygon

        Input
          polygon - Either a list of points, an Nx2 array or
                    a Geospatial data object.
          closed - (optional) determine whether points on boundary should be
          regarded as belonging to the polygon (closed = True)
          or not (closed = False). Default is True.

        Output
          Geospatial data object representing points OUTSIDE specified polygon

        """

        from anuga.utilities.polygon import outside_polygon

        if isinstance(polygon, Geospatial_data):
            # Polygon is an object - extract points
            polygon = polygon.get_data_points()

        points = self.get_data_points()
        outside_indices = outside_polygon(points, polygon, closed, verbose)

        clipped_G = self.get_sample(outside_indices)

        return clipped_G


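    # A minimal usage sketch for clip/clip_outside (values hypothetical).
    # Together the two results partition the original point set:
    #
    #   polygon = [[0.0, 0.0], [10.0, 0.0], [10.0, 10.0], [0.0, 10.0]]
    #   G_in = G.clip(polygon)           # points inside (or on) the polygon
    #   G_out = G.clip_outside(polygon)  # the remaining points
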
    def get_geo_reference(self):
        return self.geo_reference

    def get_data_points(self, absolute=True, geo_reference=None,
                        as_lat_long=False, isSouthHemisphere=True):
        """Get coordinates for all data points as an Nx2 array

        If absolute is False returned coordinates are relative to the
        internal georeference's xll and yll corners, otherwise
        absolute UTM coordinates are returned.

        If a geo_reference is passed the points are returned relative
        to that geo_reference.

        isSouthHemisphere is only used when getting data points
        "as_lat_long" is True; if False the returned lats and longs are
        valid for the Northern Hemisphere.

        Default: absolute is True.
        """
        if as_lat_long is True:
            msg = "Points need a zone to be converted into lats and longs"
            assert self.geo_reference is not None, msg
            zone = self.geo_reference.get_zone()
            assert self.geo_reference.get_zone() is not DEFAULT_ZONE, msg
            lats_longs = []
            for point in self.get_data_points(True):
                # UTMtoLL(northing, easting, zone, isSouthHemisphere)
                lat_calced, long_calced = UTMtoLL(point[1], point[0],
                                                  zone, isSouthHemisphere)
                lats_longs.append((lat_calced, long_calced)) # to hash
            return lats_longs
        if absolute is True and geo_reference is None:
            return self.geo_reference.get_absolute(self.data_points)
        elif geo_reference is not None:
            return geo_reference.change_points_geo_ref \
                   (self.data_points,
                    self.geo_reference)
        else:
            return self.data_points


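    # Illustration (assumed numbers): with a georeference of
    # xllcorner=100.0, yllcorner=200.0 and a stored point [1.0, 2.0],
    #
    #   G.get_data_points(absolute=False)  # -> [[1.0, 2.0]]    (relative)
    #   G.get_data_points(absolute=True)   # -> [[101.0, 202.0]] (UTM)
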
    def get_attributes(self, attribute_name=None):
        """Return values for one named attribute.

        If attribute_name is None, default_attribute_name is used
        """

        if attribute_name is None:
            if self.default_attribute_name is not None:
                attribute_name = self.default_attribute_name
            else:
                attribute_name = self.attributes.keys()[0]
                # above line takes the first one from keys

        if self.verbose is True:
            print 'Using attribute %s' %attribute_name
            print 'Available attributes: %s' %(self.attributes.keys())

        msg = 'Attribute name %s does not exist in data set' %attribute_name
        assert self.attributes.has_key(attribute_name), msg

        return self.attributes[attribute_name]

    def get_all_attributes(self):
        """
        Return values for all attributes.
        The return value is either None or a dictionary (possibly empty).
        """

        return self.attributes

    def __add__(self, other):
        """
        Returns the addition of 2 geospatial objects;
        objects are concatenated to the end of each other.

        NOTE: doesn't add if objects contain different
        attributes

        Always return absolute points!
        This also means that if you add None to the object,
        it will be turned into absolute coordinates

        other can be None in which case nothing is added to self.
        """

        # find objects zone and check if the same
        geo_ref1 = self.get_geo_reference()
        zone1 = geo_ref1.get_zone()

        if other is not None:
            geo_ref2 = other.get_geo_reference()
            zone2 = geo_ref2.get_zone()

            geo_ref1.reconcile_zones(geo_ref2)

            new_points = concatenate((self.get_data_points(absolute=True),
                                      other.get_data_points(absolute=True)),
                                     axis = 0)

            # Concatenate attributes if any
            if self.attributes is None:
                if other.attributes is not None:
                    msg = 'Geospatial data must have the same \n'
                    msg += 'attributes to allow addition.'
                    raise Exception, msg

                new_attributes = None
            else:
                new_attributes = {}
                for x in self.attributes.keys():
                    if other.attributes.has_key(x):
                        attrib1 = self.attributes[x]
                        attrib2 = other.attributes[x]
                        new_attributes[x] = concatenate((attrib1, attrib2))
                    else:
                        msg = 'Geospatial data must have the same \n'
                        msg += 'attributes to allow addition.'
                        raise Exception, msg

        else:
            # other is None:
            new_points = self.get_data_points(absolute=True)
            new_attributes = self.attributes

        # Instantiate new data object and return absolute coordinates
        new_geo_ref = Geo_reference(geo_ref1.get_zone(), 0.0, 0.0)
        return Geospatial_data(new_points,
                               new_attributes,
                               new_geo_ref)


    def __radd__(self, other):
        """Handle cases like None + Geospatial_data(...)
        """

        return self + other

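    # A sketch of addition (hypothetical values). Both operands need the
    # same attribute names; the result is in absolute coordinates:
    #
    #   G1 = Geospatial_data([[1.0, 2.0]], {'elevation': [4.0]})
    #   G2 = Geospatial_data([[3.0, 5.0]], {'elevation': [7.0]})
    #   G = G1 + G2    # 2 points, 'elevation' == [4.0, 7.0]
    #   G = None + G2  # also fine, thanks to __radd__
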
    ###
    # IMPORT/EXPORT POINTS FILES
    ###

    def import_points_file(self, file_name, delimiter=None, verbose=False):
        """ load a .txt, .csv or .pts file
        Note: will throw an IOError/SyntaxError if it can't load the file.
        Catch these!

        Post condition: self.attributes dictionary has been set
        """

        if access(file_name, F_OK) == 0:
            msg = 'File %s does not exist or is not accessible' %file_name
            raise IOError, msg

        attributes = {}
        if file_name[-4:] == ".pts":
            try:
                data_points, attributes, geo_reference =\
                             _read_pts_file(file_name, verbose)
            except IOError, e:
                msg = 'Could not open file %s ' %file_name
                raise IOError, msg

        elif file_name[-4:] == ".txt" or file_name[-4:] == ".csv":
            try:
                data_points, attributes, geo_reference =\
                             _read_csv_file(file_name, verbose)
            except IOError, e:
                # This should only be if a file is not found
                msg = 'Could not open file %s. ' %file_name
                msg += 'Check the file location.'
                raise IOError, msg
            except SyntaxError, e:
                # This should only be if there is a format error
                msg = 'Could not open file %s. \n' %file_name
                msg += Error_message['IOError']
                raise SyntaxError, msg
        else:
            msg = 'Extension %s is unknown' %file_name[-4:]
            raise IOError, msg

        self.data_points = data_points
        self.attributes = attributes
        self.geo_reference = geo_reference

    def export_points_file(self, file_name, absolute=True,
                           as_lat_long=False, isSouthHemisphere=True):

        """
        write a points file, file_name, as a text (.csv) or binary (.pts) file
        file_name is the file name, including the extension
        The point_dict is defined at the top of this file.

        If absolute is True the xll and yll corners are added to the point
        values and the xll and yll of the geo_reference are set to 0.

        If absolute is False data points are returned relative to the xll
        and yll corners and the geo_reference remains unaffected.

        isSouthHemisphere: is only used when getting data points
        "as_lat_long" is True; if False the returned lats and longs are
        valid for the Northern Hemisphere.

        """

        if (file_name[-4:] == ".pts"):
            if absolute is True:
                geo_ref = deepcopy(self.geo_reference)
                geo_ref.xllcorner = 0
                geo_ref.yllcorner = 0
                _write_pts_file(file_name,
                                self.get_data_points(absolute),
                                self.get_all_attributes(),
                                geo_ref)
            else:
                _write_pts_file(file_name,
                                self.get_data_points(absolute),
                                self.get_all_attributes(),
                                self.get_geo_reference())

        elif file_name[-4:] == ".txt" or file_name[-4:] == ".csv":
            msg = "ERROR: trying to write a .txt file with relative data."
            assert absolute, msg
            _write_csv_file(file_name,
                            self.get_data_points(absolute=True,
                                                 as_lat_long=as_lat_long,
                                                 isSouthHemisphere=isSouthHemisphere),
                            self.get_all_attributes(),
                            as_lat_long=as_lat_long)

        elif file_name[-4:] == ".urs":
            msg = "ERROR: Can not write a .urs file as a relative file."
            assert absolute, msg
            _write_urs_file(file_name,
                            self.get_data_points(as_lat_long=True,
                                                 isSouthHemisphere=isSouthHemisphere))

        else:
            msg = 'Unknown file type %s ' %file_name
            raise IOError, msg

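    # A usage sketch (file names hypothetical): round-tripping a data set
    # through the two main formats.
    #
    #   G.export_points_file('points.pts')  # binary NetCDF
    #   G.export_points_file('points.csv')  # text: x, y, [attributes]
    #   G2 = Geospatial_data('points.pts')  # read it back
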
    def get_sample(self, indices):
        """ Returns an object which is a subset of the original;
        the data points and attributes in this new object refer to
        the indices provided

        Input
            indices - a list of integers that represent the new object
        Output
            New geospatial data object representing points specified by
            the indices
        """
        # FIXME: add the geo_reference to this
        points = self.get_data_points()
        sampled_points = take(points, indices)

        attributes = self.get_all_attributes()

        sampled_attributes = {}
        if attributes is not None:
            for key, att in attributes.items():
                sampled_attributes[key] = take(att, indices)

        return Geospatial_data(sampled_points, sampled_attributes)


    def split(self, factor=0.5, seed_num=None, verbose=False):
        """Returns two geospatial_data objects: the first is 'factor'
        times the size of the original and the second is the remainder.
        The two new objects are disjoint sets of each other.

        Points of the two new objects are selected RANDOMLY.

        This method creates two lists of indices which are passed into
        get_sample. The lists are created using random numbers, and
        they are unique sets eg. total_list(1,2,3,4,5,6,7,8,9)
        random_list(1,3,6,7,9) and remainder_list(0,2,4,5,8)

        Input - the factor by which to split the object; if 0.1 then 10%
        of the original object will be returned

        Output - two geospatial_data objects that are disjoint sets of the
        original
        """

        i = 0
        self_size = len(self)
        random_list = []
        remainder_list = []
        new_size = round(factor*self_size)

        # Find unique random numbers
        if verbose: print "make unique random number list and get indices"

        total = array(range(self_size))
        total_list = total.tolist()
        if verbose: print "total list len", len(total_list)

        # There will be repeated random numbers; however this will not be a
        # problem as they are being 'pop'ed out of the array, so if there
        # are two numbers the same they will pop different indices,
        # still basically random
        ## create list of non-unique random numbers
        if verbose: print "create random numbers list %s long" %new_size

        # Set seed if provided, mainly important for unit test!
        # plus recalculate seed when no seed provided.
        if seed_num != None:
            seed(seed_num, seed_num)
        else:
            seed()
        if verbose: print "seed:", get_seed()

        random_num = randint(0, self_size-1, (int(new_size),))
        random_num = random_num.tolist()

        # need to sort and reverse so the pop() works correctly
        random_num.sort()
        random_num.reverse()

        if verbose: print "make random number list and get indices"
        j = 0
        k = 1
        remainder_list = total_list[:]
        # pops array index (random_num) from remainder_list
        # (which starts as the total_list) and appends to random_list
        random_num_len = len(random_num)
        for i in random_num:
            random_list.append(remainder_list.pop(i))
            j += 1
            # prints progress
            if verbose and round(random_num_len/10*k) == j:
                print '(%s/%s)' %(j, random_num_len)
                k += 1

        # FIXME: move to tests, it might take a long time
        # then create an array of random length between 500 and 1000,
        # and use a random factor between 0 and 1
        # setup for assertion
        test_total = random_list[:]
        test_total.extend(remainder_list)
        test_total.sort()
        msg = 'The two random lists made from the original list when added '\
              'together DO NOT equal the original list'
        assert (total_list == test_total), msg

        # Get new samples
        if verbose: print "get values of indices for random list"
        G1 = self.get_sample(random_list)
        if verbose: print "get values of indices for opposite of random list"
        G2 = self.get_sample(remainder_list)

        return G1, G2

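    # A usage sketch: a reproducible 10%/90% split, e.g. for holding back
    # a validation set (seed value hypothetical).
    #
    #   G_small, G_rest = G.split(factor=0.1, seed_num=100)
    #   assert len(G_small) + len(G_rest) == len(G)
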
    def __iter__(self):
        """Read in the header, number_of_points and save the
        file pointer position
        """

        from Scientific.IO.NetCDF import NetCDFFile

        # FIXME - what to do if the file isn't there

        # FIXME (Ole): Shouldn't this go into the constructor?
        # This method acts like the constructor when blocking.
        # ... and shouldn't it be called block_size?
        #
        if self.max_read_lines is None:
            self.max_read_lines = MAX_READ_LINES

        if self.file_name[-4:] == ".pts":

            # See if the file is there. Throw a QUIET IO error if it isn't
            fd = open(self.file_name, 'r')
            fd.close()

            # Throws prints to screen if file not present
            self.fid = NetCDFFile(self.file_name, 'r')

            self.blocking_georef, self.blocking_keys, self.number_of_points =\
                                  _read_pts_file_header(self.fid,
                                                        self.verbose)
            self.start_row = 0
            self.last_row = self.number_of_points
            self.show_verbose = 0
            self.verbose_block_size = (self.last_row + 10)/10
            self.block_number = 0
            self.number_of_blocks = self.number_of_points/self.max_read_lines
            # This computes the number of full blocks. The last block may be
            # smaller and won't be included in this estimate.

            if self.verbose is True:
                print 'Reading %d points (in ~%d blocks) from file %s. '\
                      %(self.number_of_points,
                        self.number_of_blocks,
                        self.file_name),
                print 'Each block consists of %d data points'\
                      %self.max_read_lines

        else:
            # Assume the file is a csv file
            file_pointer = open(self.file_name)
            self.header, self.file_pointer = \
                         _read_csv_file_header(file_pointer)
            self.blocking_georef = None # Used for reconciling zones

        return self


    def next(self):
        """ Read a block, instantiate a new geospatial object and return it"""

        if self.file_name[-4:] == ".pts":
            if self.start_row == self.last_row:
                # Read the end of the file last iteration
                # Remove blocking attributes
                self.fid.close()
                del self.max_read_lines
                del self.blocking_georef
                del self.last_row
                del self.start_row
                del self.blocking_keys
                del self.fid
                raise StopIteration
            fin_row = self.start_row + self.max_read_lines
            if fin_row > self.last_row:
                fin_row = self.last_row

            if self.verbose is True:
                if self.show_verbose >= self.start_row and \
                   self.show_verbose < fin_row:
                    print 'Reading block %d (points %d to %d) out of %d'\
                          %(self.block_number,
                            self.start_row,
                            fin_row,
                            self.number_of_blocks)

                    self.show_verbose += max(self.max_read_lines,
                                             self.verbose_block_size)

            # Read next block
            pointlist, att_dict = \
                       _read_pts_file_blocking(self.fid,
                                               self.start_row,
                                               fin_row,
                                               self.blocking_keys)

            geo = Geospatial_data(pointlist, att_dict, self.blocking_georef)
            self.start_row = fin_row

            self.block_number += 1

        else:
            # Assume the file is a csv file
            try:
                pointlist, att_dict, geo_ref, self.file_pointer = \
                           _read_csv_file_blocking(self.file_pointer,
                                                   self.header[:],
                                                   max_read_lines=self.max_read_lines,
                                                   verbose=self.verbose)

                # Check that the zones haven't changed.
                if geo_ref is not None:
                    geo_ref.reconcile_zones(self.blocking_georef)
                    self.blocking_georef = geo_ref
                elif self.blocking_georef is not None:
                    msg = 'Geo reference given, then not given.'
                    msg += ' This should not happen.'
                    raise ValueError, msg
                geo = Geospatial_data(pointlist, att_dict, geo_ref)
            except StopIteration:
                self.file_pointer.close()
                del self.header
                del self.file_pointer
                raise StopIteration
            except ANUGAError:
                self.file_pointer.close()
                del self.header
                del self.file_pointer
                raise
            except SyntaxError:
                self.file_pointer.close()
                del self.header
                del self.file_pointer
                # This should only be if there is a format error
                msg = 'Could not open file %s. \n' %self.file_name
                msg += Error_message['IOError']
                raise SyntaxError, msg
        return geo

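    # A usage sketch of blockwise iteration (file name and 'process' are
    # hypothetical). Each block arrives as its own Geospatial_data object
    # holding at most max_read_lines points:
    #
    #   G = Geospatial_data(file_name='big.pts', load_file_now=False)
    #   for block in G:
    #       process(block.get_data_points(absolute=True))
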

##################### Error messages ###########
Error_message = {}
Em = Error_message
Em['IOError'] = "NOTE: The format for a comma separated .txt/.csv file is:\n"
Em['IOError'] += " 1st line: [column names]\n"
Em['IOError'] += " other lines: [x value], [y value], [attributes]\n"
Em['IOError'] += "\n"
Em['IOError'] += " for example:\n"
Em['IOError'] += " x, y, elevation, friction\n"
Em['IOError'] += " 0.6, 0.7, 4.9, 0.3\n"
Em['IOError'] += " 1.9, 2.8, 5, 0.3\n"
Em['IOError'] += " 2.7, 2.4, 5.2, 0.3\n"
Em['IOError'] += "\n"
Em['IOError'] += "The first two columns are assumed to be x, y coordinates.\n"
Em['IOError'] += "The attribute values must be numeric.\n"

def _set_using_lat_long(latitudes,
                        longitudes,
                        geo_reference,
                        data_points,
                        points_are_lats_longs):
    """
    If the points have lat long info, assume it is in (lat, long) order.
    """

    if geo_reference is not None:
        msg = """A georeference is specified yet latitude and longitude
        are also specified!"""
        raise ValueError, msg

    if data_points is not None and not points_are_lats_longs:
        msg = """Data points are specified yet latitude and
        longitude are also specified."""
        raise ValueError, msg

    if points_are_lats_longs:
        if data_points is None:
            msg = """Data points are not specified."""
            raise ValueError, msg
        lats_longs = ensure_numeric(data_points)
        latitudes = ravel(lats_longs[:,0:1])
        longitudes = ravel(lats_longs[:,1:])

    if latitudes is None and longitudes is None:
        msg = """Latitudes and Longitudes are not specified."""
        raise ValueError, msg

    if latitudes is None:
        msg = """Longitudes are specified yet latitudes aren't."""
        raise ValueError, msg

    if longitudes is None:
        msg = """Latitudes are specified yet longitudes aren't."""
        raise ValueError, msg

    data_points, zone = convert_from_latlon_to_utm(latitudes=latitudes,
                                                   longitudes=longitudes)
    return data_points, Geo_reference(zone=zone)

def _read_pts_file(file_name, verbose=False):
    """Read .pts NetCDF file

    Return an array of points, a dict of attribute arrays and a
    geo reference, eg
    dic['points'] = [[1.0,2.0],[3.0,5.0]]
    dic['attributelist']['elevation'] = [7.0,5.0]
    """

    from Scientific.IO.NetCDF import NetCDFFile

    if verbose: print 'Reading ', file_name

    # See if the file is there. Throw a QUIET IO error if it isn't
    fd = open(file_name, 'r')
    fd.close()

    # Throws prints to screen if file not present
    fid = NetCDFFile(file_name, 'r')

    pointlist = array(fid.variables['points'])
    keys = fid.variables.keys()
    if verbose: print 'Got %d variables: %s' %(len(keys), keys)
    try:
        keys.remove('points')
    except IOError, e:
        fid.close()
        msg = 'Expected keyword "points" but could not find it'
        raise IOError, msg

    attributes = {}
    for key in keys:
        if verbose: print "reading attribute '%s'" %key

        attributes[key] = array(fid.variables[key])

    try:
        geo_reference = Geo_reference(NetCDFObject=fid)
    except AttributeError, e:
        geo_reference = None

    fid.close()

    return pointlist, attributes, geo_reference

| 980 | def _read_csv_file(file_name, verbose=False): |
---|
| 981 | """Read .csv file |
---|
| 982 | |
---|
| 983 | Return a dic of array of points, and dic of array of attribute |
---|
| 984 | eg |
---|
| 985 | dic['points'] = [[1.0,2.0],[3.0,5.0]] |
---|
| 986 | dic['attributelist']['elevation'] = [[7.0,5.0] |
---|
| 987 | """ |
---|
| 988 | |
---|
| 989 | file_pointer = open(file_name) |
---|
| 990 | header, file_pointer = _read_csv_file_header(file_pointer) |
---|
[4180] | 991 | try: |
---|
| 992 | pointlist, att_dict, geo_ref, file_pointer = \ |
---|
| 993 | _read_csv_file_blocking( \ |
---|
[4126] | 994 | file_pointer, |
---|
| 995 | header, |
---|
[5222] | 996 | max_read_lines=1e30) #If the file is bigger that this, block.. # FIXME (Ole) What's up here? |
---|
| 997 | |
---|
[4180] | 998 | except ANUGAError: |
---|
| 999 | file_pointer.close() |
---|
| 1000 | raise |
---|
[4126] | 1001 | file_pointer.close() |
---|
[4180] | 1002 | return pointlist, att_dict, geo_ref |
---|
[4126] | 1003 | |
---|
CSV_DELIMITER = ','

def _read_csv_file_header(file_pointer,
                          delimiter=CSV_DELIMITER,
                          verbose=False):

    """Read the header of a .csv file
    Return a list of the header names
    """
    line = file_pointer.readline()
    header = clean_line(line, delimiter)
    return header, file_pointer

def _read_csv_file_blocking(file_pointer, header,
                            delimiter=CSV_DELIMITER,
                            max_read_lines=MAX_READ_LINES,
                            verbose=False):


    """
    Read the body of a .csv file.
    header: The list header of the csv file, with the x and y labels.
    """
    points = []
    pointattributes = []
    att_dict = {}

    # This is to remove the x and y headers.
    header = header[:]
    try:
        x_header = header.pop(0)
        y_header = header.pop(0)
    except IndexError:
        # if there are not two columns this will occur.
        # eg if it is a space separated file
        raise SyntaxError

    read_lines = 0
    while read_lines < max_read_lines:
        line = file_pointer.readline()
        numbers = clean_line(line, delimiter)
        if len(numbers) <= 1:
            break
        if line[0] == '#':
            continue
        read_lines += 1
        try:
            x = float(numbers[0])
            y = float(numbers[1])
            points.append([x, y])
            numbers.pop(0)
            numbers.pop(0)
            if len(header) != len(numbers):
                file_pointer.close()
                msg = "File load error. " \
                      "There might be a problem with the file header."
                raise SyntaxError, msg
            for i, num in enumerate(numbers):
                num = num.strip()
                if num != '\n' and num != '':
                    att_dict.setdefault(header[i], []).append(float(num))
        except ValueError:
            raise SyntaxError
    if points == []:
        raise StopIteration


    pointlist = array(points).astype(Float)
    for key in att_dict.keys():
        att_dict[key] = array(att_dict[key]).astype(Float)

    # Do stuff here so the info is in lats and longs
    geo_ref = None
    x_header = lower(x_header[:3])
    y_header = lower(y_header[:3])
    if (x_header == 'lon' or x_header == 'lat') and \
       (y_header == 'lon' or y_header == 'lat'):
        if x_header == 'lon':
            longitudes = ravel(pointlist[:,0:1])
            latitudes = ravel(pointlist[:,1:])
        else:
            latitudes = ravel(pointlist[:,0:1])
            longitudes = ravel(pointlist[:,1:])

        pointlist, geo_ref = _set_using_lat_long(latitudes,
                                                 longitudes,
                                                 geo_reference=None,
                                                 data_points=None,
                                                 points_are_lats_longs=False)
    return pointlist, att_dict, geo_ref, file_pointer

def _read_pts_file_header(fid, verbose=False):

    """Read the geo_reference and number_of_points from a .pts file
    """

    keys = fid.variables.keys()
    try:
        keys.remove('points')
    except IOError, e:
        fid.close()
        msg = 'Expected keyword "points" but could not find it'
        raise IOError, msg
    if verbose: print 'Got %d variables: %s' %(len(keys), keys)

    try:
        geo_reference = Geo_reference(NetCDFObject=fid)
    except AttributeError, e:
        geo_reference = None

    return geo_reference, keys, fid.dimensions['number_of_points']

def _read_pts_file_blocking(fid, start_row, fin_row, keys):

    """
    Read a block of points from a .pts file, from start_row up to fin_row.
    Returns the point list and a dictionary of attribute arrays.
    """

    pointlist = array(fid.variables['points'][start_row:fin_row])

    attributes = {}
    for key in keys:
        attributes[key] = array(fid.variables[key][start_row:fin_row])

    return pointlist, attributes

def _write_pts_file(file_name,
                    write_data_points,
                    write_attributes=None,
                    write_geo_reference=None):
    """
    Write .pts NetCDF file

    NOTE: Below might not be valid ask Duncan : NB 5/2006

    WARNING: This function mangles the point_atts data structure
    # FIXME: (DSG) This format has issues.
    # There can't be an attribute called points
    # consider format change
    # method changed by NB not sure if above statement is correct

    should create new test for this
    legal_keys = ['pointlist', 'attributelist', 'geo_reference']
    for key in point_atts.keys():
        msg = 'Key %s is illegal. Valid keys are %s' %(key, legal_keys)
        assert key in legal_keys, msg
    """
    from Scientific.IO.NetCDF import NetCDFFile
    # NetCDF file definition
    outfile = NetCDFFile(file_name, 'w')

    # Create new file
    outfile.institution = 'Geoscience Australia'
    outfile.description = 'NetCDF format for compact and portable storage ' +\
                          'of spatial point data'

    # Dimension definitions
    shape = write_data_points.shape[0]
    outfile.createDimension('number_of_points', shape)
    outfile.createDimension('number_of_dimensions', 2) # This is 2d data

    # Variable definition
    outfile.createVariable('points', Float, ('number_of_points',
                                             'number_of_dimensions'))

    # create variables
    outfile.variables['points'][:] = write_data_points #.astype(Float32)

    if write_attributes is not None:
        for key in write_attributes.keys():
            outfile.createVariable(key, Float, ('number_of_points',))
            outfile.variables[key][:] = write_attributes[key] #.astype(Float32)

    if write_geo_reference is not None:
        write_NetCDF_georeference(write_geo_reference, outfile)

    outfile.close()

def _write_csv_file(file_name,
                    write_data_points,
                    write_attributes=None,
                    as_lat_long=False,
                    delimiter=','):
    """
    Export a file, file_name, in the csv format

    """
    points = write_data_points
    pointattributes = write_attributes

    fd = open(file_name, 'w')
    if as_lat_long:
        titlelist = "latitude" + delimiter + "longitude" + delimiter
    else:
        titlelist = "x" + delimiter + "y" + delimiter
    if pointattributes is not None:
        for title in pointattributes.keys():
            titlelist = titlelist + title + delimiter
    titlelist = titlelist[0:-len(delimiter)] # remove the last delimiter
    fd.write(titlelist + "\n")

    # <x/lat> <y/long> [attributes]
    for i, vert in enumerate(points):

        if pointattributes is not None:
            attlist = delimiter
            for att in pointattributes.keys():
                attlist = attlist + str(pointattributes[att][i]) + delimiter
            attlist = attlist[0:-len(delimiter)] # remove the last delimiter
        else:
            attlist = ''

        fd.write(str(vert[0]) + delimiter +
                 str(vert[1]) + attlist + "\n")

    fd.close()

def _write_urs_file(file_name,
                    points,
                    delimiter=' '):
    """
    Export a file, file_name, in the urs format.
    The data points are in lats and longs.

    """
    fd = open(file_name, 'w')
    fd.write(str(len(points)) + "\n")
    # <lat> <long> <id#>
    for i, vert in enumerate(points):
        fd.write(str(round(vert[0],7)) + delimiter + \
                 str(round(vert[1],7)) + delimiter + str(i) + "\n")
    fd.close()

def _point_atts2array(point_atts):
    point_atts['pointlist'] = array(point_atts['pointlist']).astype(Float)

    for key in point_atts['attributelist'].keys():
        point_atts['attributelist'][key] =\
            array(point_atts['attributelist'][key]).astype(Float)
    return point_atts



def geospatial_data2points_dictionary(geospatial_data):
    """Convert geospatial data to points_dictionary
    """

    points_dictionary = {}
    points_dictionary['pointlist'] = geospatial_data.data_points

    points_dictionary['attributelist'] = {}

    for attribute_name in geospatial_data.attributes.keys():
        val = geospatial_data.attributes[attribute_name]
        points_dictionary['attributelist'][attribute_name] = val

    points_dictionary['geo_reference'] = geospatial_data.geo_reference

    return points_dictionary


def points_dictionary2geospatial_data(points_dictionary):
    """Convert points_dictionary to geospatial data object
    """

    msg = 'Points dictionary must have key pointlist'
    assert points_dictionary.has_key('pointlist'), msg

    msg = 'Points dictionary must have key attributelist'
    assert points_dictionary.has_key('attributelist'), msg

    if points_dictionary.has_key('geo_reference'):
        geo = points_dictionary['geo_reference']
    else:
        geo = None

    return Geospatial_data(points_dictionary['pointlist'],
                           points_dictionary['attributelist'],
                           geo_reference = geo)

def clean_line(line, delimiter):
    """Strip the line, split it on delimiter, remove empty fields and
    strip whitespace from the remaining fields. Return the field list.
    """
    line = line.strip()
    numbers = line.split(delimiter)
    i = len(numbers) - 1
    while i >= 0:
        if numbers[i] == '':
            numbers.pop(i)
        else:
            numbers[i] = numbers[i].strip()
        i -= 1
    return numbers

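# Illustrative sketch only (not part of the original module):
#
#   clean_line(' 1.0, 2.0,,3.0 \n', ',')  ->  ['1.0', '2.0', '3.0']
#
# The line is stripped, split on the delimiter, empty fields are dropped
# and the surviving fields are stripped individually. Note that a field
# containing only spaces is stripped to '' but not removed, since the
# emptiness test happens before the per-field strip.
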
def ensure_absolute(points, geo_reference=None):
    """
    Accept points in several formats and return one format:
    a numeric array of absolute points.

    Input formats are:
    points: List or numeric array of coordinate pairs [xi, eta] of
            points, or a Geospatial_data object, or a points file name.

    geo_reference: A geo_reference object or a 3-tuple consisting of
                   UTM zone, easting and northing.
                   If specified, the point coordinates are assumed to be
                   relative to this origin.
    """
    if isinstance(points, basestring):
        # It's a string - assume it is a points file name
        points = Geospatial_data(file_name=points)

    if isinstance(points, Geospatial_data):
        points = points.get_data_points(absolute=True)
        msg = "Use a Geospatial_data object or a mesh origin. Not both."
        assert geo_reference is None, msg
    else:
        points = ensure_numeric(points, Float)
        if geo_reference is None:
            geo = None #Geo_reference()
        else:
            if isinstance(geo_reference, Geo_reference):
                geo = geo_reference
            else:
                geo = Geo_reference(geo_reference[0],
                                    geo_reference[1],
                                    geo_reference[2])
            points = geo.get_absolute(points)
    return points


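# Illustrative sketch only (not part of the original module); the zone,
# easting and northing values below are made up:
#
#   geo = Geo_reference(56, 100.0, 200.0)
#   pts = ensure_absolute([[1.0, 2.0], [3.0, 4.0]], geo_reference=geo)
#   # equivalently, pass the origin as a (zone, easting, northing) tuple:
#   pts = ensure_absolute([[1.0, 2.0], [3.0, 4.0]],
#                         geo_reference=(56, 100.0, 200.0))
#
# Both calls return a Numeric array of absolute UTM coordinates,
# here [[101.0, 202.0], [103.0, 204.0]].
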
def ensure_geospatial(points, geo_reference=None):
    """
    Accept points in several formats and return one format:
    a Geospatial_data instance.

    Input formats are:
    points: List or numeric array of coordinate pairs [xi, eta] of
            points, or a Geospatial_data object.

    geo_reference: A geo_reference object or a 3-tuple consisting of
                   UTM zone, easting and northing.
                   If specified, the point coordinates are assumed to be
                   relative to this origin.
    """
    if isinstance(points, Geospatial_data):
        msg = "Use a Geospatial_data object or a mesh origin. Not both."
        assert geo_reference is None, msg
        return points
    else:
        points = ensure_numeric(points, Float)
        if geo_reference is None:
            geo = None
        else:
            if isinstance(geo_reference, Geo_reference):
                geo = geo_reference
            else:
                geo = Geo_reference(geo_reference[0],
                                    geo_reference[1],
                                    geo_reference[2])
        points = Geospatial_data(data_points=points, geo_reference=geo)
        return points


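# Illustrative sketch only (not part of the original module):
#
#   g = ensure_geospatial([[1.0, 2.0], [3.0, 4.0]],
#                         geo_reference=(56, 100.0, 200.0))
#   assert isinstance(g, Geospatial_data)
#
# A list of points is wrapped in a Geospatial_data instance; a
# Geospatial_data argument is returned unchanged, in which case no
# separate geo_reference may be given.

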
def find_optimal_smoothing_parameter(data_file,
                                     alpha_list=None,
                                     mesh_file=None,
                                     boundary_poly=None,
                                     mesh_resolution=100000,
                                     north_boundary=None,
                                     south_boundary=None,
                                     east_boundary=None,
                                     west_boundary=None,
                                     plot_name='all_alphas',
                                     split_factor=0.1,
                                     seed_num=None,
                                     cache=False,
                                     verbose=False):
    """
    Removes a small random sample of points from 'data_file'. Then creates
    models with different alpha values from 'alpha_list' and cross validates
    the predicted values against the previously removed point data. Returns
    the alpha value which has the smallest covariance.

    data_file: must not contain points outside the boundaries defined,
               and must be a pts, txt or csv file.

    alpha_list: the alpha values to test, in a single list

    mesh_file: name of the mesh file to create, or, if passed in, the
               mesh file to read. NOTE: if a mesh file is given,
               mesh_resolution, north_boundary, south_boundary etc.
               will be ignored.

    mesh_resolution: the maximum area size for a triangle

    north_boundary... west_boundary: the value of each boundary

    plot_name: the name for the plot containing the results

    split_factor: the proportion of points withheld for cross validation

    seed_num: the seed for the random number generator

    USAGE:
        value, alpha = find_optimal_smoothing_parameter(data_file=fileName,
                                                 alpha_list=[0.0001, 0.01, 1],
                                                 mesh_file=None,
                                                 mesh_resolution=3,
                                                 north_boundary=5,
                                                 south_boundary=-5,
                                                 east_boundary=5,
                                                 west_boundary=-5,
                                                 plot_name='all_alphas',
                                                 seed_num=100000,
                                                 verbose=False)

    OUTPUT: returns the minimum normalised covariance calculated AND the
            alpha that created it. It also writes a plot of the results.

    NOTE: this code will not work if the data_file extent is greater than
    the boundary_polygon or any of the boundaries
    (north_boundary...west_boundary).
    """

    from anuga.shallow_water import Domain
    from anuga.geospatial_data.geospatial_data import Geospatial_data
    from anuga.pmesh.mesh_interface import create_mesh_from_regions

    from anuga.utilities.numerical_tools import cov
    from Numeric import array, resize, shape, Float, zeros, take, argsort, argmin
    from anuga.utilities.polygon import is_inside_polygon
    from anuga.fit_interpolate.benchmark_least_squares import mem_usage


    attribute_smoothed = 'elevation'

    if mesh_file is None:
        if verbose: print "building mesh"
        mesh_file = 'temp.msh'

        if north_boundary is None or south_boundary is None or \
           east_boundary is None or west_boundary is None:
            msg = 'All boundaries must be defined'
            raise ANUGAError(msg)  # raising a plain string is deprecated;
                                   # use the module's exception class

        poly_topo = [[east_boundary, south_boundary],
                     [east_boundary, north_boundary],
                     [west_boundary, north_boundary],
                     [west_boundary, south_boundary]]

        create_mesh_from_regions(poly_topo,
                                 boundary_tags={'back': [2],
                                                'side': [1, 3],
                                                'ocean': [0]},
                                 maximum_triangle_area=mesh_resolution,
                                 filename=mesh_file,
                                 use_cache=cache,
                                 verbose=verbose)

    else: # a mesh file was provided - check that it exists
        if verbose: print "reading from file: %s" % mesh_file
        if access(mesh_file, F_OK) == 0:
            msg = "file %s doesn't exist!" % mesh_file
            raise IOError, msg

    # split the topo data into a small withheld sample and the rest
    if verbose: print 'Reading elevation file: %s' % data_file
    G = Geospatial_data(file_name=data_file)
    if verbose: print 'Start split'
    G_small, G_other = G.split(split_factor, seed_num, verbose=verbose)
    if verbose: print 'Finish split'
    points = G_small.get_data_points()

    if verbose: print "Number of points in sample to compare: ", len(points)

    if alpha_list is None:
        alphas = [0.001, 0.01, 100]
        #alphas = [0.000001, 0.00001, 0.0001, 0.001, 0.01,
        #          0.1, 1.0, 10.0, 100.0, 1000.0, 10000.0]
    else:
        alphas = alpha_list

    # create an array whose columns 1 and 2 hold x and y, column 3 the
    # sampled elevation, and columns 4 onwards the elevation predicted
    # for each alpha, to be compared against the real removed data
    data = array([], typecode=Float)

    data = resize(data, (len(points), 3 + len(alphas)))

    # get the relative points from the sample
    data[:, 0] = points[:, 0]
    data[:, 1] = points[:, 1]
    elevation_sample = G_small.get_attributes(attribute_name=attribute_smoothed)
    data[:, 2] = elevation_sample

    normal_cov = array(zeros([len(alphas), 2]), typecode=Float)

    if verbose: print 'Setup computational domains with different alphas'

    for i, alpha in enumerate(alphas):
        # fit the G_other data to a domain with this alpha
        if verbose: print '\n Calculating domain and mesh for Alpha = ', alpha, '\n'
        domain = Domain(mesh_file, use_cache=cache, verbose=verbose)
        if verbose: print domain.statistics()
        domain.set_quantity(attribute_smoothed,
                            geospatial_data=G_other,
                            use_cache=cache,
                            verbose=verbose,
                            alpha=alpha)

        points_geo = domain.geo_reference.change_points_geo_ref(points)
        # get the predicted elevation of the points that were "split" out
        # of the original data set, for this particular alpha
        if verbose: print 'Get predicted elevation for locations to be compared'
        elevation_predicted = domain.quantities[attribute_smoothed].\
                              get_values(interpolation_points=points_geo)

        # add the predicted elevation to the array that starts with x, y, z
        data[:, i + 3] = elevation_predicted

        sample_cov = cov(elevation_sample)
        ele_cov = cov(elevation_sample - elevation_predicted)
        normal_cov[i, :] = [alpha, ele_cov / sample_cov]

        if verbose: print 'Covariance for alpha ', normal_cov[i][0], '= ', \
                          normal_cov[i][1]
        if verbose: print '-------------------------------------------- \n'

    # sort the results by alpha
    normal_cov0 = normal_cov[:, 0]
    normal_cov_new = take(normal_cov, argsort(normal_cov0))

    if plot_name is not None:
        from pylab import savefig, semilogx, loglog
        semilogx(normal_cov_new[:, 0], normal_cov_new[:, 1])
        loglog(normal_cov_new[:, 0], normal_cov_new[:, 1])
        savefig(plot_name, dpi=300)
    if mesh_file == 'temp.msh':
        remove(mesh_file)

    if verbose:
        print 'Final results:'
        for i, alpha in enumerate(alphas):
            print 'covariance for alpha %s = %s ' \
                  % (normal_cov[i][0], normal_cov[i][1])
        print '\n Optimal alpha is: %s ' \
              % normal_cov_new[(argmin(normal_cov_new, axis=0))[1], 0]

    # return the minimum normalised covariance and the alpha that gave it
    return min(normal_cov_new[:, 1]), \
           normal_cov_new[(argmin(normal_cov_new, axis=0))[1], 0]

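# Note added for clarity: the quantity minimised above is, for each alpha,
#
#   normalised_cov(alpha) = cov(z_sample - z_predicted(alpha)) / cov(z_sample)
#
# where z_sample holds the elevations of the withheld points and
# z_predicted the fitted surface evaluated at the same locations. A value
# near 0 means the surface tracks the withheld points closely; a value
# near 1 means the errors vary about as much as the data itself.

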
def old_find_optimal_smoothing_parameter(data_file,
                                         alpha_list=None,
                                         mesh_file=None,
                                         boundary_poly=None,
                                         mesh_resolution=100000,
                                         north_boundary=None,
                                         south_boundary=None,
                                         east_boundary=None,
                                         west_boundary=None,
                                         plot_name='all_alphas',
                                         split_factor=0.1,
                                         seed_num=None,
                                         cache=False,
                                         verbose=False):
    """
    Superseded version of find_optimal_smoothing_parameter, kept for
    reference.

    data_file: must not contain points outside the boundaries defined,
               and must be a pts, txt or csv file.

    alpha_list: the alpha values to test, in a single list

    mesh_file: name of the mesh file to create, or, if passed in, the
               mesh file to read. NOTE: if a mesh file is given,
               mesh_resolution, north_boundary, south_boundary etc.
               will be ignored.

    mesh_resolution: the maximum area size for a triangle

    north_boundary... west_boundary: the value of each boundary

    plot_name: the name for the plot containing the results

    seed_num: the seed for the random number generator

    USAGE:
        value, alpha = find_optimal_smoothing_parameter(data_file=fileName,
                                                 alpha_list=[0.0001, 0.01, 1],
                                                 mesh_file=None,
                                                 mesh_resolution=3,
                                                 north_boundary=5,
                                                 south_boundary=-5,
                                                 east_boundary=5,
                                                 west_boundary=-5,
                                                 plot_name='all_alphas',
                                                 seed_num=100000,
                                                 verbose=False)

    OUTPUT: returns the minimum normalised covariance calculated AND the
            alpha that created it. It also writes a plot of the results.

    NOTE: this code will not work if the data_file extent is greater than
    the boundary_polygon or the north_boundary...west_boundary.
    """

    from anuga.shallow_water import Domain
    from anuga.geospatial_data.geospatial_data import Geospatial_data
    from anuga.pmesh.mesh_interface import create_mesh_from_regions

    from anuga.utilities.numerical_tools import cov
    from Numeric import array, resize, shape, Float, zeros, take, argsort, argmin
    from anuga.utilities.polygon import is_inside_polygon
    from anuga.fit_interpolate.benchmark_least_squares import mem_usage


    attribute_smoothed = 'elevation'

    if mesh_file is None:
        mesh_file = 'temp.msh'

        if north_boundary is None or south_boundary is None or \
           east_boundary is None or west_boundary is None:
            msg = 'All boundaries must be defined'
            raise ANUGAError(msg)

        poly_topo = [[east_boundary, south_boundary],
                     [east_boundary, north_boundary],
                     [west_boundary, north_boundary],
                     [west_boundary, south_boundary]]

        create_mesh_from_regions(poly_topo,
                                 boundary_tags={'back': [2],
                                                'side': [1, 3],
                                                'ocean': [0]},
                                 maximum_triangle_area=mesh_resolution,
                                 filename=mesh_file,
                                 use_cache=cache,
                                 verbose=verbose)

    else: # a mesh file was provided - check that it exists
        if access(mesh_file, F_OK) == 0:
            msg = "file %s doesn't exist!" % mesh_file
            raise IOError, msg

    # split the topo data into a small withheld sample and the rest
    G = Geospatial_data(file_name=data_file)
    if verbose: print 'start split'
    G_small, G_other = G.split(split_factor, seed_num, verbose=verbose)
    if verbose: print 'finish split'
    points = G_small.get_data_points()

    # FIXME: Remove points outside the boundary polygon
    #    print 'new point', len(points)
    #
    #    new_points = []
    #    new_points = array([], typecode=Float)
    #    new_points = resize(new_points, (len(points), 2))
    #    print "BOUNDARY", boundary_poly
    #    for i, point in enumerate(points):
    #        if is_inside_polygon(point, boundary_poly, verbose=True):
    #            new_points[i] = point
    #            print "WOW", i, new_points[i]
    #    points = new_points


    if verbose: print "Number of points in sample to compare: ", len(points)

    if alpha_list is None:
        alphas = [0.001, 0.01, 100]
        #alphas = [0.000001, 0.00001, 0.0001, 0.001, 0.01,
        #          0.1, 1.0, 10.0, 100.0, 1000.0, 10000.0]
    else:
        alphas = alpha_list
    domains = {}

    if verbose: print 'Setup computational domains with different alphas'

    print 'memory usage before domains', mem_usage()

    for alpha in alphas:
        # fit the G_other data to a domain with this alpha
        if verbose: print '\n Calculating domain and mesh for Alpha = ', alpha, '\n'
        domain = Domain(mesh_file, use_cache=cache, verbose=verbose)
        if verbose: print domain.statistics()
        domain.set_quantity(attribute_smoothed,
                            geospatial_data=G_other,
                            use_cache=cache,
                            verbose=verbose,
                            alpha=alpha)
        domains[alpha] = domain

    print 'memory usage after domains', mem_usage()

    # create an array whose columns 1 and 2 hold x and y, column 3 the
    # sampled elevation, and columns 4 onwards the elevation predicted
    # for each alpha, to be compared against the real removed data
    data = array([], typecode=Float)

    data = resize(data, (len(points), 3 + len(alphas)))

    # get the relative points from the sample
    data[:, 0] = points[:, 0]
    data[:, 1] = points[:, 1]
    elevation_sample = G_small.get_attributes(attribute_name=attribute_smoothed)
    data[:, 2] = elevation_sample

    normal_cov = array(zeros([len(alphas), 2]), typecode=Float)

    if verbose: print 'Determine difference between predicted results and actual data'
    for i, alpha in enumerate(domains):
        if verbose: print 'Alpha =', alpha

        points_geo = domains[alpha].geo_reference.change_points_geo_ref(points)
        # get the predicted elevation of the points that were "split" out
        # of the original data set, for this particular alpha
        elevation_predicted = domains[alpha].quantities[attribute_smoothed].\
                              get_values(interpolation_points=points_geo)

        # add the predicted elevation to the array that starts with x, y, z
        data[:, i + 3] = elevation_predicted

        sample_cov = cov(elevation_sample)
        ele_cov = cov(elevation_sample - elevation_predicted)
        normal_cov[i, :] = [alpha, ele_cov / sample_cov]
        print 'memory usage during compare', mem_usage()

        if verbose: print 'cov', normal_cov[i][0], '= ', normal_cov[i][1]

    # sort the results by alpha
    normal_cov0 = normal_cov[:, 0]
    normal_cov_new = take(normal_cov, argsort(normal_cov0))

    if plot_name is not None:
        from pylab import savefig, semilogx, loglog
        semilogx(normal_cov_new[:, 0], normal_cov_new[:, 1])
        loglog(normal_cov_new[:, 0], normal_cov_new[:, 1])
        savefig(plot_name, dpi=300)
    if mesh_file == 'temp.msh':
        remove(mesh_file)

    return min(normal_cov_new[:, 1]), \
           normal_cov_new[(argmin(normal_cov_new, axis=0))[1], 0]


if __name__ == "__main__":
    pass