Changeset 6081
- Timestamp: Dec 16, 2008, 10:17:36 AM
- File: 1 edited
anuga_core/source/anuga/geospatial_data/geospatial_data.py
Changes from r5739 to r6081:

Module header and imports
- The module docstring ("Class Geospatial_data - Manipulation of locations on the planet and associated attributes.") is unchanged.
- The commented-out imports (from Array import tolist, from MA import tolist, and the commented benchmark_least_squares mem_usage import) were deleted, and trailing whitespace was stripped from the remaining import lines.

class Geospatial_data
- Doxygen-style comment blocks (##, @brief, @param, @note, @return) were added above the class and every method in the file; several parameter descriptions are still placeholders ("??"), and the load_file_now entry now reads "True if blocking but we want to read the file now".
- The constructor docstring is unchanged in substance (a usage sketch follows this list): data_points is an Mx2 list/array of x, y coordinates or a points file name; attributes is a vector or a dictionary of named vectors (in the former case the attribute gets the default name "elevation"); geo_reference carries the UTM zone, easting and northing of the data origin; latitudes/longitudes or points_are_lats_longs may be given instead of UTM points; max_read_lines sets the block size when reading blockwise, and load_file_now=False defers loading. Accepted files are .pts (NetCDF, which must have dimensions "points" etc.) and comma separated .txt/.csv with a title line; if the first two column titles start with "lat" or "long" the values are converted to UTM using the zone of the first data point, otherwise the first two columns are taken as x and y and the titles are ignored. The points-dictionary format is also documented: dic['pointlist'] = [[1.0,2.0],[3.0,5.0]] and dic['attributelist']['elevation'] = [[7.0,5.0]].
- In __init__ the latitude/longitude branch was re-wrapped (the three-part condition and the _set_using_lat_long(...) call now fit the line length) with no change in behaviour; assignment spacing was normalised (self.geo_reference = None) and the verbose print statements were re-indented.
- check_data_points: a debug comment printing self.data_points.shape was removed; the method still raises ValueError ('There is no data or file provided!') when data_points is None and otherwise asserts an Nx2 shape.
- __len__ and __repr__ gained @brief comments; __repr__ returns str(self.get_data_points(absolute=True)).
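To make the constructor behaviour concrete, here is a minimal usage sketch based only on the signatures and docstring visible in this changeset; it assumes an installed ANUGA of this vintage (Python 2 with Numeric). The point and attribute values are the ones from the docstring's dictionary example.

    from anuga.geospatial_data.geospatial_data import Geospatial_data

    # Two x,y points with one named attribute; an unnamed attribute
    # vector would get the default name 'elevation'.
    points = [[1.0, 2.0], [3.0, 5.0]]
    attributes = {'elevation': [7.0, 5.0]}

    G = Geospatial_data(data_points=points, attributes=attributes)

    print len(G)                            # 2, via __len__
    print G.get_data_points(absolute=True)  # Nx2 array of coordinates
    print G.get_attributes('elevation')     # the attribute vector

    # Loading from a comma separated file instead; a header whose first
    # two columns start with 'lat'/'long' would be converted to UTM,
    # anything else is taken as x and y.
    # G2 = Geospatial_data(file_name='points.csv')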
- set_attributes: converts a single attribute vector into {DEFAULT_ATTRIBUTE: attributes} and checks each value with ensure_numeric; only whitespace and comment spacing changed. The commented-out set_geo_reference alias above it was kept.
- set_geo_reference: accepts None (points are in absolute coordinates), a Geo_reference object or a (zone, xllcorner, yllcorner) tuple via ensure_geo_reference, and re-expresses existing data points so that absolute coordinate values are unchanged. The error message was re-wrapped ('Argument geo_reference must be a valid Geo_reference object or None.') and the FIXME (Ole) note that the isinstance test can never see None was kept.
- set_default_attribute_name and set_verbose: doxygen blocks added; the 'Illegal value: %s' message spacing was fixed. set_verbose still raises Exception for anything other than True or False.
- clip and clip_outside: both now simply compute inside_polygon/outside_polygon indices and return self.get_sample(indices); the large commented-out blocks that clipped points and attributes by hand (clipped_points = take(points, ...) etc.) and a commented verbose print were deleted (see the clipping sketch after this list). Both methods are non-destructive, returning a new Geospatial_data object and leaving self unchanged, and 'closed' controls whether points on the boundary count as inside (default True). clip_outside's signature spacing was fixed (closed=True, verbose=False).
- get_geo_reference gained a @brief comment.
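A small clipping sketch, continuing the example above and assuming only the method signatures shown here; the square polygon is made up for illustration.

    # Keep points inside a 10x10 square, then the complement.  Both
    # calls return new objects and leave G untouched; closed=True
    # counts boundary points as inside.
    polygon = [[0.0, 0.0], [10.0, 0.0], [10.0, 10.0], [0.0, 10.0]]

    inside = G.clip(polygon, closed=True)
    outside = G.clip_outside(polygon, closed=True)

    # With closed=True for both calls the two results partition G.
    assert len(inside) + len(outside) == len(G)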
- get_data_points: documented with @param absolute, geo_reference, as_lat_long and isSouthHemisphere. Behaviour is unchanged: with as_lat_long=True each point is converted with UTMtoLL(northing, easting, zone, isSouthHemisphere), and the data must have a zone ("Points need a zone to be converted into lats and longs"); with absolute=True the geo_reference offsets are applied; with a geo_reference argument the points are returned relative to it; otherwise the raw relative points are returned. The UTMtoLL call and comments were re-wrapped, and isSouthHemisphere=False yields latitudes and longitudes valid for the Northern Hemisphere.
- get_attributes: falls back to the default attribute name, or the first key, when attribute_name is None, and asserts the name exists ('Attribute name %s does not exist in data set'); print and message spacing fixed.
- get_all_attributes: docstring first line joined ("Return values for all attributes."); returns None or a (possibly empty) dictionary.
- __add__ (sketched after this list): concatenates two objects end to end in absolute coordinates and raises Exception ('Geospatial data must have the same attributes to allow addition.') when the attribute keys differ; adding None returns the object's data in absolute coordinates. The docstring typo "This als emeans" was fixed to "This also means", blank lines were removed, and the final instantiation was collapsed to return Geospatial_data(new_points, new_attributes, new_geo_ref) with a fresh Geo_reference(zone, 0.0, 0.0).
- __radd__: handles None + Geospatial_data(...) by delegating to __add__.
- import_points_file (under the IMPORT/EXPORT POINTS FILES banner): raises IOError when the file is missing ('File %s does not exist or is not accessible'), dispatches on the extension (.pts via _read_pts_file, .txt/.csv via _read_csv_file, anything else 'Extension %s is unknown'), and re-raises csv format problems as SyntaxError with Error_message['IOError']. Commented-out debug prints and a dead "# return all_data" line were removed.
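The addition semantics of __add__ and __radd__, as a sketch with hypothetical coordinate values:

    G1 = Geospatial_data([[1.0, 1.0]], {'elevation': [10.0]})
    G2 = Geospatial_data([[2.0, 2.0]], {'elevation': [20.0]})

    G3 = G1 + G2          # two points, elevations [10.0, 20.0];
                          # raises if the attribute names differ
    G4 = None + G3        # __radd__: returns G3's points, forced
                          # into absolute coordinates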
- export_points_file: doxygen block added (the absolute, as_lat_long and isSouthHemisphere entries are still undescribed). Behaviour unchanged: .pts files are written with _write_pts_file, zeroing the georeference's xllcorner/yllcorner first when absolute=True; .txt/.csv files always receive absolute points ("ERROR: trying to write a .txt file with relative data." guards the call) and may be written as lat/long; .urs files are written from lat/long points only ("ERROR: Can not write a .urs file as a relative file."); any other extension raises IOError ('Unknown file type %s'). The body was re-indented only. A round-trip sketch follows.
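A round trip through the writable formats, assuming write access to the working directory (file names are illustrative):

    # .pts is NetCDF and may hold relative points; .csv/.txt always
    # receive absolute coordinates; .urs wants lat/long points.
    G3.export_points_file('points.pts', absolute=True)
    G3.export_points_file('points.csv')

    # Reading back goes through the same constructor as above.
    H = Geospatial_data(file_name='points.pts')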
- get_sample: returns a new Geospatial_data built from take(points, indices) and the correspondingly sampled attributes; the 'hello/goodbye from get_sample' debug prints were removed and the FIXME about propagating the geo_reference was kept.
- split (sketched below): documented (@param factor, seed_num, verbose; @note points are selected RANDOMLY) and reformatted throughout, with the algorithm unchanged. new_size = round(factor*self_size); seed(seed_num, seed_num) is used when a seed is given (mainly important for the unit tests), otherwise seed(); randint(0, self_size-1, (int(new_size),)) produces indices that are sorted, reversed and pop()ed from a copy of the full index list, so repeated random numbers still remove distinct indices; the resulting index lists feed get_sample to build two disjoint objects whose union is the original. The self-check assertion was rewritten as assert total_list == test_total, msg (with a FIXME noting it belongs in the tests since it can take a long time), the progress printing was tidied, and commented-out debug prints were removed.
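A split sketch matching the docstring's 10% example; G_large is a hypothetical larger data set (e.g. loaded from a file), and seed_num only makes the draw reproducible, as in the unit tests:

    # Roughly 10% of the points end up in G_small, the rest in
    # G_other; the two objects are disjoint and together cover the
    # original.
    G_small, G_other = G_large.split(factor=0.1, seed_num=100)
    assert len(G_small) + len(G_other) == len(G_large)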
- __iter__ (a blockwise-reading sketch follows this list): reads the header and prepares blockwise access; the FIXME (Ole) asking whether this belongs in the constructor, and whether max_read_lines should be called block_size, was kept, and its "bl coking" typo is now "blocking". For .pts files it opens the NetCDF file, reads the georeference, keys and number_of_points with _read_pts_file_header (the tuple unpacking is now spread over several lines), and computes number_of_blocks as the number of full blocks, noting that the last, smaller block is not included in the estimate; for anything else it assumes csv and reads the header with _read_csv_file_header. A 'Starting to block' debug print was removed and the verbose summary print re-wrapped.
- next: a one-line docstring was added (read a block, instantiate a new geospatial object and return it). For .pts it raises StopIteration at the last row, slices the next block with _read_pts_file_blocking and returns Geospatial_data(pointlist, att_dict, self.blocking_georef); for csv it calls _read_csv_file_blocking, checks that a georeference does not disappear between blocks ('Geo reference given, then not given. This should not happen.' raises ValueError), closes the file and stops cleanly on StopIteration, and re-raises format problems as SyntaxError ('Could not open file %s.' plus Error_message['IOError']). Calls and verbose conditions were re-wrapped only.
- The Error_message/Em dictionary of user-facing messages is unchanged.
- _set_using_lat_long: the validation messages were converted from triple-quoted multi-line strings to single-line strings ("A georeference is specified yet latitude and longitude are also specified!", "Data points are specified yet latitude and longitude are also specified.", "Data points are not specified.", "Latitudes and Longitudes are not specified.", and the two one-sided variants); the logic is unchanged: points_are_lats_longs splits data_points into latitude and longitude columns, then convert_from_latlon_to_utm produces the points and the zone for a new Geo_reference.
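Blockwise reading as enabled by load_file_now=False, sketched under the same assumptions and reusing the 'points.pts' file written above:

    # The constructor only records the file name; iterating then
    # yields one Geospatial_data object per block of at most
    # max_read_lines points (.pts shown; csv works the same way).
    G_blocked = Geospatial_data(file_name='points.pts',
                                load_file_now=False,
                                max_read_lines=500)
    for block in G_blocked:
        print len(block), 'points in this block'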
- _read_pts_file: the docstring example gained its missing closing bracket (dic['attributelist']['elevation'] = [[7.0,5.0]]); the function opens the file once to get a quiet IOError if it is missing, reads 'points' and every other NetCDF variable into the attributes dictionary, and reads a Geo_reference from the file when present (AttributeError leaves it None). The "Expected keyword 'points' but could not find it" message was requoted and the verbose prints tidied.
- _read_csv_file: same docstring fix; reads the header with _read_csv_file_header and the body with _read_csv_file_blocking(file_pointer, header, max_read_lines=1e30), the call now unpacked over several lines. The existing comment "If the file is bigger that this, block.. FIXME (Ole) What's up here?" was kept, as was closing the file and re-raising on ANUGAError.
- _read_csv_file_header: documented (returns the cleaned header list and the file pointer); CSV_DELIMITER = ',' still supplies the default delimiter.
- _read_csv_file_blocking: documented (@note it can throw IndexError and SyntaxError). The logic is unchanged: raise SyntaxError when the header has fewer than two fields (e.g. a space separated file); read up to max_read_lines lines, skipping '#' comment lines and stopping at short lines; collect x, y and the per-header attribute values (a ValueError becomes SyntaxError, and a header/field count mismatch raises "File load error. There might be a problem with the file header."); raise StopIteration when no points were read; convert to a Float array; and, when the first two headers (lowercased, first three characters) are lat/lon, convert with _set_using_lat_long. Debug prints were removed and message strings re-wrapped.
- _read_pts_file_header: documented (returns geo_reference, keys and fid.dimensions['number_of_points']; @note IOError and AttributeError); the "Expected keyword 'points'" message was requoted.
- _read_pts_file_blocking: its docstring was shortened but still says it reads a .csv body, although it actually slices fid.variables['points'][start_row:fin_row] and the same rows of each attribute from the .pts NetCDF file; the commented-out verbose parameter was removed.
- _write_pts_file: doxygen block added; the old warning docstring (the NB 5/2006 note, the mangled point_atts warning and the commented legal-keys check) was kept. The writer is unchanged: it creates the NetCDF file, sets institution and description (the string concatenation no longer needs '+\'), creates the number_of_points and number_of_dimensions (2) dimensions, writes the 'points' variable and one Float variable per attribute, and writes the georeference with write_NetCDF_georeference when one is given.
- _write_csv_file: docstring reduced to "Write a .csv file."; it writes a title line ("latitude,longitude" or "x,y" plus the attribute names) and then one row per point with the attribute values appended; spacing was fixed and the final fd.write joined to one line.
- _write_urs_file: the signature was joined to one line and a comment added noting that the first line is the number of points, followed by one "<lat> <long> <id#>" row per point with coordinates rounded to 7 decimal places.
- _point_atts2array: converts point_atts['pointlist'] and every attributelist entry to Float arrays; only line-continuation spacing changed.
- geospatial_data2points_dictionary and points_dictionary2geospatial_data: doxygen blocks added; the latter still asserts the 'pointlist' and 'attributelist' keys and passes any 'geo_reference' through. Whitespace only (see the round trip sketched below).
- clean_line: strips the line, splits on the delimiter, drops fields that were empty before stripping and strips the rest; the debug prints were removed and a "# probably unnecessary RW" comment was added to the initial strip.
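The dictionary converters and clean_line, exercised on the docstring's own example values; a sketch, not part of the changeset:

    from anuga.geospatial_data.geospatial_data import \
         points_dictionary2geospatial_data, \
         geospatial_data2points_dictionary, clean_line

    dic = {'pointlist': [[1.0, 2.0], [3.0, 5.0]],
           'attributelist': {'elevation': [7.0, 5.0]}}

    G5 = points_dictionary2geospatial_data(dic)   # dict -> object
    dic2 = geospatial_data2points_dictionary(G5)  # object -> dict

    # clean_line splits on the delimiter and strips each field;
    # fields that were empty before stripping are dropped.
    clean_line('x, y, elevation\n', ',')   # ['x', 'y', 'elevation']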
- ensure_absolute (sketched below): accepts a file name, a Geospatial_data object (in which case a separate geo_reference is disallowed, the message now reading 'Use a Geospatial_data object or a mesh origin, not both.' with a comma replacing the stray full stop), or a list/array of points with an optional Geo_reference object or (zone, xllcorner, yllcorner) tuple, and returns a numeric array of absolute coordinates. A dead "# Return" comment was removed.
- ensure_geospatial: same message fix; wraps points (list, array or Geospatial_data) and an optional georeference into a Geospatial_data object; docstring indentation fixed.
- find_optimal_smoothing_parameter: a doxygen block now lists every parameter (descriptions still empty) and the signature's closing parenthesis was moved onto the last argument (verbose=False).
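The two ensure_* helpers, sketched with a made-up zone 56 georeference tuple:

    from anuga.geospatial_data.geospatial_data import ensure_absolute, \
         ensure_geospatial

    # Tuple georeference = (zone, xllcorner, yllcorner); passing one
    # is disallowed when the input is already a Geospatial_data object.
    abs_points = ensure_absolute([[1.0, 1.0], [2.0, 2.0]],
                                 geo_reference=(56, 100000.0, 200000.0))

    geo_obj = ensure_geospatial([[1.0, 1.0], [2.0, 2.0]])  # absolute points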
Then creates 1566 models with different alpha values from 'alpha_list' and cross validates 1430 1567 the predicted value to the previously removed point data. Returns the 1431 1568 alpha value which has the smallest covariance. 1432 1569 1433 1570 data_file: must not contain points outside the boundaries defined 1434 and it either a pts, txt or csv file.1435 1571 and it either a pts, txt or csv file. 1572 1436 1573 alpha_list: the alpha values to test in a single list 1437 1574 1438 1575 mesh_file: name of the created mesh file or if passed in will read it. 1439 NOTE, if there is a mesh file mesh_resolution,1440 north_boundary, south... etc will be ignored.1441 1576 NOTE, if there is a mesh file mesh_resolution, 1577 north_boundary, south... etc will be ignored. 1578 1442 1579 mesh_resolution: the maximum area size for a triangle 1443 1580 1444 1581 north_boundary... west_boundary: the value of the boundary 1445 1582 1446 1583 plot_name: the name for the plot contain the results 1447 1584 1448 1585 seed_num: the seed to the random number generator 1449 1586 1450 1587 USAGE: 1451 value, alpha = find_optimal_smoothing_parameter(data_file=fileName, 1588 value, alpha = find_optimal_smoothing_parameter(data_file=fileName, 1452 1589 alpha_list=[0.0001, 0.01, 1], 1453 1590 mesh_file=None, … … 1460 1597 seed_num=100000, 1461 1598 verbose=False) 1462 1463 OUTPUT: returns the minumum normalised covalance calculate AND the 1599 1600 OUTPUT: returns the minumum normalised covalance calculate AND the 1464 1601 alpha that created it. PLUS writes a plot of the results 1465 1602 1466 1603 NOTE: code will not work if the data_file extent is greater than the 1467 1604 boundary_polygon or any of the boundaries, eg north_boundary...west_boundary 1468 1469 1605 """ 1470 1606 … … 1472 1608 from anuga.geospatial_data.geospatial_data import Geospatial_data 1473 1609 from anuga.pmesh.mesh_interface import create_mesh_from_regions 1474 1475 1610 from anuga.utilities.numerical_tools import cov 1476 1611 from Numeric import array, resize,shape,Float,zeros,take,argsort,argmin 1477 from anuga.utilities.polygon import is_inside_polygon 1478 from anuga.fit_interpolate.benchmark_least_squares import mem_usage 1479 1612 from anuga.utilities.polygon import is_inside_polygon 1613 from anuga.fit_interpolate.benchmark_least_squares import mem_usage 1614 1480 1615 1481 1616 attribute_smoothed='elevation' … … 1483 1618 if mesh_file is None: 1484 1619 if verbose: print "building mesh" 1485 mesh_file ='temp.msh'1486 1487 if north_boundary is None or south_boundary is None or\1488 east_boundary is None or west_boundary is None:1489 no_boundary =True1620 mesh_file = 'temp.msh' 1621 1622 if north_boundary is None or south_boundary is None \ 1623 or east_boundary is None or west_boundary is None: 1624 no_boundary = True 1490 1625 else: 1491 no_boundary =False1492 1626 no_boundary = False 1627 1493 1628 if no_boundary is True: 1494 msg = 'All boundaries must be defined'1629 msg = 'All boundaries must be defined' 1495 1630 raise msg 1496 1631 … … 1499 1634 [west_boundary,north_boundary], 1500 1635 [west_boundary,south_boundary]] 1501 1636 1502 1637 create_mesh_from_regions(poly_topo, 1503 1638 boundary_tags={'back': [2], 1504 1639 'side': [1,3], 1505 1640 'ocean': [0]}, 1506 maximum_triangle_area=mesh_resolution,1507 filename=mesh_file,1508 use_cache=cache,1509 verbose=verbose)1641 maximum_triangle_area=mesh_resolution, 1642 filename=mesh_file, 1643 use_cache=cache, 1644 verbose=verbose) 1510 1645 1511 1646 else: # if mesh file provided 

    from anuga.shallow_water import Domain
    from anuga.geospatial_data.geospatial_data import Geospatial_data
    from anuga.pmesh.mesh_interface import create_mesh_from_regions
    from anuga.utilities.numerical_tools import cov
    from Numeric import array, resize, shape, Float, zeros, take, argsort, argmin
    from anuga.utilities.polygon import is_inside_polygon
    from anuga.fit_interpolate.benchmark_least_squares import mem_usage

    attribute_smoothed = 'elevation'

    if mesh_file is None:
        if verbose: print "building mesh"
        mesh_file = 'temp.msh'

        if north_boundary is None or south_boundary is None \
           or east_boundary is None or west_boundary is None:
            no_boundary = True
        else:
            no_boundary = False

        if no_boundary is True:
            msg = 'All boundaries must be defined'
            raise Exception(msg)

        poly_topo = [[east_boundary, south_boundary],
                     [east_boundary, north_boundary],
                     [west_boundary, north_boundary],
                     [west_boundary, south_boundary]]

        create_mesh_from_regions(poly_topo,
                                 boundary_tags={'back': [2],
                                                'side': [1,3],
                                                'ocean': [0]},
                                 maximum_triangle_area=mesh_resolution,
                                 filename=mesh_file,
                                 use_cache=cache,
                                 verbose=verbose)

    else: # if mesh file provided
        # test mesh file exists?
        if verbose: print "reading from file: %s" % mesh_file
        if access(mesh_file, F_OK) == 0:
            msg = "file %s doesn't exist!" % mesh_file
            raise IOError, msg

    # split topo data
    if verbose: print 'Reading elevation file: %s' % data_file
    G = Geospatial_data(file_name=data_file)
    if verbose: print 'Start split'
    G_small, G_other = G.split(split_factor, seed_num, verbose=verbose)
    if verbose: print 'Finish split'
    points = G_small.get_data_points()

    if verbose: print "Number of points in sample to compare: ", len(points)

    if alpha_list is None:
        alphas = [0.001, 0.01, 100]
        #alphas = [0.000001, 0.00001, 0.0001, 0.001, 0.01, \
        #          0.1, 1.0, 10.0, 100.0, 1000.0, 10000.0]
    else:
        alphas = alpha_list

    # creates array where columns 1 and 2 are x, y and column 3 is elevation;
    # column 4 onwards is the elevation_predicted using the alpha, which will
    # be compared later against the real removed data
    data = array([], typecode=Float)
    data = resize(data, (len(points), 3 + len(alphas)))

    # gets relative point from sample
    data[:,0] = points[:,0]
    data[:,1] = points[:,1]
    elevation_sample = G_small.get_attributes(attribute_name=attribute_smoothed)
    data[:,2] = elevation_sample

    normal_cov = array(zeros([len(alphas), 2]), typecode=Float)

    if verbose: print 'Setup computational domains with different alphas'

    for i, alpha in enumerate(alphas):
        # add G_other data to domains with different alphas
        if verbose:
            print '\n Calculating domain and mesh for Alpha = ', alpha, '\n'
        domain = Domain(mesh_file, use_cache=cache, verbose=verbose)
        if verbose: print domain.statistics()
        domain.set_quantity(attribute_smoothed,
                            geospatial_data=G_other,
                            use_cache=cache,
                            verbose=verbose,
                            alpha=alpha)

        # Convert points to geospatial data for use with get_values below
        points_geo = Geospatial_data(points, domain.geo_reference)
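
        # Wrapping the points in a Geospatial_data object ties them to the
        # domain's geo_reference so get_values below can interpolate at the
        # right locations; the older variant at the end of this module uses
        # change_points_geo_ref for the same purpose.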

        # returns the predicted elevation of the points that were "split" out
        # of the original data set for one particular alpha
        if verbose: print 'Get predicted elevation for location to be compared'
        elevation_predicted = \
                domain.quantities[attribute_smoothed].\
                        get_values(interpolation_points=points_geo)

        # add predicted elevation to array that starts with x, y, z...
        data[:,i+3] = elevation_predicted

        sample_cov = cov(elevation_sample)
        ele_cov = cov(elevation_sample - elevation_predicted)
        normal_cov[i,:] = [alpha, ele_cov / sample_cov]

        if verbose:
            print 'Covariance for alpha ', normal_cov[i][0], '= ', \
                      normal_cov[i][1]
            print '-------------------------------------------- \n'

    normal_cov0 = normal_cov[:,0]
    normal_cov_new = take(normal_cov, argsort(normal_cov0))

    if plot_name is not None:
        from pylab import savefig, semilogx, loglog

        semilogx(normal_cov_new[:,0], normal_cov_new[:,1])
        loglog(normal_cov_new[:,0], normal_cov_new[:,1])
        savefig(plot_name, dpi=300)

    if mesh_file == 'temp.msh':
        remove(mesh_file)

    if verbose:
        print 'Final results:'
        for i, alpha in enumerate(alphas):
            print 'covariance for alpha %s = %s ' \
                  % (normal_cov[i][0], normal_cov[i][1])
        print '\n Optimal alpha is: %s ' \
              % normal_cov_new[(argmin(normal_cov_new, axis=0))[1], 0]

    # covariance and optimal alpha
    return (min(normal_cov_new[:,1]),
            normal_cov_new[(argmin(normal_cov_new, axis=0))[1], 0])
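
A typical call, following the USAGE note above (the data file name and the
boundary values are illustrative placeholders only):

    from anuga.geospatial_data.geospatial_data import \
         find_optimal_smoothing_parameter

    value, alpha = find_optimal_smoothing_parameter(data_file='elevation.csv',
                                                    alpha_list=[0.0001, 0.01, 1],
                                                    mesh_resolution=100000,
                                                    north_boundary=5000.0,
                                                    south_boundary=-5000.0,
                                                    east_boundary=5000.0,
                                                    west_boundary=-5000.0,
                                                    plot_name='all_alphas',
                                                    seed_num=100000,
                                                    verbose=False)
    print 'minimum normalised covariance %s at alpha %s' % (value, alpha)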


##
# @brief Older variant of find_optimal_smoothing_parameter, kept for reference.
# @param data_file
# @param alpha_list
# @param mesh_file
# @param boundary_poly
# @param mesh_resolution
# @param north_boundary
# @param south_boundary
# @param east_boundary
# @param west_boundary
# @param plot_name
# @param split_factor
# @param seed_num
# @param cache
# @param verbose
def old_find_optimal_smoothing_parameter(data_file,
                                         alpha_list=None,
                                         mesh_file=None,
                                         boundary_poly=None,
                                         mesh_resolution=100000,
                                         north_boundary=None,
                                         south_boundary=None,
                                         east_boundary=None,
                                         west_boundary=None,
                                         plot_name='all_alphas',
                                         split_factor=0.1,
                                         seed_num=None,
                                         cache=False,
                                         verbose=False):
    """
    data_file: must not contain points outside the boundaries defined
               and is either a pts, txt or csv file.

    alpha_list: the alpha values to test in a single list

    mesh_file: name of the created mesh file, or if passed in will read it.
               NOTE, if there is a mesh file, mesh_resolution,
               north_boundary, south... etc will be ignored.

    mesh_resolution: the maximum area size for a triangle

    north_boundary... west_boundary: the value of the boundary

    plot_name: the name for the plot containing the results

    seed_num: the seed to the random number generator

    USAGE:
        value, alpha = find_optimal_smoothing_parameter(data_file=fileName,
                                             alpha_list=[0.0001, 0.01, 1],
                                             mesh_file=None,
                                             …
                                             seed_num=100000,
                                             verbose=False)

    OUTPUT: returns the minimum normalised covariance calculated AND the
            alpha that created it. PLUS writes a plot of the results.

    NOTE: code will not work if the data_file extent is greater than the
          boundary_polygon or the north_boundary...west_boundary
    """

    from anuga.shallow_water import Domain
    from anuga.geospatial_data.geospatial_data import Geospatial_data
    from anuga.pmesh.mesh_interface import create_mesh_from_regions
    from anuga.utilities.numerical_tools import cov
    from Numeric import array, resize, shape, Float, zeros, take, argsort, argmin
    from anuga.utilities.polygon import is_inside_polygon
    from anuga.fit_interpolate.benchmark_least_squares import mem_usage


    attribute_smoothed = 'elevation'

    if mesh_file is None:
        mesh_file = 'temp.msh'

        if north_boundary is None or south_boundary is None \
           or east_boundary is None or west_boundary is None:
            no_boundary = True
        else:
            no_boundary = False

        if no_boundary is True:
            msg = 'All boundaries must be defined'
            raise Exception(msg)

        poly_topo = [[east_boundary, south_boundary],
                     [east_boundary, north_boundary],
                     [west_boundary, north_boundary],
                     [west_boundary, south_boundary]]

        create_mesh_from_regions(poly_topo,
                                 boundary_tags={'back': [2],
                                                'side': [1,3],
                                                'ocean': [0]},
                                 maximum_triangle_area=mesh_resolution,
                                 filename=mesh_file,
                                 use_cache=cache,
                                 verbose=verbose)
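
        # The rectangle above is traced anticlockwise from the south-east
        # corner, so, assuming the usual convention that boundary segment i
        # joins vertex i and vertex i+1, 'ocean' tags the eastern edge,
        # 'back' the western edge, and 'side' the northern and southern edges.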

    else: # if mesh file provided
        # test mesh file exists?
        if access(mesh_file, F_OK) == 0:
            msg = "file %s doesn't exist!" % mesh_file
            raise IOError, msg

    # split topo data
    G = Geospatial_data(file_name=data_file)
    if verbose: print 'start split'
    G_small, G_other = G.split(split_factor, seed_num, verbose=verbose)
    if verbose: print 'finish split'
    points = G_small.get_data_points()

    #FIXME: Remove points outside boundary polygon
    #    print 'new point', len(points)
    #
    #    new_points = []
    #    new_points = array([], typecode=Float)
    …
    #    points = new_points

    if verbose: print "Number of points in sample to compare: ", len(points)

    if alpha_list is None:
        alphas = [0.001, 0.01, 100]
        #alphas = [0.000001, 0.00001, 0.0001, 0.001, 0.01, \
        #          0.1, 1.0, 10.0, 100.0, 1000.0, 10000.0]
    else:
        alphas = alpha_list

    domains = {}

    if verbose: print 'Setup computational domains with different alphas'

    for alpha in alphas:
        # add G_other data to domains with different alphas
        if verbose:
            print '\n Calculating domain and mesh for Alpha = ', alpha, '\n'
        domain = Domain(mesh_file, use_cache=cache, verbose=verbose)
        if verbose: print domain.statistics()
        domain.set_quantity(attribute_smoothed,
                            geospatial_data=G_other,
                            use_cache=cache,
                            verbose=verbose,
                            alpha=alpha)
        domains[alpha] = domain
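
    # Note that every fitted domain is kept alive in 'domains', so peak
    # memory grows linearly with len(alphas); the newer
    # find_optimal_smoothing_parameter above fits and compares one alpha
    # at a time instead.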

    # creates array where columns 1 and 2 are x, y and column 3 is elevation;
    # column 4 onwards is the elevation_predicted using the alpha, which will
    # be compared later against the real removed data
    data = array([], typecode=Float)
    data = resize(data, (len(points), 3 + len(alphas)))

    # gets relative point from sample
    data[:,0] = points[:,0]
    data[:,1] = points[:,1]
    elevation_sample = G_small.get_attributes(attribute_name=attribute_smoothed)
    data[:,2] = elevation_sample

    normal_cov = array(zeros([len(alphas), 2]), typecode=Float)

    if verbose:
        print 'Determine difference between predicted results and actual data'
    for i, alpha in enumerate(domains):
        if verbose: print 'Alpha =', alpha

        points_geo = domains[alpha].geo_reference.change_points_geo_ref(points)
        # returns the predicted elevation of the points that were "split" out
        # of the original data set for one particular alpha
        elevation_predicted = \
                domains[alpha].quantities[attribute_smoothed].\
                        get_values(interpolation_points=points_geo)
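
        # change_points_geo_ref expresses the sample points relative to this
        # domain's geo_reference; the newer variant above wraps the points
        # in a Geospatial_data object to the same end.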

        # add predicted elevation to array that starts with x, y, z...
        data[:,i+3] = elevation_predicted

        sample_cov = cov(elevation_sample)
        ele_cov = cov(elevation_sample - elevation_predicted)
        normal_cov[i,:] = [alpha, ele_cov / sample_cov]
        print 'memory usage during compare', mem_usage()
        if verbose: print 'cov', normal_cov[i][0], '= ', normal_cov[i][1]

    normal_cov0 = normal_cov[:,0]
    normal_cov_new = take(normal_cov, argsort(normal_cov0))

    if plot_name is not None:
        from pylab import savefig, semilogx, loglog

        semilogx(normal_cov_new[:,0], normal_cov_new[:,1])
        loglog(normal_cov_new[:,0], normal_cov_new[:,1])
        savefig(plot_name, dpi=300)
    if mesh_file == 'temp.msh':
        remove(mesh_file)

    return (min(normal_cov_new[:,1]),
            normal_cov_new[(argmin(normal_cov_new, axis=0))[1], 0])


if __name__ == "__main__":
    pass
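
Stripped of the meshing and domain machinery, the selection rule both
functions implement fits in a few lines. In this sketch the 'predictions'
dict is a hypothetical stand-in for the per-alpha domain fits performed
above:

    from Numeric import array, argmin
    from anuga.utilities.numerical_tools import cov

    def pick_alpha(elevation_sample, predictions, alphas):
        """Return (normalised covariance, alpha) for the best-scoring alpha.

        elevation_sample: attribute values withheld from the fit
        predictions: dict mapping each alpha to the values predicted at the
                     withheld points by the surface fitted without them
        """
        scores = [cov(elevation_sample - predictions[alpha]) /
                  cov(elevation_sample) for alpha in alphas]
        i = argmin(array(scores))
        return scores[i], alphas[i]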