Changeset 6304 for branches/numpy/anuga/geospatial_data/geospatial_data.py
- Timestamp: Feb 10, 2009, 11:11:04 AM
- Location: branches/numpy
- Files: 1 edited, 1 copied
Legend:
- Unmodified ( )
- Added (+)
- Removed (-)
branches/numpy/anuga/geospatial_data/geospatial_data.py
r6166 r6304 13 13 from Scientific.IO.NetCDF import NetCDFFile 14 14 15 import Numericas num15 import numpy as num 16 16 17 17 from anuga.coordinate_transforms.lat_long_UTM_conversion import UTMtoLL … … 23 23 from anuga.config import points_file_block_line_size as MAX_READ_LINES 24 24 from anuga.config import netcdf_mode_r, netcdf_mode_w, netcdf_mode_a 25 from anuga.config import netcdf_float 25 26 26 27 DEFAULT_ATTRIBUTE = 'elevation' … … 56 57 load_file_now=True, 57 58 verbose=False): 58 """ 59 Create instance from data points and associated attributes 59 """Create instance from data points and associated attributes 60 60 61 61 data_points: x,y coordinates in meters. Type must be either a 62 sequence of 2-tuples or an Mx2 Numeric array of floats. A file name62 sequence of 2-tuples or an Mx2 numeric array of floats. A file name 63 63 with extension .txt, .cvs or .pts can also be passed in here. 64 64 … … 131 131 132 132 verbose: 133 134 133 """ 135 134 136 135 if isinstance(data_points, basestring): 137 # assume data pointis really a file name136 # assume data_points is really a file name 138 137 file_name = data_points 139 138 140 139 self.set_verbose(verbose) 141 self.geo_reference = None # create the attribute140 self.geo_reference = None # create the attribute 142 141 self.file_name = file_name 143 142 … … 157 156 data_points=data_points, 158 157 points_are_lats_longs=\ 159 158 points_are_lats_longs) 160 159 self.check_data_points(data_points) 161 160 self.set_attributes(attributes) … … 188 187 # This message was misleading. 189 188 # FIXME (Ole): Are we blocking here or not? 190 # print 'ASCII formats are not that great for '191 # print 'blockwise reading. Consider storing this'192 # print 'data as a pts NetCDF format'189 # print 'ASCII formats are not that great for ' 190 # print 'blockwise reading. Consider storing this' 191 # print 'data as a pts NetCDF format' 193 192 194 193 ## … … 203 202 204 203 ## 205 # @brief Save points data in instance.206 # @param data_points Points data to store in instance and check.204 # @brief Check data points. 205 # @param data_points Points data to check and store in instance. 207 206 # @note Throws ValueError exception if no data. 208 207 def check_data_points(self, data_points): 209 """Checks data points 210 """ 208 """Checks data points""" 211 209 212 210 if data_points is None: … … 217 215 else: 218 216 self.data_points = ensure_numeric(data_points) 217 return 218 219 print 'self.data_points=%s' % str(self.data_points) 220 print 'self.data_points.shape=%s' % str(self.data_points.shape) 219 221 if not (0,) == self.data_points.shape: 220 222 assert len(self.data_points.shape) == 2 … … 226 228 # @note Throws exception if unable to convert dict keys to numeric. 
 def set_attributes(self, attributes):
-"""Check and assign attributes dictionary
-"""
+"""Check and assign attributes dictionary"""

 if attributes is None:
@@ -234 +235 @@

 if not isinstance(attributes, DictType):
-# Convert single attribute into dictionary
+# Convert single attribute into dictionary
 attributes = {DEFAULT_ATTRIBUTE: attributes}

-# Check input attributes
+# Check input attributes
 for key in attributes.keys():
 try:
 attributes[key] = ensure_numeric(attributes[key])
 except:
-msg = 'Attribute %s could not be converted' %key
-msg += 'to a numeric vector'
-raise msg
+msg = ("Attribute '%s' (%s) could not be converted to a"
+"numeric vector" % (str(key), str(attributes[key])))
+raise Exception, msg

 self.attributes = attributes

-#def set_geo_reference(self, geo_reference):
-# # FIXME (Ole): Backwards compatibility - deprecate
-# self.setgeo_reference(geo_reference)
-
 ##
 # @brief Set the georeference of geospatial data.
-# @param geo_reference The georeference data
+# @param geo_reference The georeference data to set.
 # @note Will raise exception if param not instance of Geo_reference.
 def set_geo_reference(self, geo_reference):
@@ -277 +274 @@
 msg = 'Argument geo_reference must be a valid Geo_reference '
 msg += 'object or None.'
-raise msg
+raise Expection, msg

 # If a geo_reference already exists, change the point data according to
@@ -511 +508 @@
 raise Exception, msg
 else:
-# other is None:
+# other is None:
 new_points = self.get_data_points(absolute=True)
 new_attributes = self.attributes
@@ -525 +522 @@
 # @return The new geospatial object.
 def __radd__(self, other):
-"""Handle cases like None + Geospatial_data(...)
-"""
+"""Handle cases like None + Geospatial_data(...)"""

 return self + other
@@ -542 +538 @@
 def import_points_file(self, file_name, delimiter=None, verbose=False):
 """ load an .txt, .csv or .pts file
+
 Note: will throw an IOError/SyntaxError if it can't load the file.
 Catch these!
@@ -571 +568 @@
 except SyntaxError, e:
 # This should only be if there is a format error
-msg = 'Could not open file %s. \n' %file_name
+msg = 'Problem with format of file %s. \n' %file_name
 msg += Error_message['IOError']
 raise SyntaxError, msg
@@ -591 +588 @@
 as_lat_long=False, isSouthHemisphere=True):

-"""
-write a points file, file_name, as a text (.csv) or binary (.pts) file
+"""write a points file as a text (.csv) or binary (.pts) file
+
 file_name is the file name, including the extension
 The point_dict is defined at the top of this file.
@@ -659 +656 @@
 """

-# FIXME: add the geo_reference to this
+# FIXME: add the geo_reference to this
 points = self.get_data_points()
 sampled_points = num.take(points, indices)
@@ -679 +676 @@
 # @note Points in each result object are selected randomly.
 def split(self, factor=0.5, seed_num=None, verbose=False):
-"""Returns two
-geospatial_data object, first is the size of the 'factor'
+"""Returns two geospatial_data object, first is the size of the 'factor'
 smaller the original and the second is the remainder. The two
-new object are disjoin set of each other.
+new objects are disjoint sets of each other.

 Points of the two new object have selected RANDOMLY.
@@ -707 +703 @@
 if verbose: print "make unique random number list and get indices"

-total=num.array(range(self_size),num.Int) #array default#
+total=num.array(range(self_size))
 total_list = total.tolist()

@@ -731 +727 @@
 random_num = random_num.tolist()

-# need to sort and reverse so the pop() works correctly
+# need to sort and reverse so the pop() works correctly
 random_num.sort()
 random_num.reverse()
@@ -748 +744 @@
 random_list.append(remainder_list.pop(i))
 j += 1
-# prints progress
+# prints progress
 if verbose and round(random_num_len/10*k) == j:
 print '(%s/%s)' % (j, random_num_len)
@@ -779 +775 @@
 """
 from Scientific.IO.NetCDF import NetCDFFile
-# FIXME - what to do if the file isn't there
+# FIXME - what to do if the file isn't there

 # FIXME (Ole): Shouldn't this go into the constructor?
@@ -941 +937 @@
 data_points,
 points_are_lats_longs):
-"""
-if the points has lat long info, assume it is in (lat, long) order.
-"""
+"""If the points has lat long info, assume it is in (lat, long) order."""

 if geo_reference is not None:
@@ -1055 +1049 @@
 header,
 max_read_lines=1e30)
-# If the file is bigger that this, block..
+# If the file is bigger that this, block..
 # FIXME (Ole) What's up here?
 except ANUGAError:
@@ -1082 +1076 @@
 """

-line = file_pointer.readline()
+line = file_pointer.readline().strip()
 header = clean_line(line, delimiter)

@@ -1095 +1089 @@
 # @param verbose True if this function is to be verbose.
 # @note Will throw IndexError, SyntaxError exceptions.
-def _read_csv_file_blocking(file_pointer, header,
+def _read_csv_file_blocking(file_pointer,
+header,
 delimiter=CSV_DELIMITER,
 max_read_lines=MAX_READ_LINES,
@@ -1150 +1145 @@
 raise StopIteration

-pointlist = num.array(points, num.Float)
+pointlist = num.array(points, num.float)
 for key in att_dict.keys():
-att_dict[key] = num.array(att_dict[key], num.Float)
+att_dict[key] = num.array(att_dict[key], num.float)

 # Do stuff here so the info is in lat's and longs
@@ -1183 +1178 @@
 # @note Will throw IOError and AttributeError exceptions.
 def _read_pts_file_header(fid, verbose=False):
-"""Read the geo_reference and number_of_points from a .pts file
-"""
+"""Read the geo_reference and number_of_points from a .pts file"""

 keys = fid.variables.keys()
@@ -1212 +1206 @@
 # @return Tuple of (pointlist, attributes).
 def _read_pts_file_blocking(fid, start_row, fin_row, keys):
-"""Read the body of a .csv file.
-"""
+"""Read the body of a .csv file."""

 pointlist = num.array(fid.variables['points'][start_row:fin_row])
@@ -1239 +1232 @@

 WARNING: This function mangles the point_atts data structure
 # F??ME: (DSG)This format has issues.
 # There can't be an attribute called points
 # consider format change
@@ -1264 +1257 @@
 shape = write_data_points.shape[0]
 outfile.createDimension('number_of_points', shape)
-outfile.createDimension('number_of_dimensions', 2) # This is 2d data
+outfile.createDimension('number_of_dimensions', 2) # This is 2d data

 # Variable definition
-outfile.createVariable('points', num.Float, ('number_of_points',
-'number_of_dimensions'))
-
-# create variables
-outfile.variables['points'][:] = write_data_points #.astype(Float32)
+outfile.createVariable('points', netcdf_float, ('number_of_points',
+'number_of_dimensions'))
+
+# create variables
+outfile.variables['points'][:] = write_data_points

 if write_attributes is not None:
 for key in write_attributes.keys():
-outfile.createVariable(key, num.Float, ('number_of_points',))
-outfile.variables[key][:] = write_attributes[key] #.astype(Float32)
+outfile.createVariable(key, netcdf_float, ('number_of_points',))
+outfile.variables[key][:] = write_attributes[key]

 if write_geo_reference is not None:
@@ -1296 +1289 @@
 as_lat_long=False,
 delimiter=','):
-"""Write a .csv file.
-"""
+"""Write a .csv file."""

 points = write_data_points
@@ -1361 +1353 @@
 # @return ??
 def _point_atts2array(point_atts):
-point_atts['pointlist'] = num.array(point_atts['pointlist'], num.Float)
+point_atts['pointlist'] = num.array(point_atts['pointlist'], num.float)

 for key in point_atts['attributelist'].keys():
 point_atts['attributelist'][key] = \
-num.array(point_atts['attributelist'][key], num.Float)
+num.array(point_atts['attributelist'][key], num.float)

 return point_atts
@@ -1375 +1367 @@
 # @return A points dictionary.
 def geospatial_data2points_dictionary(geospatial_data):
-"""Convert geospatial data to points_dictionary
-"""
+"""Convert geospatial data to points_dictionary"""

 points_dictionary = {}
@@ -1396 +1387 @@
 # @param points_dictionary A points dictionary to convert.
 def points_dictionary2geospatial_data(points_dictionary):
-"""Convert points_dictionary to geospatial data object
-"""
+"""Convert points_dictionary to geospatial data object"""

 msg = 'Points dictionary must have key pointlist'
@@ -1417 +1407 @@
 ##
 # @brief Split a string into 'clean' fields.
-# @param line The string to process.
+# @param str The string to process.
 # @param delimiter The delimiter string to split 'line' with.
 # @return A list of 'cleaned' field strings.
 # @note Any fields that were initially zero length will be removed.
-def clean_line(line,delimiter):
-"""Remove whitespace
-"""
-
-line = line.strip() # probably unnecessary RW
-numbers = line.split(delimiter)
-
-i = len(numbers) - 1
-while i >= 0:
-if numbers[i] == '':
-numbers.pop(i)
-else:
-numbers[i] = numbers[i].strip()
-i += -1
-
-return numbers
+# @note If a field contains '\n' it isn't zero length.
+def clean_line(str, delimiter):
+"""Split string on given delimiter, remove whitespace from each field."""
+
+return [x.strip() for x in str.split(delimiter) if x != '']

@@ -1462 +1441 @@
 # Input check
 if isinstance(points, basestring):
-# It's a string - assume it is a point file
+# It's a string - assume it is a point file
 points = Geospatial_data(file_name=points)

@@ -1470 +1449 @@
 assert geo_reference == None, msg
 else:
-points = ensure_numeric(points, num.Float)
+points = ensure_numeric(points, num.float)

 # Sort of geo_reference and convert points
 if geo_reference is None:
-geo = None # Geo_reference()
+geo = None # Geo_reference()
 else:
 if isinstance(geo_reference, Geo_reference):
@@ -1493 +1472 @@
 # @return A geospatial object.
 def ensure_geospatial(points, geo_reference=None):
-"""
-This function inputs several formats and
-outputs one format. - a geospatial_data instance.
+"""Convert various data formats to a geospatial_data instance.

 Inputed formats are;
@@ -1514 +1491 @@
 else:
 # List or numeric array of absolute points
-points = ensure_numeric(points, num.Float)
+points = ensure_numeric(points, num.float)

 # Sort out geo reference
@@ -1563 +1540 @@
 cache=False,
 verbose=False):
-"""
-Removes a small random sample of points from 'data_file'. Then creates
-models with different alpha values from 'alpha_list' and cross validates
-the predicted value to the previously removed point data. Returns the
-alpha value which has the smallest covariance.
+"""Removes a small random sample of points from 'data_file'.
+Then creates models with different alpha values from 'alpha_list' and
+cross validates the predicted value to the previously removed point data.
+Returns the alpha value which has the smallest covariance.

 data_file: must not contain points outside the boundaries defined
@@ -1628 +1604 @@
 if no_boundary is True:
 msg = 'All boundaries must be defined'
-raise msg
+raise Expection, msg

 poly_topo = [[east_boundary,south_boundary],
@@ -1645 +1621 @@

 else: # if mesh file provided
-# test mesh file exists?
+# test mesh file exists?
 if verbose: "reading from file: %s" % mesh_file
 if access(mesh_file,F_OK) == 0:
@@ -1651 +1627 @@
 raise IOError, msg

-# split topo data
+# split topo data
 if verbose: print 'Reading elevation file: %s' % data_file
 G = Geospatial_data(file_name = data_file)
@@ -1668 +1644 @@
 alphas = alpha_list

-# creates array with columns 1 and 2 are x, y. column 3 is elevation
-# 4 onwards is the elevation_predicted using the alpha, which will
-# be compared later against the real removed data
-data = num.array([], typecode=num.Float)
+# creates array with columns 1 and 2 are x, y. column 3 is elevation
+# 4 onwards is the elevation_predicted using the alpha, which will
+# be compared later against the real removed data
+data = num.array([], dtype=num.float)

 data=num.resize(data, (len(points), 3+len(alphas)))

-# gets relative point from sample
+# gets relative point from sample
 data[:,0] = points[:,0]
 data[:,1] = points[:,1]
@@ -1681 +1657 @@
 data[:,2] = elevation_sample

-normal_cov=num.array(num.zeros([len(alphas), 2]), typecode=num.Float)
+normal_cov=num.array(num.zeros([len(alphas), 2]), dtype=num.float)

 if verbose: print 'Setup computational domains with different alphas'

 for i, alpha in enumerate(alphas):
-# add G_other data to domains with different alphas
+# add G_other data to domains with different alphas
 if verbose:
 print '\n Calculating domain and mesh for Alpha = ', alpha, '\n'
@@ -1700 +1676 @@
 points_geo = Geospatial_data(points, domain.geo_reference)

-# returns the predicted elevation of the points that were "split" out
-# of the original data set for one particular alpha
+# returns the predicted elevation of the points that were "split" out
+# of the original data set for one particular alpha
 if verbose: print 'Get predicted elevation for location to be compared'
 elevation_predicted = \
@@ -1707 +1683 @@
 get_values(interpolation_points=points_geo)

-# add predicted elevation to array that starts with x, y, z...
+# add predicted elevation to array that starts with x, y, z...
 data[:,i+3] = elevation_predicted

@@ -1834 +1810 @@
 if no_boundary is True:
 msg = 'All boundaries must be defined'
-raise msg
+raise Expection, msg

 poly_topo = [[east_boundary,south_boundary],
@@ -1851 +1827 @@

 else: # if mesh file provided
-# test mesh file exists?
+# test mesh file exists?
 if access(mesh_file,F_OK) == 0:
 msg = "file %s doesn't exist!" % mesh_file
 raise IOError, msg

-# split topo data
+# split topo data
 G = Geospatial_data(file_name=data_file)
 if verbose: print 'start split'
@@ -1863 +1839 @@
 points = G_small.get_data_points()

-# FIXME: Remove points outside boundary polygon
+# FIXME: Remove points outside boundary polygon
 # print 'new point',len(points)
 #
 # new_points=[]
-# new_points=array([], typecode=Float)
+# new_points=array([],dtype=float)
 # new_points=resize(new_points,(len(points),2))
 # print "BOUNDARY", boundary_poly
@@ -1890 +1866 @@

 for alpha in alphas:
-# add G_other data to domains with different alphas
+# add G_other data to domains with different alphas
 if verbose:
 print '\n Calculating domain and mesh for Alpha = ', alpha, '\n'
@@ -1902 +1878 @@
 domains[alpha] = domain

-# creates array with columns 1 and 2 are x, y. column 3 is elevation
-# 4 onwards is the elevation_predicted using the alpha, which will
-# be compared later against the real removed data
-data = num.array([], typecode=num.Float)
+# creates array with columns 1 and 2 are x, y. column 3 is elevation
+# 4 onwards is the elevation_predicted using the alpha, which will
+# be compared later against the real removed data
+data = num.array([], dtype=num.float)

 data = num.resize(data, (len(points), 3+len(alphas)))

-# gets relative point from sample
+# gets relative point from sample
 data[:,0] = points[:,0]
 data[:,1] = points[:,1]
@@ -1915 +1891 @@
 data[:,2] = elevation_sample

-normal_cov = num.array(num.zeros([len(alphas), 2]), typecode=num.Float)
+normal_cov = num.array(num.zeros([len(alphas), 2]), dtype=num.float)

 if verbose:
@@ -1923 +1899 @@

 points_geo = domains[alpha].geo_reference.change_points_geo_ref(points)
-# returns the predicted elevation of the points that were "split" out
-# of the original data set for one particular alpha
+# returns the predicted elevation of the points that were "split" out
+# of the original data set for one particular alpha
 elevation_predicted = \
 domains[alpha].quantities[attribute_smoothed].\
 get_values(interpolation_points=points_geo)

 # add predicted elevation to array that starts with x, y, z...
+data[:,i+3] = elevation_predicted

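Most of the changes above are the mechanical part of the Numeric-to-numpy migration: the `import Numeric as num` alias becomes `import numpy as num`, `typecode=num.Float`/`num.Int` arguments become numpy `dtype` arguments, and NetCDF variables are now created with `netcdf_float` from `anuga.config` rather than `num.Float`. The snippet below is an illustrative sketch only, not part of the changeset; the sample data is invented, and `num.float64` is used where the changeset writes `num.float` (which, in numpy of that era, was simply an alias for Python's built-in float).

# Illustrative sketch of the Numeric -> numpy idioms applied in r6304.
# Not part of the changeset; sample data is invented.
import numpy as num            # r6304 replaces "import Numeric as num"

# Numeric: num.array(data, typecode=num.Float)
# numpy:   num.array(data, dtype=num.float)   # num.float was builtin float then
points = num.array([[1.0, 2.0], [3.0, 4.0]], dtype=num.float64)

# Numeric needed an explicit integer typecode (num.Int); numpy infers it.
indices = num.array(range(3))

print(points.shape)    # (2, 2)
print(indices.dtype)   # a platform integer dtype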
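The old `clean_line()` looped over the split fields and popped empty ones; r6304 replaces it with a single list comprehension. Below is a small standalone demonstration of the new behaviour (the sample strings are invented): fields that are empty before stripping are dropped, while fields that only become empty after stripping, or that carry a trailing newline, survive, which is what the added note "If a field contains '\n' it isn't zero length" warns about. The companion change in `_read_csv_file_header()` (`readline().strip()`) removes the trailing newline before `clean_line()` is called.

# The rewritten clean_line() from r6304, shown with invented sample input.
def clean_line(str, delimiter):
    """Split string on given delimiter, remove whitespace from each field."""
    return [x.strip() for x in str.split(delimiter) if x != '']

print(clean_line('1.0, 2.0, elevation', ','))   # ['1.0', '2.0', 'elevation']
print(clean_line('x,y,,z', ','))                # ['x', 'y', 'z']      '' is dropped
print(clean_line('x,y, ,z\n', ','))             # ['x', 'y', '', 'z']  ' ' and 'z\n'
                                                # are not zero length, so they stay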
Note: See TracChangeset for help on using the changeset viewer.