Ignore:
Timestamp:
Feb 6, 2008, 2:49:33 PM (17 years ago)
Author:
nick
Message:

update to find_optimal_smoothing_alpha to hopefully reduce the memory required

File:
1 edited

Legend:

Unmodified
Added
Removed
  • anuga_core/source/anuga/geospatial_data/geospatial_data.py

    r4879 r5003  
    2323from anuga.coordinate_transforms.redfearn import convert_from_latlon_to_utm
    2424from anuga.utilities.anuga_exceptions import ANUGAError
    25 from anuga.config import points_file_block_line_size as MAX_READ_LINES   
     25from anuga.config import points_file_block_line_size as MAX_READ_LINES 
     26#from anuga.fit_interpolate.benchmark_least_squares import mem_usage
     27 
    2628
    2729DEFAULT_ATTRIBUTE = 'elevation'
     
    14391441    from Numeric import array, resize,shape,Float,zeros,take,argsort,argmin
    14401442    from anuga.utilities.polygon import is_inside_polygon
     1443    from anuga.fit_interpolate.benchmark_least_squares import mem_usage
     1444   
    14411445
    14421446    attribute_smoothed='elevation'
     
    14821486    points=G_small.get_data_points()
    14831487
     1488
     1489
     1490
    14841491    #FIXME: Remove points outside boundary polygon
    14851492#    print 'new point',len(points)
     
    15051512    else:
    15061513        alphas=alpha_list
     1514#    domains = {}
     1515
     1516    #creates array with columns 1 and 2 are x, y. column 3 is elevation
     1517    #4 onwards is the elevation_predicted using the alpha, which will
     1518    #be compared later against the real removed data
     1519    data=array([],typecode=Float)
     1520
     1521    data=resize(data,(len(points),3+len(alphas)))
     1522
     1523    #gets relative point from sample
     1524    data[:,0]=points[:,0]
     1525    data[:,1]=points[:,1]
     1526    elevation_sample=G_small.get_attributes(attribute_name=attribute_smoothed)
     1527    data[:,2]=elevation_sample
     1528
     1529    normal_cov=array(zeros([len(alphas),2]),typecode=Float)
     1530
     1531
     1532
     1533
     1534
     1535    if verbose: print 'Setup computational domains with different alphas'
     1536
     1537    #print 'memory usage before domains',mem_usage()
     1538
     1539    for i,alpha in enumerate(alphas):
     1540        #add G_other data to domains with different alphas
     1541        if verbose:print '\n Calculating domain and mesh for Alpha = ',alpha,'\n'
     1542        domain = Domain(mesh_file, use_cache=cache, verbose=verbose)
     1543        if verbose:print domain.statistics()
     1544        domain.set_quantity(attribute_smoothed,
     1545                    geospatial_data = G_other,
     1546                    use_cache = cache,
     1547                    verbose = verbose,
     1548                    alpha = alpha)
     1549#        domains[alpha]=domain
     1550
     1551        points_geo=domain.geo_reference.change_points_geo_ref(points)
     1552        #returns the predicted elevation of the points that were "split" out
     1553        #of the original data set for one particular alpha
     1554        elevation_predicted=domain.quantities[attribute_smoothed].\
     1555                            get_values(interpolation_points=points_geo)
     1556 
     1557        #add predicted elevation to array that starts with x, y, z...
     1558        data[:,i+3]=elevation_predicted
     1559
     1560        sample_cov= cov(elevation_sample)
     1561        #print elevation_predicted
     1562        ele_cov= cov(elevation_sample-elevation_predicted)
     1563        normal_cov[i,:]= [alpha,ele_cov/sample_cov]
     1564        #print 'memory usage during compare',mem_usage()
     1565       
     1566
     1567        if verbose: print'cov',normal_cov[i][0],'= ',normal_cov[i][1]
     1568
     1569
     1570
     1571#    if verbose: print 'Determine difference between predicted results and actual data'
     1572#    for i,alpha in enumerate(domains):
     1573#        if verbose: print'Alpha =',alpha
     1574#       
     1575#        points_geo=domains[alpha].geo_reference.change_points_geo_ref(points)
     1576#        #returns the predicted elevation of the points that were "split" out
     1577#        #of the original data set for one particular alpha
     1578#        elevation_predicted=domains[alpha].quantities[attribute_smoothed].\
     1579#                            get_values(interpolation_points=points_geo)
     1580#
     1581#        #add predicted elevation to array that starts with x, y, z...
     1582#        data[:,i+3]=elevation_predicted
     1583#
     1584#        sample_cov= cov(elevation_sample)
     1585#        #print elevation_predicted
     1586#        ele_cov= cov(elevation_sample-elevation_predicted)
     1587#        normal_cov[i,:]= [alpha,ele_cov/sample_cov]
     1588#        print 'memory usage during compare',mem_usage()
     1589#       
     1590#
     1591#        if verbose: print'cov',normal_cov[i][0],'= ',normal_cov[i][1]
     1592
     1593    normal_cov0=normal_cov[:,0]
     1594    normal_cov_new=take(normal_cov,argsort(normal_cov0))
     1595
     1596    if plot_name is not None:
     1597        from pylab import savefig,semilogx,loglog
     1598        semilogx(normal_cov_new[:,0],normal_cov_new[:,1])
     1599        loglog(normal_cov_new[:,0],normal_cov_new[:,1])
     1600        savefig(plot_name,dpi=300)
     1601    if mesh_file == 'temp.msh':
     1602        remove(mesh_file)
     1603   
     1604    return min(normal_cov_new[:,1]) , normal_cov_new[(argmin(normal_cov_new,axis=0))[1],0]
     1605
     1606def old_find_optimal_smoothing_parameter(data_file,
     1607                                     alpha_list=None,
     1608                                     mesh_file=None,
     1609                                     boundary_poly=None,
     1610                                     mesh_resolution=100000,
     1611                                     north_boundary=None,
     1612                                     south_boundary=None,
     1613                                     east_boundary=None,
     1614                                     west_boundary=None,
     1615                                     plot_name='all_alphas',
     1616                                     split_factor=0.1,
     1617                                     seed_num=None,
     1618                                     cache=False,
     1619                                     verbose=False
     1620                                     ):
     1621   
     1622    """
     1623    data_file: must not contain points outside the boundaries defined
     1624           and it either a pts, txt or csv file.
     1625   
     1626    alpha_list: the alpha values to test in a single list
     1627   
     1628    mesh_file: name of the created mesh file or if passed in will read it.
     1629            NOTE, if there is a mesh file mesh_resolution,
     1630            north_boundary, south... etc will be ignored.
     1631   
     1632    mesh_resolution: the maximum area size for a triangle
     1633   
     1634    north_boundary... west_boundary: the value of the boundary
     1635   
     1636    plot_name: the name for the plot contain the results
     1637   
     1638    seed_num: the seed to the random number generator
     1639   
     1640    USAGE:
     1641        value, alpha = find_optimal_smoothing_parameter(data_file=fileName,
     1642                                             alpha_list=[0.0001, 0.01, 1],
     1643                                             mesh_file=None,
     1644                                             mesh_resolution=3,
     1645                                             north_boundary=5,
     1646                                             south_boundary=-5,
     1647                                             east_boundary=5,
     1648                                             west_boundary=-5,
     1649                                             plot_name='all_alphas',
     1650                                             seed_num=100000,
     1651                                             verbose=False)
     1652   
     1653    OUTPUT: returns the minumum normalised covalance calculate AND the
     1654           alpha that created it. PLUS writes a plot of the results
     1655           
     1656    NOTE: code will not work if the data_file extend is greater than the
     1657    boundary_polygon or the north_boundary...west_boundary
     1658       
     1659    """
     1660
     1661    from anuga.shallow_water import Domain
     1662    from anuga.geospatial_data.geospatial_data import Geospatial_data
     1663    from anuga.pmesh.mesh_interface import create_mesh_from_regions
     1664   
     1665    from anuga.utilities.numerical_tools import cov
     1666    from Numeric import array, resize,shape,Float,zeros,take,argsort,argmin
     1667    from anuga.utilities.polygon import is_inside_polygon
     1668    from anuga.fit_interpolate.benchmark_least_squares import mem_usage
     1669   
     1670
     1671    attribute_smoothed='elevation'
     1672
     1673    if mesh_file is None:
     1674        mesh_file='temp.msh'
     1675
     1676        if north_boundary is None or south_boundary is None or \
     1677           east_boundary is None or west_boundary is None:
     1678            no_boundary=True
     1679        else:
     1680            no_boundary=False
     1681       
     1682        if no_boundary is True:
     1683            msg= 'All boundaries must be defined'
     1684            raise msg
     1685
     1686        poly_topo = [[east_boundary,south_boundary],
     1687                     [east_boundary,north_boundary],
     1688                     [west_boundary,north_boundary],
     1689                     [west_boundary,south_boundary]]
     1690                     
     1691        create_mesh_from_regions(poly_topo,
     1692                                 boundary_tags={'back': [2],
     1693                                                'side': [1,3],
     1694                                                'ocean': [0]},
     1695                             maximum_triangle_area=mesh_resolution,
     1696                             filename=mesh_file,
     1697                             use_cache=cache,
     1698                             verbose=verbose)
     1699
     1700    else: # if mesh file provided
     1701        #test mesh file exists?
     1702        if access(mesh_file,F_OK) == 0:
     1703            msg="file %s doesn't exist!" %mesh_file
     1704            raise IOError, msg
     1705
     1706    #split topo data
     1707    G = Geospatial_data(file_name = data_file)
     1708    if verbose: print 'start split'
     1709    G_small, G_other = G.split(split_factor,seed_num, verbose=verbose)
     1710    if verbose: print 'finish split'
     1711    points=G_small.get_data_points()
     1712
     1713    #FIXME: Remove points outside boundary polygon
     1714#    print 'new point',len(points)
     1715#   
     1716#    new_points=[]
     1717#    new_points=array([],typecode=Float)
     1718#    new_points=resize(new_points,(len(points),2))
     1719#    print "BOUNDARY", boundary_poly
     1720#    for i,point in enumerate(points):
     1721#        if is_inside_polygon(point,boundary_poly, verbose=True):
     1722#            new_points[i] = point
     1723#            print"WOW",i,new_points[i]
     1724#    points = new_points
     1725
     1726   
     1727    if verbose: print "Number of points in sample to compare: ", len(points)
     1728   
     1729    if alpha_list==None:
     1730        alphas = [0.001,0.01,100]
     1731        #alphas = [0.000001, 0.00001, 0.0001, 0.001, 0.01,\
     1732         #         0.1, 1.0, 10.0, 100.0,1000.0,10000.0]
     1733       
     1734    else:
     1735        alphas=alpha_list
    15071736    domains = {}
    15081737
    15091738    if verbose: print 'Setup computational domains with different alphas'
     1739
     1740    print 'memory usage before domains',mem_usage()
     1741
    15101742    for alpha in alphas:
    15111743        #add G_other data to domains with different alphas
     
    15201752        domains[alpha]=domain
    15211753
     1754    print 'memory usage after domains',mem_usage()
     1755
    15221756    #creates array with columns 1 and 2 are x, y. column 3 is elevation
    15231757    #4 onwards is the elevation_predicted using the alpha, which will
     
    15521786        ele_cov= cov(elevation_sample-elevation_predicted)
    15531787        normal_cov[i,:]= [alpha,ele_cov/sample_cov]
     1788        print 'memory usage during compare',mem_usage()
     1789       
    15541790
    15551791        if verbose: print'cov',normal_cov[i][0],'= ',normal_cov[i][1]
Note: See TracChangeset for help on using the changeset viewer.