Changeset 6081


Ignore:
Timestamp:
Dec 16, 2008, 10:17:36 AM (15 years ago)
Author:
rwilson
Message:

pep8 formatted.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • anuga_core/source/anuga/geospatial_data/geospatial_data.py

    r5739 r6081  
    1 """Class Geospatial_data - Manipulation of locations on the planet and 
     1"""Class Geospatial_data - Manipulation of locations on the planet and
    22associated attributes.
    33
    44"""
     5
    56from sys import maxint
    67from os import access, F_OK, R_OK,remove
     
    910from string import lower
    1011from Numeric import concatenate, array, Float, shape, reshape, ravel, take, \
    11                         size, shape
    12 #from Array import tolist
     12                    size, shape
    1313from RandomArray import randint, seed, get_seed
    1414from copy import deepcopy
    15 
    16 #from MA import tolist
    17 
    18 from Scientific.IO.NetCDF import NetCDFFile   
    19 from anuga.coordinate_transforms.lat_long_UTM_conversion import UTMtoLL   
     15from Scientific.IO.NetCDF import NetCDFFile
     16from anuga.coordinate_transforms.lat_long_UTM_conversion import UTMtoLL
    2017from anuga.utilities.numerical_tools import ensure_numeric
    2118from anuga.coordinate_transforms.geo_reference import Geo_reference, \
     
    2320from anuga.coordinate_transforms.redfearn import convert_from_latlon_to_utm
    2421from anuga.utilities.anuga_exceptions import ANUGAError
    25 from anuga.config import points_file_block_line_size as MAX_READ_LINES 
    26 #from anuga.fit_interpolate.benchmark_least_squares import mem_usage
    27  
     22from anuga.config import points_file_block_line_size as MAX_READ_LINES
     23
    2824
    2925DEFAULT_ATTRIBUTE = 'elevation'
    3026
     27
     28##
     29# @brief ??
    3130class Geospatial_data:
    3231
     32    ##
     33    # @brief
     34    # @param data_points Mx2 iterable of tuples or array of x,y coordinates.
     35    # @param attributes Associated values for each data point.
     36    # @param geo_reference ??
     37    # @param default_attribute_name ??
     38    # @param file_name
     39    # @param latitudes ??
     40    # @param longitudes ??
     41    # @param points_are_lats_longs True if points are lat/long, not UTM.
     42    # @param max_read_lines Size of block to read, if blocking.
     43    # @param load_file_now True if blocking but we want to read file now.
     44    # @param verbose True if this class instance is verbose.
    3345    def __init__(self,
    3446                 data_points=None, # this can also be a points file name
     
    4355                 load_file_now=True,
    4456                 verbose=False):
    45 
    46        
    4757        """
    4858        Create instance from data points and associated attributes
     
    5767        in the dictionary represent the attribute names, in the former
    5868        the attribute will get the default name "elevation".
    59        
     69
    6070        geo_reference: Object representing the origin of the data
    6171        points. It contains UTM zone, easting and northing and data
     
    7080        latitudes, longitudes: Vectors of latitudes and longitudes,
    7181        used to specify location instead of points.
    72        
     82
    7383        points_are_lats_longs: Set this as true if the points are actually
    7484        lats and longs, not UTM
     
    7989        load_file_now:  If true the file is automatically loaded
    8090        into the geospatial instance. Used when blocking.
    81        
    82         file_name: Name of input netCDF file or .txt file. netCDF file must 
     91
     92        file_name: Name of input netCDF file or .txt file. netCDF file must
    8393        have dimensions "points" etc.
    8494        .txt file is a comma separated file with x, y and attribute
    8595        data.
    86        
     96
    8797        The first line has the titles of the columns.  The first two
    8898        column titles are checked to see if they start with lat or
     
    91101        UTM.  Otherwise the first two columns are assumed to be the x
    92102        and y, and the title names acually used are ignored.
    93  
    94        
     103
     104
    95105        The format for a .txt file is:
    96106            1st line:     [column names]
     
    105115        The first two columns have to be x, y or lat, long
    106116        coordinates.
    107      
    108        
     117
     118
    109119        The format for a Points dictionary is:
    110 
    111120          ['pointlist'] a 2 column array describing points. 1st column x,
    112121          2nd column y.
     
    119128            dic['pointlist'] = [[1.0,2.0],[3.0,5.0]]
    120129            dic['attributelist']['elevation'] = [[7.0,5.0]
    121                
     130
    122131        verbose:
    123132
    124          
    125133        """
    126134
     
    130138
    131139        self.set_verbose(verbose)
    132         self.geo_reference=None #create the attribute
     140        self.geo_reference = None #create the attribute
    133141        self.file_name = file_name
    134        
     142
    135143        if max_read_lines is None:
    136144            self.max_read_lines = MAX_READ_LINES
     
    139147
    140148        if file_name is None:
    141             if latitudes is not None or longitudes is not None or \
    142                    points_are_lats_longs:
    143                 data_points, geo_reference =  \
    144                              _set_using_lat_long(latitudes=latitudes,
    145                                   longitudes=longitudes,
    146                                   geo_reference=geo_reference,
    147                                   data_points=data_points,
    148                                   points_are_lats_longs=points_are_lats_longs)
     149            if latitudes is not None \
     150               or longitudes is not None \
     151               or points_are_lats_longs:
     152                data_points, geo_reference = \
     153                    _set_using_lat_long(latitudes=latitudes,
     154                                        longitudes=longitudes,
     155                                        geo_reference=geo_reference,
     156                                        data_points=data_points,
     157                                        points_are_lats_longs=\
     158                                            points_are_lats_longs)
    149159            self.check_data_points(data_points)
    150160            self.set_attributes(attributes)
    151161            self.set_geo_reference(geo_reference)
    152162            self.set_default_attribute_name(default_attribute_name)
    153 
    154163        elif load_file_now is True:
    155164            # watch for case where file name and points,
     
    159168            if verbose is True:
    160169                if file_name is not None:
    161                     print 'Loading Geospatial data from file: %s' %file_name
    162            
     170                    print 'Loading Geospatial data from file: %s' % file_name
     171
    163172            self.import_points_file(file_name, verbose=verbose)
    164                
     173
    165174            self.check_data_points(self.data_points)
    166             self.set_attributes(self.attributes) 
     175            self.set_attributes(self.attributes)
    167176            self.set_geo_reference(self.geo_reference)
    168177            self.set_default_attribute_name(default_attribute_name)
     
    170179        if verbose is True:
    171180            if file_name is not None:
    172                 print 'Geospatial data created from file: %s' %file_name
     181                print 'Geospatial data created from file: %s' % file_name
    173182                if load_file_now is False:
    174183                    print 'Data will be loaded blockwise on demand'
     
    182191                        #print 'data as a pts NetCDF format'
    183192
     193    ##
     194    # @brief Return length of the points set.
    184195    def __len__(self):
    185196        return len(self.data_points)
    186197
     198    ##
     199    # @brief Return a string representation of the points set.
    187200    def __repr__(self):
    188201        return str(self.get_data_points(absolute=True))
    189    
    190    
     202
     203    ##
     204    # @brief Save points data in instance.
     205    # @param data_points Points data to store in instance and check.
     206    # @note Throws ValueError exception if no data.
    191207    def check_data_points(self, data_points):
    192208        """Checks data points
    193209        """
    194        
     210
    195211        if data_points is None:
    196212            self.data_points = None
    197213            msg = 'There is no data or file provided!'
    198214            raise ValueError, msg
    199            
     215
    200216        else:
    201217            self.data_points = ensure_numeric(data_points)
    202             #print "self.data_points.shape",self.data_points.shape
    203218            if not (0,) == self.data_points.shape:
    204219                assert len(self.data_points.shape) == 2
    205220                assert self.data_points.shape[1] == 2
    206221
     222    ##
     223    # @brief Check and assign attributes data.
     224    # @param attributes Dictionary or scalar to save as .attributes.
     225    # @note Throws exception if unable to convert dict keys to numeric.
    207226    def set_attributes(self, attributes):
    208227        """Check and assign attributes dictionary
    209228        """
    210        
     229
    211230        if attributes is None:
    212231            self.attributes = None
    213232            return
    214        
     233
    215234        if not isinstance(attributes, DictType):
    216235            #Convert single attribute into dictionary
    217236            attributes = {DEFAULT_ATTRIBUTE: attributes}
    218237
    219         #Check input attributes   
     238        #Check input attributes
    220239        for key in attributes.keys():
    221240            try:
     
    226245                raise msg
    227246
    228         self.attributes = attributes   
     247        self.attributes = attributes
    229248
    230249    #def set_geo_reference(self, geo_reference):
     
    232251    #    self.setgeo_reference(geo_reference)
    233252
     253    ##
     254    # @brief Set the georeference of geospatial data.
     255    # @param geo_reference The georeference data to set.
     256    # @note Will raise exception if param not instance of Geo_reference.
    234257    def set_geo_reference(self, geo_reference):
    235258        """Set the georeference of geospatial data.
    236        
     259
    237260        It can also be used to change the georeference and will ensure that
    238261        the absolute coordinate values are unchanged.
    239262        """
     263
    240264        from anuga.coordinate_transforms.geo_reference import Geo_reference
    241265
     
    243267            # Use default - points are in absolute coordinates
    244268            geo_reference = Geo_reference()
    245            
    246         # Allow for tuple (zone, xllcorner, yllcorner)   
     269
     270        # Allow for tuple (zone, xllcorner, yllcorner)
    247271        geo_reference = ensure_geo_reference(geo_reference)
    248        
     272
    249273        if not isinstance(geo_reference, Geo_reference):
    250             # FIXME (Ole): This exception will be raised even 
     274            # FIXME (Ole): This exception will be raised even
    251275            # if geo_reference is None. Is that the intent Duncan?
    252             msg = 'Argument geo_reference must be a valid Geo_reference \n'
     276            msg = 'Argument geo_reference must be a valid Geo_reference '
    253277            msg += 'object or None.'
    254278            raise msg
     
    258282        if  self.geo_reference is not None:
    259283            self.data_points = self.get_data_points(geo_reference=geo_reference)
    260            
     284
    261285        self.geo_reference = geo_reference
    262286
    263 
     287    ##
     288    # @brief Set default attribute name.
     289    # @param default_attribute_name The default to save.
    264290    def set_default_attribute_name(self, default_attribute_name):
    265291        self.default_attribute_name = default_attribute_name
    266292
     293    ##
     294    # @brief Set the instance verbose flag.
     295    # @param verbose The value to save.
     296    # @note Will raise exception if param is not True or False.
    267297    def set_verbose(self, verbose=False):
    268298        if verbose in [False, True]:
    269299            self.verbose = verbose
    270300        else:
    271             msg = 'Illegal value: %s' %str(verbose)
     301            msg = 'Illegal value: %s' % str(verbose)
    272302            raise Exception, msg
    273303
     304    ##
     305    # @brief Clip geospatial data by a given polygon.
     306    # @param polygon The polygon to clip with.
     307    # @param closed True if points on clip boundary are not included in result.
     308    # @param verbose True if this function is verbose.
    274309    def clip(self, polygon, closed=True, verbose=False):
    275310        """Clip geospatial data by a polygon
     
    281316          regarded as belonging to the polygon (closed = True)
    282317          or not (closed = False). Default is True.
    283            
     318
    284319        Output
    285320          New geospatial data object representing points inside
    286321          specified polygon.
    287        
    288        
    289         Note - this method is non-destructive and leaves the data in 'self' 
     322
     323
     324        Note - this method is non-destructive and leaves the data in 'self'
    290325        unchanged
    291326        """
     
    297332            polygon = polygon.get_data_points()
    298333
    299         points = self.get_data_points()   
    300 #        if verbose: print '%s points:%s' %(verbose,points)
     334        points = self.get_data_points()
    301335        inside_indices = inside_polygon(points, polygon, closed, verbose)
    302336
    303337        clipped_G = self.get_sample(inside_indices)
    304338
    305 #        clipped_points = take(points, inside_indices)
    306 
    307         # Clip all attributes
    308 #        attributes = self.get_all_attributes()
    309 
    310 #        clipped_attributes = {}
    311 #        if attributes is not None:
    312 #            for key, att in attributes.items():
    313 #                clipped_attributes[key] = take(att, inside_indices)
    314 
    315 #        return Geospatial_data(clipped_points, clipped_attributes)
    316339        return clipped_G
    317        
    318        
    319     def clip_outside(self, polygon, closed=True,verbose=False):
     340
     341    ##
     342    # @brief Clip points data by polygon, return points outside polygon.
     343    # @param polygon The polygon to clip with.
     344    # @param closed True if points on clip boundary are not included in result.
     345    # @param verbose True if this function is verbose.
     346    def clip_outside(self, polygon, closed=True, verbose=False):
    320347        """Clip geospatial data by a polygon, keeping data OUTSIDE of polygon
    321348
     
    326353          regarded as belonging to the polygon (closed = True)
    327354          or not (closed = False). Default is True.
    328          
     355
    329356        Output
    330357          Geospatial data object representing point OUTSIDE specified polygon
    331        
    332358        """
    333359
     
    338364            polygon = polygon.get_data_points()
    339365
    340         points = self.get_data_points()   
     366        points = self.get_data_points()
    341367        outside_indices = outside_polygon(points, polygon, closed,verbose)
    342368
    343369        clipped_G = self.get_sample(outside_indices)
    344370
    345 #        clipped_points = take(points, outside_indices)
    346 
    347         # Clip all attributes
    348 #        attributes = self.get_all_attributes()
    349 
    350 #        clipped_attributes = {}
    351 #        if attributes is not None:
    352 #            for key, att in attributes.items():
    353 #                clipped_attributes[key] = take(att, outside_indices)
    354 
    355 #        return Geospatial_data(clipped_points, clipped_attributes)
    356371        return clipped_G
    357372
    358    
     373    ##
     374    # @brief Get instance geo_reference data.
    359375    def get_geo_reference(self):
    360376        return self.geo_reference
    361        
     377
     378    ##
     379    # @brief Get coordinates for all data points as an Nx2 array.
     380    # @param absolute If True, return UTM, else relative to xll/yll corners.
     381    # @param geo_reference If supplied, points are relative to it.
     382    # @param as_lat_long If True, return points as lat/lon.
     383    # @param isSouthHemisphere If True, return lat/lon points in S.Hemi.
     384    # @return A set of data points, in appropriate form.
    362385    def get_data_points(self, absolute=True, geo_reference=None,
    363386                        as_lat_long=False, isSouthHemisphere=True):
     
    370393        If a geo_reference is passed the points are returned relative
    371394        to that geo_reference.
    372        
    373         isSH (isSouthHemisphere) is only used when getting data 
    374         points "as_lat_long" is True and if FALSE will return lats and 
     395
     396        isSH (isSouthHemisphere) is only used when getting data
     397        points "as_lat_long" is True and if FALSE will return lats and
    375398        longs valid for the Northern Hemisphere.
    376399
    377400        Default: absolute is True.
    378401        """
     402
    379403        if as_lat_long is True:
    380404            msg = "Points need a zone to be converted into lats and longs"
     
    384408            lats_longs = []
    385409            for point in self.get_data_points(True):
    386            
    387410                # UTMtoLL(northing, easting, zone,
    388                 lat_calced, long_calced = UTMtoLL(point[1],point[0],
     411                lat_calced, long_calced = UTMtoLL(point[1], point[0],
    389412                                                  zone, isSouthHemisphere)
    390413                lats_longs.append((lat_calced, long_calced)) # to hash
    391414            return lats_longs
    392            
     415
    393416        if absolute is True and geo_reference is None:
    394417            return self.geo_reference.get_absolute(self.data_points)
    395418        elif geo_reference is not None:
    396             return geo_reference.change_points_geo_ref(self.data_points, 
     419            return geo_reference.change_points_geo_ref(self.data_points,
    397420                                                       self.geo_reference)
    398421        else:
    399             # If absolute is False 
     422            # If absolute is False
    400423            return self.data_points
    401424
    402    
     425    ##
     426    # @brief Get value for attribute name.
     427    # @param attribute_name Name to get value for.
     428    # @note If name passed is None, return default attribute value.
    403429    def get_attributes(self, attribute_name=None):
    404430        """Return values for one named attribute.
     
    411437                attribute_name = self.default_attribute_name
    412438            else:
    413                 attribute_name = self.attributes.keys()[0] 
     439                attribute_name = self.attributes.keys()[0]
    414440                # above line takes the first one from keys
    415        
     441
    416442        if self.verbose is True:
    417443            print 'Using attribute %s' %attribute_name
    418             print 'Available attributes: %s' %(self.attributes.keys())       
    419 
    420         msg = 'Attribute name %s does not exist in data set' %attribute_name
     444            print 'Available attributes: %s' %(self.attributes.keys())
     445
     446        msg = 'Attribute name %s does not exist in data set' % attribute_name
    421447        assert self.attributes.has_key(attribute_name), msg
    422448
    423449        return self.attributes[attribute_name]
    424450
     451    ##
     452    # @brief Get all instance attributes.
     453    # @return The instance attribute dictionary, or None if no attributes.
    425454    def get_all_attributes(self):
    426         """
    427         Return values for all attributes.
     455        """Return values for all attributes.
    428456        The return value is either None or a dictionary (possibly empty).
    429457        """
     
    431459        return self.attributes
    432460
     461    ##
     462    # @brief Override __add__() to allow addition of geospatial objects.
     463    # @param self This object.
     464    # @param other The second object.
     465    # @return The new geospatial object.
    433466    def __add__(self, other):
    434         """
    435         Returns the addition of 2 geospatical objects,
     435467        """Returns the addition of 2 geospatial objects,
    436468        objects are concatenated to the end of each other
    437            
    438         NOTE: doesn't add if objects contain different 
    439         attributes 
    440        
     469
     470        NOTE: doesn't add if objects contain different
     471        attributes
     472
    441473        Always return absolute points!
    442         This alse means, that if you add None to the object,
     474        This also means, that if you add None to the object,
    443475        it will be turned into absolute coordinates
    444476
    445477        other can be None in which case nothing is added to self.
    446478        """
    447 
    448479
    449480        # find objects zone and checks if the same
     
    451482        zone1 = geo_ref1.get_zone()
    452483
    453 
    454484        if other is not None:
    455 
    456485            geo_ref2 = other.get_geo_reference()
    457486            zone2 = geo_ref2.get_zone()
    458 
    459487            geo_ref1.reconcile_zones(geo_ref2)
    460 
    461        
    462488            new_points = concatenate((self.get_data_points(absolute=True),
    463489                                      other.get_data_points(absolute=True)),
    464                                       axis = 0)       
    465 
    466        
    467      
     490                                     axis = 0)
     491
    468492            # Concatenate attributes if any
    469493            if self.attributes is None:
    470494                if other.attributes is not None:
    471                     msg = 'Geospatial data must have the same \n'
     495                    msg = 'Geospatial data must have the same '
    472496                    msg += 'attributes to allow addition.'
    473497                    raise Exception, msg
    474                
     498
    475499                new_attributes = None
    476             else:   
     500            else:
    477501                new_attributes = {}
    478502                for x in self.attributes.keys():
    479503                    if other.attributes.has_key(x):
    480 
    481504                        attrib1 = self.attributes[x]
    482505                        attrib2 = other.attributes[x]
    483506                        new_attributes[x] = concatenate((attrib1, attrib2))
    484 
    485507                    else:
    486508                        msg = 'Geospatial data must have the same \n'
    487509                        msg += 'attributes to allow addition.'
    488510                        raise Exception, msg
    489 
    490 
    491511        else:
    492512            #other is None:
    493            
    494513            new_points = self.get_data_points(absolute=True)
    495514            new_attributes = self.attributes
    496515
    497                    
    498 
    499516        # Instantiate new data object and return absolute coordinates
    500517        new_geo_ref = Geo_reference(geo_ref1.get_zone(), 0.0, 0.0)
    501         return Geospatial_data(new_points,
    502                                new_attributes,
    503                                new_geo_ref)
    504 
    505 
     518        return Geospatial_data(new_points, new_attributes, new_geo_ref)
     519
     520    ##
     521    # @brief Override the addition case where LHS isn't geospatial object.
     522    # @param self This object.
     523    # @param other The second object.
     524    # @return The new geospatial object.
    506525    def __radd__(self, other):
    507526        """Handle cases like None + Geospatial_data(...)
     
    510529        return self + other
    511530
    512    
    513    
    514531    ###
    515532    #  IMPORT/EXPORT POINTS FILES
    516533    ###
    517534
     535    ##
     536    # @brief Import a .txt, .csv or .pts points data file.
     537    # @param file_name
     538    # @param delimiter
     539    # @param verbose True if this function is to be verbose.
     540    # @note Will throw IOError or SyntaxError if there is a problem.
    518541    def import_points_file(self, file_name, delimiter=None, verbose=False):
    519542        """ load an .txt, .csv or .pts file
     
    523546        Post condition: self.attributes dictionary has been set
    524547        """
    525        
     548
    526549        if access(file_name, F_OK) == 0 :
    527             msg = 'File %s does not exist or is not accessible' %file_name
     550            msg = 'File %s does not exist or is not accessible' % file_name
    528551            raise IOError, msg
    529        
     552
    530553        attributes = {}
    531554        if file_name[-4:]== ".pts":
    532555            try:
    533                 data_points, attributes, geo_reference =\
     556                data_points, attributes, geo_reference = \
    534557                             _read_pts_file(file_name, verbose)
    535             except IOError, e:   
    536                 msg = 'Could not open file %s ' %file_name
    537                 raise IOError, msg 
    538        
     558            except IOError, e:
     559                msg = 'Could not open file %s ' % file_name
     560                raise IOError, msg
    539561        elif file_name[-4:]== ".txt" or file_name[-4:]== ".csv":
    540562            try:
    541                 data_points, attributes, geo_reference =\
     563                data_points, attributes, geo_reference = \
    542564                             _read_csv_file(file_name, verbose)
    543565            except IOError, e:
    544566                # This should only be if a file is not found
    545                 msg = 'Could not open file %s. ' %file_name
     567                msg = 'Could not open file %s. ' % file_name
    546568                msg += 'Check the file location.'
    547569                raise IOError, msg
     
    550572                msg = 'Could not open file %s. \n' %file_name
    551573                msg += Error_message['IOError']
    552                 #print "msg", msg
    553574                raise SyntaxError, msg
    554         else:     
     575        else:
    555576            msg = 'Extension %s is unknown' %file_name[-4:]
    556577            raise IOError, msg
    557 #        print'in import data_points', data_points
    558 #        print'in import attributes', attributes
    559 #        print'in import data_points', geo_reference
     578
    560579        self.data_points = data_points
    561580        self.attributes = attributes
    562581        self.geo_reference = geo_reference
    563    
    564 #        return all_data
    565    
    566     def export_points_file(self, file_name, absolute=True,
     582
     583    ##
     584    # @brief Write points data to a file (.csv or .pts).
     585    # @param file_name Path to file to write.
     586    # @param absolute ??
     587    # @param as_lat_long ??
     588    # @param isSouthHemisphere ??
     589    def export_points_file(self, file_name, absolute=True,
    567590                           as_lat_long=False, isSouthHemisphere=True):
    568        
     591
    569592        """
    570593        write a points file, file_name, as a text (.csv) or binary (.pts) file
    571594        file_name is the file name, including the extension
    572595        The point_dict is defined at the top of this file.
    573        
    574         If absolute is True data the xll and yll are added to the points value 
     596
     597        If absolute is True data the xll and yll are added to the points value
    575598        and the xll and yll of the geo_reference are set to 0.
    576        
    577         If absolute is False data points at returned as relative to the xll 
     599
     599        If absolute is False data points are returned as relative to the xll
    578601        and yll and geo_reference remains unaffected
    579        
    580         isSouthHemisphere: is only used when getting data 
    581         points "as_lat_long" is True and if FALSE will return lats and 
     602
     603        isSouthHemisphere: is only used when getting data
     604        points "as_lat_long" is True and if FALSE will return lats and
    582605        longs valid for the Northern Hemisphere.
    583        
    584606        """
    585607
     
    590612                geo_ref.yllcorner = 0
    591613                _write_pts_file(file_name,
    592                                 self.get_data_points(absolute), 
     614                                self.get_data_points(absolute),
    593615                                self.get_all_attributes(),
    594616                                geo_ref)
    595617            else:
    596618                _write_pts_file(file_name,
    597                                 self.get_data_points(absolute), 
     619                                self.get_data_points(absolute),
    598620                                self.get_all_attributes(),
    599621                                self.get_geo_reference())
    600                
     622
    601623        elif file_name[-4:] == ".txt" or file_name[-4:] == ".csv":
    602624            msg = "ERROR: trying to write a .txt file with relative data."
     
    605627                            self.get_data_points(absolute=True,
    606628                                                 as_lat_long=as_lat_long,
    607                                   isSouthHemisphere=isSouthHemisphere), 
     629                                  isSouthHemisphere=isSouthHemisphere),
    608630                            self.get_all_attributes(),
    609631                            as_lat_long=as_lat_long)
    610              
     632
    611633        elif file_name[-4:] == ".urs" :
    612634            msg = "ERROR: Can not write a .urs file as a relative file."
     
    615637                            self.get_data_points(as_lat_long=True,
    616638                                  isSouthHemisphere=isSouthHemisphere))
    617                                                          
     639
    618640        else:
    619641            msg = 'Unknown file type %s ' %file_name
    620             raise IOError, msg
    621        
     642            raise IOError, msg
     643
     644    ##
     645    # @brief Get a subset of data that is referred to by 'indices'.
     646    # @param indices A list of indices to select data subset with.
     647    # @return A geospatial object containing data subset.
    622648    def get_sample(self, indices):
    623649        """ Returns a object which is a subset of the original
    624650        and the data points and attributes in this new object refer to
    625651        the indices provided
    626        
     652
    627653        Input
    628654            indices- a list of integers that represent the new object
    629655        Output
    630             New geospatial data object representing points specified by
    631             the indices
    632             """
     656            New geospatial data object representing points specified by
     657            the indices
     658        """
     659
    633660        #FIXME: add the geo_reference to this
    634 #        print 'hello from get_sample'
    635661        points = self.get_data_points()
    636662        sampled_points = take(points, indices)
     
    643669                sampled_attributes[key] = take(att, indices)
    644670
    645 #        print 'goodbye from get_sample'
    646 
    647671        return Geospatial_data(sampled_points, sampled_attributes)
    648    
    649    
    650     def split(self, factor=0.5,seed_num=None, verbose=False):
    651         """Returns two       
     672
     673    ##
     674    # @brief Split one geospatial object into two.
     675    # @param factor Relative size to make first result object.
     676    # @param seed_num Random 'seed' - used only for unit test.
     677    # @param verbose True if this function is to be verbose.
     678    # @note Points in each result object are selected randomly.
     679    def split(self, factor=0.5, seed_num=None, verbose=False):
     680        """Returns two
    652681        geospatial_data object, first is the size of the 'factor'
    653682        smaller the original and the second is the remainder. The two
    654683        new object are disjoin set of each other.
    655        
    656         Points of the two new object have selected RANDOMLY. 
    657        
     684
     685        Points of the two new object have selected RANDOMLY.
     686
    658687        This method create two lists of indices which are passed into
    659688        get_sample.  The lists are created using random numbers, and
    660689        they are unique sets eg.  total_list(1,2,3,4,5,6,7,8,9)
    661690        random_list(1,3,6,7,9) and remainder_list(0,2,4,5,8)
    662                
    663         Input - the factor which to split the object, if 0.1 then 10% of the
    664             together object will be returned
    665        
    666         Output - two geospatial_data objects that are disjoint sets of the 
    667             original
    668             """
    669        
     691
     692        Input -  the factor which to split the object, if 0.1 then 10% of the
     693                 together object will be returned
     694
     695        Output - two geospatial_data objects that are disjoint sets of the
     696                 original
     697        """
     698
    670699        i=0
    671700        self_size = len(self)
     
    673702        remainder_list = []
    674703        new_size = round(factor*self_size)
    675        
     704
    676705        # Find unique random numbers
    677706        if verbose: print "make unique random number list and get indices"
     
    679708        total=array(range(self_size))
    680709        total_list = total.tolist()
    681         if verbose: print "total list len",len(total_list)
    682                
    683         # There will be repeated random numbers however will not be a
     710
     711        if verbose: print "total list len", len(total_list)
     712
     713        # There will be repeated random numbers however will not be a
    684714        # problem as they are being 'pop'ed out of array so if there
    685         # are two numbers the same they will pop different indicies, 
     715        # are two numbers the same they will pop different indicies,
    686716        # still basically random
    687717        ## create list of non-unquie random numbers
    688718        if verbose: print "create random numbers list %s long" %new_size
    689        
     719
    690720        # Set seed if provided, mainly important for unit test!
    691721        # plus recalcule seed when no seed provided.
    692722        if seed_num != None:
    693             seed(seed_num,seed_num)
     723            seed(seed_num, seed_num)
    694724        else:
    695725            seed()
     726
    696727        if verbose: print "seed:", get_seed()
    697        
    698         #print 'size',self_size, new_size
    699         random_num = randint(0,self_size-1,(int(new_size),))
    700         #print 'random num',random_num
     728
     729        random_num = randint(0, self_size-1, (int(new_size),))
    701730        random_num = random_num.tolist()
    702731
    703732        #need to sort and reverse so the pop() works correctly
    704733        random_num.sort()
    705         random_num.reverse()       
    706        
     734        random_num.reverse()
     735
    707736        if verbose: print "make random number list and get indices"
     737
    708738        j=0
    709739        k=1
    710740        remainder_list = total_list[:]
     741
    711742        # pops array index (random_num) from remainder_list
    712         # (which starts as the 
    713         # total_list and appends to random_list 
     743        # (which starts as the
     744        # total_list and appends to random_list
    714745        random_num_len = len(random_num)
    715746        for i in random_num:
    716747            random_list.append(remainder_list.pop(i))
    717             j+=1
     748            j += 1
    718749            #prints progress
    719             if verbose and round(random_num_len/10*k)==j:
    720                 print '(%s/%s)' %(j, random_num_len)
    721                 k+=1
    722        
     750            if verbose and round(random_num_len/10*k) == j:
     751                print '(%s/%s)' % (j, random_num_len)
     752                k += 1
     753
    723754        # FIXME: move to tests, it might take a long time
    724         # then create an array of random lenght between 500 and 1000, 
    725         # and use a random factor between 0 and 1 
     755        # then create an array of random lenght between 500 and 1000,
     756        # and use a random factor between 0 and 1
    726757        # setup for assertion
    727758        test_total = random_list[:]
    728759        test_total.extend(remainder_list)
    729         test_total.sort() 
    730         msg = 'The two random lists made from the original list when added '\
    731          'together DO NOT equal the original list'
    732         assert (total_list==test_total),msg
     760        test_total.sort()
     761        msg = 'The two random lists made from the original list when added ' \
     762              'together DO NOT equal the original list'
     763        assert total_list == test_total, msg
    733764
    734765        # Get new samples
     
    740771        return G1, G2
    741772
     773    ##
     774    # @brief Allow iteration over this object.
    742775    def __iter__(self):
    743776        """Read in the header, number_of_points and save the
     
    745778        """
    746779        from Scientific.IO.NetCDF import NetCDFFile
    747         #print "Starting to block"
    748780        #FIXME - what to do if the file isn't there
    749781
    750782        # FIXME (Ole): Shouldn't this go into the constructor?
    751         # This method acts like the constructor when blcoking.
     783        # This method acts like the constructor when blocking.
    752784        # ... and shouldn't it be called block_size?
    753         # 
     785        #
    754786        if self.max_read_lines is None:
    755787            self.max_read_lines = MAX_READ_LINES
     788
    756789        if self.file_name[-4:] == ".pts":
    757            
    758790            # See if the file is there.  Throw a QUIET IO error if it isn't
    759791            fd = open(self.file_name,'r')
    760792            fd.close()
    761    
     793
    762794            # Throws prints to screen if file not present
    763795            self.fid = NetCDFFile(self.file_name, 'r')
    764            
    765             self.blocking_georef, self.blocking_keys, self.number_of_points =\
    766                                   _read_pts_file_header(self.fid,
    767                                                         self.verbose)
     796
     797            (self.blocking_georef,
     798             self.blocking_keys,
     799             self.number_of_points) = _read_pts_file_header(self.fid,
     800                                                            self.verbose)
    768801            self.start_row = 0
    769             self.last_row = self.number_of_points           
     802            self.last_row = self.number_of_points
    770803            self.show_verbose = 0
    771804            self.verbose_block_size = (self.last_row + 10)/10
     
    774807            # This computes the number of full blocks. The last block may be
    775808            # smaller and won't be ircluded in this estimate.
    776            
     809
    777810            if self.verbose is True:
    778                 print 'Reading %d points (in ~%d blocks) from file %s. '\
    779                       %(self.number_of_points,
    780                         self.number_of_blocks,
    781                         self.file_name),
    782                 print 'Each block consists of %d data points'\
    783                       %self.max_read_lines
    784            
     811                print 'Reading %d points (in ~%d blocks) from file %s. ' \
     812                      % (self.number_of_points,
     813                         self.number_of_blocks,
     814                         self.file_name),
     815                print 'Each block consists of %d data points' \
     816                      % self.max_read_lines
     817
    785818        else:
    786819            # Assume the file is a csv file
    787820            file_pointer = open(self.file_name)
    788             self.header, self.file_pointer = \
    789                          _read_csv_file_header(file_pointer)
     821            self.header, self.file_pointer = _read_csv_file_header(file_pointer)
    790822            self.blocking_georef = None # Used for reconciling zones
    791823
    792824        return self
    793    
    794    
     825
     826    ##
     827    # @brief Read another block into the instance.
    795828    def next(self):
    796         """ read a block, instanciate a new geospatial and return it"""
    797        
     829        """read a block, instanciate a new geospatial and return it"""
     830
    798831        if self.file_name[-4:] == ".pts":
    799832            if self.start_row == self.last_row:
     
    812845                fin_row = self.last_row
    813846
    814                
    815            
    816847            if self.verbose is True:
    817                 if self.show_verbose >= self.start_row and \
    818                        self.show_verbose < fin_row:
    819 
    820                    
    821                     #print 'Doing %d of %d' %(self.start_row, self.last_row+10)
    822 
     848                if self.show_verbose >= self.start_row \
     849                   and self.show_verbose < fin_row:
    823850                    print 'Reading block %d (points %d to %d) out of %d'\
    824851                          %(self.block_number,
     
    827854                            self.number_of_blocks)
    828855
    829                    
    830856                    self.show_verbose += max(self.max_read_lines,
    831857                                             self.verbose_block_size)
    832858
    833                  
     859
    834860            # Read next block
    835             pointlist, att_dict, = \
    836                        _read_pts_file_blocking(self.fid,
    837                                                self.start_row,
    838                                                fin_row,
    839                                                self.blocking_keys)
    840            
     861            pointlist, att_dict, = _read_pts_file_blocking(self.fid,
     862                                                           self.start_row,
     863                                                           fin_row,
     864                                                           self.blocking_keys)
     865
    841866            geo = Geospatial_data(pointlist, att_dict, self.blocking_georef)
    842867            self.start_row = fin_row
    843            
    844             self.block_number += 1           
    845            
     868
     869            self.block_number += 1
     870
    846871        else:
    847872            # Assume the file is a csv file
    848873            try:
    849                 #print "self.max_read_lines", self.max_read_lines
    850                 pointlist, att_dict, geo_ref, self.file_pointer = \
    851                    _read_csv_file_blocking( self.file_pointer,
    852                                             self.header[:],
    853                                             max_read_lines=self.max_read_lines,
    854                                             verbose=self.verbose)
     874                (pointlist,
     875                 att_dict,
     876                 geo_ref,
     877                 self.file_pointer) = \
     878                        _read_csv_file_blocking(self.file_pointer,
     879                                                self.header[:],
     880                                                max_read_lines=\
     881                                                    self.max_read_lines,
     882                                                verbose=self.verbose)
    855883
    856884                # Check that the zones haven't changed.
     
    859887                    self.blocking_georef = geo_ref
    860888                elif self.blocking_georef is not None:
    861                    
    862889                    msg = 'Geo reference given, then not given.'
    863                     msg += ' This should not happen.' 
     890                    msg += ' This should not happen.'
    864891                    raise ValueError, msg
    865892                geo = Geospatial_data(pointlist, att_dict, geo_ref)
     
    879906                del self.file_pointer
    880907                # This should only be if there is a format error
    881                 msg = 'Could not open file %s. \n' %self.file_name
     908                msg = 'Could not open file %s. \n' % self.file_name
    882909                msg += Error_message['IOError']
    883910                raise SyntaxError, msg
    884911        return geo
    885912
    886    
     913
    887914##################### Error messages ###########
    888915Error_message = {}
     
    901928Em['IOError'] += "The attribute values must be numeric.\n"
    902929
     930##
     931# @brief ??
     932# @param latitudes ??
     933# @param longitudes ??
     934# @param geo_reference ??
     935# @param data_points ??
     936# @param points_are_lats_longs ??
    903937def _set_using_lat_long(latitudes,
    904938                        longitudes,
     
    909943    if the points has lat long info, assume it is in (lat, long) order.
    910944    """
    911    
     945
    912946    if geo_reference is not None:
    913         msg = """A georeference is specified yet latitude and longitude
    914         are also specified!"""
     947        msg = "A georeference is specified yet latitude and longitude " \
     948              "are also specified!"
    915949        raise ValueError, msg
    916    
     950
    917951    if data_points is not None and not points_are_lats_longs:
    918         msg = """Data points are specified yet latitude and
    919         longitude are also specified."""
     952        msg = "Data points are specified yet latitude and longitude are " \
     953              "also specified."
    920954        raise ValueError, msg
    921    
     955
    922956    if points_are_lats_longs:
    923957        if data_points is None:
    924             msg = """Data points are not specified."""
     958            msg = "Data points are not specified."
    925959            raise ValueError, msg
    926960        lats_longs = ensure_numeric(data_points)
    927961        latitudes = ravel(lats_longs[:,0:1])
    928962        longitudes = ravel(lats_longs[:,1:])
    929        
     963
    930964    if latitudes is None and longitudes is None:
    931         msg = """Latitudes and Longitudes are not specified."""
     965        msg = "Latitudes and Longitudes are not specified."
    932966        raise ValueError, msg
    933    
     967
    934968    if latitudes is None:
    935         msg = """Longitudes are specified yet latitudes aren't."""
     969        msg = "Longitudes are specified yet latitudes aren't."
    936970        raise ValueError, msg
    937    
     971
    938972    if longitudes is None:
    939         msg = """Latitudes are specified yet longitudes aren't."""
     973        msg = "Latitudes are specified yet longitudes aren't."
    940974        raise ValueError, msg
    941    
     975
    942976    data_points, zone  = convert_from_latlon_to_utm(latitudes=latitudes,
    943977                                                    longitudes=longitudes)
    944978    return data_points, Geo_reference(zone=zone)
    945979
    946    
     980
     981##
     982# @brief Read a .pts data file.
     983# @param file_name Path to file to read.
     984# @param verbose True if this function is to be verbose.
     985# @return (pointlist, attributes, geo_reference)
    947986def _read_pts_file(file_name, verbose=False):
    948987    """Read .pts NetCDF file
    949    
     988
    950989    Return a dic of array of points, and dic of array of attribute
    951990    eg
    952991    dic['points'] = [[1.0,2.0],[3.0,5.0]]
    953     dic['attributelist']['elevation'] = [[7.0,5.0]
    954     """   
     992    dic['attributelist']['elevation'] = [[7.0,5.0]]
     993    """
    955994
    956995    from Scientific.IO.NetCDF import NetCDFFile
    957    
     996
    958997    if verbose: print 'Reading ', file_name
    959    
    960        
     998
    961999    # See if the file is there.  Throw a QUIET IO error if it isn't
    9621000    fd = open(file_name,'r')
    9631001    fd.close()
    964    
     1002
    9651003    # Throws prints to screen if file not present
    966     fid = NetCDFFile(file_name, 'r') 
    967    
     1004    fid = NetCDFFile(file_name, 'r')
     1005
    9681006    pointlist = array(fid.variables['points'])
    9691007    keys = fid.variables.keys()
    970     if verbose: print 'Got %d variables: %s' %(len(keys), keys)
     1008
     1009    if verbose: print 'Got %d variables: %s' % (len(keys), keys)
     1010
    9711011    try:
    9721012        keys.remove('points')
    973     except IOError, e:       
    974         fid.close()   
    975         msg = 'Expected keyword "points" but could not find it'
     1013    except IOError, e:
     1014        fid.close()
     1015        msg = "Expected keyword 'points' but could not find it"
    9761016        raise IOError, msg
    977    
     1017
    9781018    attributes = {}
    9791019    for key in keys:
    980         if verbose: print "reading attribute '%s'" %key
    981            
     1020        if verbose: print "reading attribute '%s'" % key
     1021
    9821022        attributes[key] = array(fid.variables[key])
    983    
    984    
     1023
    9851024    try:
    9861025        geo_reference = Geo_reference(NetCDFObject=fid)
    9871026    except AttributeError, e:
    9881027        geo_reference = None
    989    
     1028
    9901029    fid.close()
    991    
     1030
    9921031    return pointlist, attributes, geo_reference
    9931032
    9941033
     1034##
     1035# @brief Read a .csv data file.
     1036# @param file_name Path to the .csv file to read.
     1037# @param verbose True if this function is to be verbose.
    9951038def _read_csv_file(file_name, verbose=False):
    9961039    """Read .csv file
    997    
     1040
    9981041    Return a dic of array of points, and dic of array of attribute
    9991042    eg
    10001043    dic['points'] = [[1.0,2.0],[3.0,5.0]]
    1001     dic['attributelist']['elevation'] = [[7.0,5.0]
    1002     """
    1003    
     1044    dic['attributelist']['elevation'] = [[7.0,5.0]]
     1045    """
     1046
    10041047    file_pointer = open(file_name)
    10051048    header, file_pointer = _read_csv_file_header(file_pointer)
    10061049    try:
    1007         pointlist, att_dict, geo_ref, file_pointer = \
    1008                    _read_csv_file_blocking( \
    1009                 file_pointer,
    1010                 header,
    1011                 max_read_lines=1e30) #If the file is bigger that this, block..  # FIXME (Ole) What's up here?
    1012                
     1050        (pointlist,
     1051         att_dict,
     1052         geo_ref,
     1053         file_pointer) = _read_csv_file_blocking(file_pointer,
     1054                                                 header,
     1055                                                 max_read_lines=1e30)
     1056                                    #If the file is bigger that this, block..
     1057                                    # FIXME (Ole) What's up here?
    10131058    except ANUGAError:
    10141059        file_pointer.close()
    1015         raise
     1060        raise
     1061
    10161062    file_pointer.close()
    1017     return pointlist, att_dict, geo_ref   
     1063
     1064    return pointlist, att_dict, geo_ref
     1065
     1066
     1067##
     1068# @brief Read a .csv file header.
     1069# @param file_pointer Open descriptor of the file to read.
     1070# @param delimiter Header line delimiter string, split on this string.
     1071# @param verbose True if this function is to be verbose.
     1072# @return A tuple of (<cleaned header string>, <input file_pointer>)
    10181073
    10191074CSV_DELIMITER = ','
     1075
    10201076def _read_csv_file_header(file_pointer,
    10211077                          delimiter=CSV_DELIMITER,
    10221078                          verbose=False):
    1023 
    10241079    """Read the header of a .csv file
    10251080    Return a list of the header names
    10261081    """
     1082
    10271083    line = file_pointer.readline()
    10281084    header = clean_line(line, delimiter)
     1085
    10291086    return header, file_pointer
    10301087
     1088##
     1089# @brief Read a .csv file, with blocking.
     1090# @param file_pointer Open descriptor of the file to read.
     1091# @param header List of already read .csv header fields.
     1092# @param delimiter Delimiter string header was split on.
     1093# @param max_read_lines The max number of lines to read before blocking.
     1094# @param verbose True if this function is to be verbose.
     1095# @note Will throw IndexError, SyntaxError exceptions.
    10311096def _read_csv_file_blocking(file_pointer, header,
    10321097                            delimiter=CSV_DELIMITER,
    10331098                            max_read_lines=MAX_READ_LINES,
    10341099                            verbose=False):
    1035    
    1036 
    1037     """
    1038     Read the body of a .csv file.
     1100    """Read the body of a .csv file.
    10391101    header: The list header of the csv file, with the x and y labels.
    10401102    """
     1103
    10411104    points = []
    10421105    pointattributes = []
     
    10521115        # eg if it is a space seperated file
    10531116        raise SyntaxError
    1054    
     1117
    10551118    read_lines = 0
    1056     while read_lines<max_read_lines:
     1119    while read_lines < max_read_lines:
    10571120        line = file_pointer.readline()
    1058         #print "line",line
    1059         numbers = clean_line(line,delimiter)
     1121        numbers = clean_line(line, delimiter)
    10601122        if len(numbers) <= 1:
    10611123            break
    10621124        if line[0] == '#':
    10631125            continue
     1126
    10641127        read_lines += 1
     1128
    10651129        try:
    10661130            x = float(numbers[0])
     
    10701134            numbers.pop(0)
    10711135            if len(header) != len(numbers):
    1072                 file_pointer.close() 
    1073                 msg = "File load error. \
    1074                 There might be a problem with the file header"
     1136                file_pointer.close()
     1137                msg = "File load error. " \
     1138                      "There might be a problem with the file header."
    10751139                raise SyntaxError, msg
    10761140            for i,num in enumerate(numbers):
     
    10791143                    #attributes.append(float(num))
    10801144                    att_dict.setdefault(header[i],[]).append(float(num))
    1081         #except IOError:           
    10821145        except ValueError:
    10831146            raise SyntaxError
     1147
    10841148    if points == []:
    10851149        raise StopIteration
    1086        
    1087    
     1150
    10881151    pointlist = array(points).astype(Float)
    10891152    for key in att_dict.keys():
     
    10941157    x_header = lower(x_header[:3])
    10951158    y_header = lower(y_header[:3])
    1096     if (x_header == 'lon' or  x_header == 'lat') and \
    1097        (y_header == 'lon' or  y_header == 'lat'):
     1159    if (x_header == 'lon' or  x_header == 'lat') \
     1160       and (y_header == 'lon' or  y_header == 'lat'):
    10981161        if x_header == 'lon':
    10991162            longitudes = ravel(pointlist[:,0:1])
     
    11021165            latitudes = ravel(pointlist[:,0:1])
    11031166            longitudes = ravel(pointlist[:,1:])
    1104        
     1167
    11051168        pointlist, geo_ref = _set_using_lat_long(latitudes,
    11061169                                                 longitudes,
     
    11081171                                                 data_points=None,
    11091172                                                 points_are_lats_longs=False)
    1110     return pointlist, att_dict, geo_ref, file_pointer
    1111 
     1173
     1174    return pointlist, att_dict, geo_ref, file_pointer
     1175
     1176
     1177##
     1178# @brief Read a .pts file header.
     1179# @param fid Handle to the open .pts file.
     1180# @param verbose True if the function is to be verbose.
     1181# @return (geo_reference, keys, fid.dimensions['number_of_points'])
     1182# @note Will throw IOError and AttributeError exceptions.
    11121183def _read_pts_file_header(fid, verbose=False):
    1113 
    11141184    """Read the geo_reference and number_of_points from a .pts file
    11151185    """
    1116    
     1186
    11171187    keys = fid.variables.keys()
    11181188    try:
    11191189        keys.remove('points')
    1120     except IOError, e:       
    1121         fid.close()   
    1122         msg = 'Expected keyword "points" but could not find it'
     1190    except IOError, e:
     1191        fid.close()
     1192        msg = "Expected keyword 'points' but could not find it."
    11231193        raise IOError, msg
    1124     if verbose: print 'Got %d variables: %s' %(len(keys), keys)
    1125    
     1194
     1195    if verbose: print 'Got %d variables: %s' % (len(keys), keys)
     1196
    11261197    try:
    11271198        geo_reference = Geo_reference(NetCDFObject=fid)
     
    11311202    return geo_reference, keys, fid.dimensions['number_of_points']
    11321203
     1204
     1205##
     1206# @brief Read the body of a .csf file, with blocking.
     1207# @param fid Handle to already open file.
     1208# @param start_row Start row index of points to return.
     1209# @param fin_row End row index of points to return.
     1210# @param keys Iterable of keys to return.
     1211# @return Tuple of (pointlist, attributes).
    11331212def _read_pts_file_blocking(fid, start_row, fin_row, keys):
    1134                             #verbose=False):
    1135    
    1136 
    1137     """
    1138     Read the body of a .csv file.
    1139     header: The list header of the csv file, with the x and y labels.
    1140     """
    1141    
     1213    """Read the body of a .csv file.
     1214    """
     1215
    11421216    pointlist = array(fid.variables['points'][start_row:fin_row])
    1143    
     1217
    11441218    attributes = {}
    11451219    for key in keys:
     
    11471221
    11481222    return pointlist, attributes
    1149    
     1223
     1224
     1225##
     1226# @brief Write a .pts data file.
     1227# @param file_name Path to the file to write.
     1228# @param write_data_points Data points to write.
     1229# @param write_attributes Attributes to write.
     1230# @param write_geo_reference Georef to write.
    11501231def _write_pts_file(file_name,
    11511232                    write_data_points,
    1152                     write_attributes=None, 
     1233                    write_attributes=None,
    11531234                    write_geo_reference=None):
    1154     """
    1155     Write .pts NetCDF file   
     1235    """Write .pts NetCDF file
    11561236
    11571237    NOTE: Below might not be valid ask Duncan : NB 5/2006
    1158    
     1238
    11591239    WARNING: This function mangles the point_atts data structure
    11601240    #F??ME: (DSG)This format has issues.
    1161     # There can't be an attribute called points 
     1241    # There can't be an attribute called points
    11621242    # consider format change
    11631243    # method changed by NB not sure if above statement is correct
    1164    
     1244
    11651245    should create new test for this
    11661246    legal_keys = ['pointlist', 'attributelist', 'geo_reference']
    11671247    for key in point_atts.keys():
    1168         msg = 'Key %s is illegal. Valid keys are %s' %(key, legal_keys) 
     1248        msg = 'Key %s is illegal. Valid keys are %s' %(key, legal_keys)
    11691249        assert key in legal_keys, msg
    1170     """   
     1250    """
     1251
    11711252    from Scientific.IO.NetCDF import NetCDFFile
     1253
    11721254    # NetCDF file definition
    11731255    outfile = NetCDFFile(file_name, 'w')
    1174    
     1256
    11751257    # Create new file
    11761258    outfile.institution = 'Geoscience Australia'
    1177     outfile.description = 'NetCDF format for compact and portable storage ' +\
     1259    outfile.description = 'NetCDF format for compact and portable storage ' \
    11781260                          'of spatial point data'
    1179    
     1261
    11801262    # Dimension definitions
    11811263    shape = write_data_points.shape[0]
    1182     outfile.createDimension('number_of_points', shape) 
     1264    outfile.createDimension('number_of_points', shape)
    11831265    outfile.createDimension('number_of_dimensions', 2) #This is 2d data
    1184    
     1266
    11851267    # Variable definition
    11861268    outfile.createVariable('points', Float, ('number_of_points',
    11871269                                             'number_of_dimensions'))
    11881270
    1189     #create variables 
     1271    #create variables
    11901272    outfile.variables['points'][:] = write_data_points #.astype(Float32)
    11911273
     
    11941276            outfile.createVariable(key, Float, ('number_of_points',))
    11951277            outfile.variables[key][:] = write_attributes[key] #.astype(Float32)
    1196        
     1278
    11971279    if write_geo_reference is not None:
    11981280        write_NetCDF_georeference(write_geo_reference, outfile)
    1199        
    1200     outfile.close()
    1201  
     1281
     1282    outfile.close()
     1283
     1284
     1285##
     1286# @brief Write a .csv data file.
     1287# @param file_name Path to the file to write.
     1288# @param write_data_points Data points to write.
     1289# @param write_attributes Attributes to write.
     1290# @param as_lat_long True if points are lat/lon, else x/y.
     1291# @param delimiter The CSV delimiter to use.
    12021292def _write_csv_file(file_name,
    12031293                    write_data_points,
     
    12051295                    as_lat_long=False,
    12061296                    delimiter=','):
    1207     """
    1208     export a file, file_name, with the csv format
    1209    
    1210     """
    1211     points = write_data_points
     1297    """Write a .csv file.
     1298    """
     1299
     1300    points = write_data_points
    12121301    pointattributes = write_attributes
    1213    
    1214     fd = open(file_name,'w')
     1302
     1303    fd = open(file_name, 'w')
     1304
    12151305    if as_lat_long:
    12161306        titlelist = "latitude" + delimiter + "longitude"  + delimiter
    12171307    else:
    12181308        titlelist = "x" + delimiter + "y"  + delimiter
    1219     if pointattributes is not None:   
     1309
     1310    if pointattributes is not None:
    12201311        for title in pointattributes.keys():
    12211312            titlelist = titlelist + title + delimiter
    12221313        titlelist = titlelist[0:-len(delimiter)] # remove the last delimiter
    1223     fd.write(titlelist+"\n")
    1224    
     1314
     1315    fd.write(titlelist + "\n")
     1316
    12251317    # <x/lat> <y/long> [attributes]
    12261318    for i, vert in enumerate( points):
    1227 
    1228 
    1229         if pointattributes is not None:           
     1319        if pointattributes is not None:
    12301320            attlist = ","
    12311321            for att in pointattributes.keys():
    1232                 attlist = attlist + str(pointattributes[att][i])+ delimiter
     1322                attlist = attlist + str(pointattributes[att][i]) + delimiter
    12331323            attlist = attlist[0:-len(delimiter)] # remove the last delimiter
    12341324            attlist.strip()
     
    12361326            attlist = ''
    12371327
    1238         fd.write(str(vert[0]) + delimiter +
    1239                  str(vert[1]) + attlist + "\n")
     1328        fd.write(str(vert[0]) + delimiter + str(vert[1]) + attlist + "\n")
    12401329
    12411330    fd.close()
    1242  
    1243 def _write_urs_file(file_name,
    1244                     points,
    1245                     delimiter=' '):
    1246     """
     1331
     1332
     1333##
     1334# @brief Write a URS file.
     1335# @param file_name The path of the file to write.
     1336# @param points
     1337# @param delimiter
     1338def _write_urs_file(file_name, points, delimiter=' '):
     1339    """Write a URS format file.
    12471340    export a file, file_name, with the urs format
    12481341    the data points are in lats and longs
    1249    
    1250     """   
    1251     fd = open(file_name,'w')   
    1252     fd.write(str(len(points))+"\n")   
     1342    """
     1343
     1344    fd = open(file_name, 'w')
     1345
     1346    # first line is # points
     1347    fd.write(str(len(points)) + "\n")
     1348
    12531349    # <lat> <long> <id#>
    12541350    for i, vert in enumerate( points):
    1255         fd.write(str(round(vert[0],7)) + delimiter + \
    1256                  str(round(vert[1],7)) + delimiter +str(i)+ "\n")
     1351        fd.write(str(round(vert[0],7)) + delimiter +
     1352                 str(round(vert[1],7)) + delimiter + str(i) + "\n")
     1353
    12571354    fd.close()
    1258    
     1355
     1356
     1357##
     1358# @brief ??
     1359# @param point_atts ??
     1360# @return ??
    12591361def _point_atts2array(point_atts):
    12601362    point_atts['pointlist'] = array(point_atts['pointlist']).astype(Float)
    1261    
     1363
    12621364    for key in point_atts['attributelist'].keys():
    1263         point_atts['attributelist'][key]=\
     1365        point_atts['attributelist'][key] = \
    12641366                array(point_atts['attributelist'][key]).astype(Float)
     1367
    12651368    return point_atts
    12661369
    1267    
    1268 
    1269 
     1370
     1371##
     1372# @brief Convert geospatial object to a points dictionary.
     1373# @param geospatial_data The geospatial object to convert.
     1374# @return A points dictionary.
    12701375def geospatial_data2points_dictionary(geospatial_data):
    12711376    """Convert geospatial data to points_dictionary
     
    12851390    return points_dictionary
    12861391
    1287    
     1392
     1393##
     1394# @brief Convert a points dictionary to a geospatial object.
     1395# @param points_dictionary A points dictionary to convert.
    12881396def points_dictionary2geospatial_data(points_dictionary):
    12891397    """Convert points_dictionary to geospatial data object
    12901398    """
    12911399
    1292     msg = 'Points dictionary must have key pointlist' 
     1400    msg = 'Points dictionary must have key pointlist'
    12931401    assert points_dictionary.has_key('pointlist'), msg
    12941402
    1295     msg = 'Points dictionary must have key attributelist'     
    1296     assert points_dictionary.has_key('attributelist'), msg       
     1403    msg = 'Points dictionary must have key attributelist'
     1404    assert points_dictionary.has_key('attributelist'), msg
    12971405
    12981406    if points_dictionary.has_key('geo_reference'):
     
    13001408    else:
    13011409        geo = None
    1302    
     1410
    13031411    return Geospatial_data(points_dictionary['pointlist'],
    13041412                           points_dictionary['attributelist'],
    13051413                           geo_reference = geo)
    13061414
    1307 def clean_line(line,delimiter):     
     1415
     1416##
     1417# @brief Split a string into 'clean' fields.
     1418# @param line The string to process.
     1419# @param delimiter The delimiter string to split 'line' with.
     1420# @return A list of 'cleaned' field strings.
     1421# @note Any fields that were initially zero length will be removed.
     1422def clean_line(line,delimiter):
    13081423    """Remove whitespace
    13091424    """
    1310     #print ">%s" %line
    1311     line = line.strip()
    1312     #print "stripped>%s" %line
     1425
     1426    line = line.strip()                 # probably unnecessary RW
    13131427    numbers = line.split(delimiter)
     1428
    13141429    i = len(numbers) - 1
    13151430    while i >= 0:
     
    13181433        else:
    13191434            numbers[i] = numbers[i].strip()
    1320        
    13211435        i += -1
    1322     #for num in numbers:
    1323     #    print "num>%s<" %num
     1436
    13241437    return numbers
    1325            
     1438
     1439
     1440##
     1441# @brief Ensure that points are in absolute coordinates.
     1442# @param points A list or array of points to check, or geospatial object.
     1443# @param geo_reference If supplied,
     1444# @return ??
    13261445def ensure_absolute(points, geo_reference=None):
    13271446    """Ensure that points are in absolute coordinates.
    1328    
     1447
    13291448    This function inputs several formats and
    13301449    outputs one format. - a numeric array of absolute points.
     
    13441463        #It's a string - assume it is a point file
    13451464        points = Geospatial_data(file_name=points)
    1346        
     1465
    13471466    if isinstance(points, Geospatial_data):
    13481467        points = points.get_data_points(absolute=True)
    1349         msg = 'Use a Geospatial_data object or a mesh origin. Not both.'
     1468        msg = 'Use a Geospatial_data object or a mesh origin, not both.'
    13501469        assert geo_reference == None, msg
    13511470    else:
    13521471        points = ensure_numeric(points, Float)
    1353        
     1472
     13541473    # Sort out geo_reference and convert points
    13551474    if geo_reference is None:
     
    13631482                                geo_reference[2])
    13641483        points = geo.get_absolute(points)
    1365        
    1366     # Return   
     1484
    13671485    return points
    1368      
    1369 
     1486
     1487
     1488##
     1489# @brief
     1490# @param points
     1491# @param geo_reference
     1492# @return A geospatial object.
    13701493def ensure_geospatial(points, geo_reference=None):
    13711494    """
     
    13741497
     13751498    Input formats are:
    1376     points: List or numeric array of coordinate pairs [xi, eta] of
    1377               points or geospatial object
     1499    points:      List or numeric array of coordinate pairs [xi, eta] of
     1500                 points or geospatial object
    13781501
    13791502    mesh_origin: A geo_reference object or 3-tuples consisting of
     
    13821505                 relative to their respective origins.
    13831506    """
    1384    
     1507
    13851508    # Input check
    13861509    if isinstance(points, Geospatial_data):
    1387         msg = "Use a Geospatial_data object or a mesh origin. Not both."
     1510        msg = "Use a Geospatial_data object or a mesh origin, not both."
    13881511        assert geo_reference is None, msg
    1389         return points   
     1512        return points
    13901513    else:
    13911514        # List or numeric array of absolute points
    13921515        points = ensure_numeric(points, Float)
    13931516
    1394     # Sort out geo reference   
     1517    # Sort out geo reference
    13951518    if geo_reference is None:
    13961519        geo = None
     
    14021525                                geo_reference[1],
    14031526                                geo_reference[2])
    1404                                
    1405     # Create Geospatial_data object with appropriate geo reference and return                           
    1406     points = Geospatial_data(data_points=points, geo_reference=geo)       
     1527
     1528    # Create Geospatial_data object with appropriate geo reference and return
     1529    points = Geospatial_data(data_points=points, geo_reference=geo)
     1530
    14071531    return points
    14081532
    14091533
    1410 
    1411 def find_optimal_smoothing_parameter(data_file,
     1534##
     1535# @brief
     1536# @param data_file
     1537# @param alpha_list
     1538# @param mesh_file
     1539# @param boundary_poly
     1540# @param mesh_resolution
     1541# @param north_boundary
     1542# @param south_boundary
     1543# @param east_boundary
     1544# @param west_boundary
     1545# @param plot_name
     1546# @param split_factor
     1547# @param seed_num
     1548# @param cache
     1549# @param verbose
     1550def find_optimal_smoothing_parameter(data_file,
    14121551                                     alpha_list=None,
    14131552                                     mesh_file=None,
     
    14221561                                     seed_num=None,
    14231562                                     cache=False,
    1424                                      verbose=False
    1425                                      ):
    1426    
    1427     """
    1428     Removes a small random sample of points from 'data_file'. Then creates
    1429     models with different alpha values from 'alpha_list' and cross validates
     1563                                     verbose=False):
     1564    """
     1565    Removes a small random sample of points from 'data_file'. Then creates
     1566    models with different alpha values from 'alpha_list' and cross validates
    14301567    the predicted value to the previously removed point data. Returns the
    14311568    alpha value which has the smallest covariance.
    1432    
     1569
    14331570    data_file: must not contain points outside the boundaries defined
    1434            and it either a pts, txt or csv file.
    1435    
      1571               and is either a pts, txt or csv file.
     1572
    14361573    alpha_list: the alpha values to test in a single list
    1437    
     1574
    14381575    mesh_file: name of the created mesh file or if passed in will read it.
    1439             NOTE, if there is a mesh file mesh_resolution,
    1440             north_boundary, south... etc will be ignored.
    1441    
     1576               NOTE, if there is a mesh file mesh_resolution,
     1577               north_boundary, south... etc will be ignored.
     1578
    14421579    mesh_resolution: the maximum area size for a triangle
    1443    
     1580
    14441581    north_boundary... west_boundary: the value of the boundary
    1445    
     1582
    14461583    plot_name: the name for the plot contain the results
    1447    
     1584
    14481585    seed_num: the seed to the random number generator
    1449    
     1586
    14501587    USAGE:
    1451         value, alpha = find_optimal_smoothing_parameter(data_file=fileName, 
     1588        value, alpha = find_optimal_smoothing_parameter(data_file=fileName,
    14521589                                             alpha_list=[0.0001, 0.01, 1],
    14531590                                             mesh_file=None,
     
    14601597                                             seed_num=100000,
    14611598                                             verbose=False)
    1462    
    1463     OUTPUT: returns the minumum normalised covalance calculate AND the 
     1599
      1600    OUTPUT: returns the minimum normalised covariance calculated AND the
    14641601           alpha that created it. PLUS writes a plot of the results
    1465            
     1602
    14661603    NOTE: code will not work if the data_file extent is greater than the
    14671604    boundary_polygon or any of the boundaries, eg north_boundary...west_boundary
    1468        
    14691605    """
    14701606
     
    14721608    from anuga.geospatial_data.geospatial_data import Geospatial_data
    14731609    from anuga.pmesh.mesh_interface import create_mesh_from_regions
    1474    
    14751610    from anuga.utilities.numerical_tools import cov
    14761611    from Numeric import array, resize,shape,Float,zeros,take,argsort,argmin
    1477     from anuga.utilities.polygon import is_inside_polygon 
    1478     from anuga.fit_interpolate.benchmark_least_squares import mem_usage 
    1479    
     1612    from anuga.utilities.polygon import is_inside_polygon
     1613    from anuga.fit_interpolate.benchmark_least_squares import mem_usage
     1614
    14801615
    14811616    attribute_smoothed='elevation'
     
    14831618    if mesh_file is None:
    14841619        if verbose: print "building mesh"
    1485         mesh_file='temp.msh'
    1486 
    1487         if north_boundary is None or south_boundary is None or \
    1488            east_boundary is None or west_boundary is None:
    1489             no_boundary=True
     1620        mesh_file = 'temp.msh'
     1621
     1622        if north_boundary is None or south_boundary is None \
     1623           or east_boundary is None or west_boundary is None:
     1624            no_boundary = True
    14901625        else:
    1491             no_boundary=False
    1492        
     1626            no_boundary = False
     1627
    14931628        if no_boundary is True:
    1494             msg= 'All boundaries must be defined'
     1629            msg = 'All boundaries must be defined'
    14951630            raise msg
    14961631
     
    14991634                     [west_boundary,north_boundary],
    15001635                     [west_boundary,south_boundary]]
    1501                      
     1636
    15021637        create_mesh_from_regions(poly_topo,
    15031638                                 boundary_tags={'back': [2],
    15041639                                                'side': [1,3],
    15051640                                                'ocean': [0]},
    1506                              maximum_triangle_area=mesh_resolution,
    1507                              filename=mesh_file,
    1508                              use_cache=cache,
    1509                              verbose=verbose)
     1641                                 maximum_triangle_area=mesh_resolution,
     1642                                 filename=mesh_file,
     1643                                 use_cache=cache,
     1644                                 verbose=verbose)
    15101645
    15111646    else: # if mesh file provided
    15121647        #test mesh file exists?
    1513         if verbose: "reading from file: %s" %mesh_file
     1648        if verbose: "reading from file: %s" % mesh_file
    15141649        if access(mesh_file,F_OK) == 0:
    1515             msg="file %s doesn't exist!" %mesh_file
     1650            msg = "file %s doesn't exist!" % mesh_file
    15161651            raise IOError, msg
    15171652
    15181653    #split topo data
    1519     if verbose: print 'Reading elevation file: %s' %data_file
     1654    if verbose: print 'Reading elevation file: %s' % data_file
    15201655    G = Geospatial_data(file_name = data_file)
    15211656    if verbose: print 'Start split'
    1522     G_small, G_other = G.split(split_factor,seed_num, verbose=verbose)
     1657    G_small, G_other = G.split(split_factor, seed_num, verbose=verbose)
    15231658    if verbose: print 'Finish split'
    1524     points=G_small.get_data_points()
     1659    points = G_small.get_data_points()
    15251660
    15261661    if verbose: print "Number of points in sample to compare: ", len(points)
    1527    
    1528     if alpha_list==None:
     1662
     1663    if alpha_list == None:
    15291664        alphas = [0.001,0.01,100]
    15301665        #alphas = [0.000001, 0.00001, 0.0001, 0.001, 0.01,\
    15311666         #         0.1, 1.0, 10.0, 100.0,1000.0,10000.0]
    1532        
    15331667    else:
    1534         alphas=alpha_list
     1668        alphas = alpha_list
    15351669
    15361670    #creates array with columns 1 and 2 are x, y. column 3 is elevation
    1537     #4 onwards is the elevation_predicted using the alpha, which will 
     1671    #4 onwards is the elevation_predicted using the alpha, which will
    15381672    #be compared later against the real removed data
    1539     data=array([],typecode=Float)
    1540 
    1541     data=resize(data,(len(points),3+len(alphas)))
     1673    data = array([], typecode=Float)
     1674
     1675    data=resize(data, (len(points), 3+len(alphas)))
    15421676
    15431677    #gets relative point from sample
    1544     data[:,0]=points[:,0]
    1545     data[:,1]=points[:,1]
    1546     elevation_sample=G_small.get_attributes(attribute_name=attribute_smoothed)
    1547     data[:,2]=elevation_sample
    1548 
    1549     normal_cov=array(zeros([len(alphas),2]),typecode=Float)
     1678    data[:,0] = points[:,0]
     1679    data[:,1] = points[:,1]
     1680    elevation_sample = G_small.get_attributes(attribute_name=attribute_smoothed)
     1681    data[:,2] = elevation_sample
     1682
     1683    normal_cov=array(zeros([len(alphas), 2]), typecode=Float)
    15501684
    15511685    if verbose: print 'Setup computational domains with different alphas'
    15521686
    1553     #print 'memory usage before domains',mem_usage()
    1554 
    1555     for i,alpha in enumerate(alphas):
     1687    for i, alpha in enumerate(alphas):
    15561688        #add G_other data to domains with different alphas
    1557         if verbose:print '\n Calculating domain and mesh for Alpha = ',alpha,'\n'
     1689        if verbose:
     1690            print '\n Calculating domain and mesh for Alpha = ', alpha, '\n'
    15581691        domain = Domain(mesh_file, use_cache=cache, verbose=verbose)
    1559         if verbose:print domain.statistics()
    1560         domain.set_quantity(attribute_smoothed,
    1561                     geospatial_data = G_other,
    1562                     use_cache = cache,
    1563                     verbose = verbose,
    1564                     alpha = alpha)
    1565 
    1566        
     1692        if verbose: print domain.statistics()
     1693        domain.set_quantity(attribute_smoothed,
     1694                            geospatial_data=G_other,
     1695                            use_cache=cache,
     1696                            verbose=verbose,
     1697                            alpha=alpha)
     1698
    15671699        # Convert points to geospatial data for use with get_values below
    15681700        points_geo = Geospatial_data(points, domain.geo_reference)
    1569        
    1570         #returns the predicted elevation of the points that were "split" out 
     1701
     1702        #returns the predicted elevation of the points that were "split" out
    15711703        #of the original data set for one particular alpha
    15721704        if verbose: print 'Get predicted elevation for location to be compared'
    1573         elevation_predicted=domain.quantities[attribute_smoothed].\
    1574                             get_values(interpolation_points=points_geo)
    1575  
     1705        elevation_predicted = \
     1706                domain.quantities[attribute_smoothed].\
     1707                    get_values(interpolation_points=points_geo)
     1708
    15761709        #add predicted elevation to array that starts with x, y, z...
    1577         data[:,i+3]=elevation_predicted
    1578 
    1579         sample_cov= cov(elevation_sample)
    1580         #print elevation_predicted
    1581         ele_cov= cov(elevation_sample-elevation_predicted)
    1582         normal_cov[i,:]= [alpha,ele_cov/sample_cov]
    1583         #print 'memory usage during compare',mem_usage()
    1584        
    1585         if verbose: print'Covariance for alpha ',normal_cov[i][0],'= ',normal_cov[i][1]
    1586         if verbose: print'-------------------------------------------- \n'
    1587 
    1588     normal_cov0=normal_cov[:,0]
    1589     normal_cov_new=take(normal_cov,argsort(normal_cov0))
     1710        data[:,i+3] = elevation_predicted
     1711
     1712        sample_cov = cov(elevation_sample)
     1713        ele_cov = cov(elevation_sample - elevation_predicted)
     1714        normal_cov[i,:] = [alpha,ele_cov / sample_cov]
     1715
     1716        if verbose:
     1717            print 'Covariance for alpha ', normal_cov[i][0], '= ', \
     1718                      normal_cov[i][1]
     1719            print '-------------------------------------------- \n'
     1720
     1721    normal_cov0 = normal_cov[:,0]
     1722    normal_cov_new = take(normal_cov, argsort(normal_cov0))
    15901723
    15911724    if plot_name is not None:
    1592         from pylab import savefig,semilogx,loglog
    1593         semilogx(normal_cov_new[:,0],normal_cov_new[:,1])
    1594         loglog(normal_cov_new[:,0],normal_cov_new[:,1])
    1595         savefig(plot_name,dpi=300)
     1725        from pylab import savefig, semilogx, loglog
     1726
     1727        semilogx(normal_cov_new[:,0], normal_cov_new[:,1])
     1728        loglog(normal_cov_new[:,0], normal_cov_new[:,1])
     1729        savefig(plot_name, dpi=300)
     1730
    15961731    if mesh_file == 'temp.msh':
    15971732        remove(mesh_file)
    1598    
    1599     if verbose: 
     1733
     1734    if verbose:
    16001735        print 'Final results:'
    1601         for i,alpha in enumerate(alphas):
    1602             print'covariance for alpha %s = %s ' %(normal_cov[i][0],normal_cov[i][1])
    1603         print '\n Optimal alpha is: %s ' % normal_cov_new[(argmin(normal_cov_new,axis=0))[1],0]
     1736        for i, alpha in enumerate(alphas):
     1737            print 'covariance for alpha %s = %s ' \
     1738                  % (normal_cov[i][0], normal_cov[i][1])
     1739        print '\n Optimal alpha is: %s ' \
     1740              % normal_cov_new[(argmin(normal_cov_new, axis=0))[1], 0]
    16041741
    16051742    # covariance and optimal alpha
    1606     return min(normal_cov_new[:,1]) , normal_cov_new[(argmin(normal_cov_new,axis=0))[1],0]
    1607 
    1608 def old_find_optimal_smoothing_parameter(data_file,
    1609                                      alpha_list=None,
    1610                                      mesh_file=None,
    1611                                      boundary_poly=None,
    1612                                      mesh_resolution=100000,
    1613                                      north_boundary=None,
    1614                                      south_boundary=None,
    1615                                      east_boundary=None,
    1616                                      west_boundary=None,
    1617                                      plot_name='all_alphas',
    1618                                      split_factor=0.1,
    1619                                      seed_num=None,
    1620                                      cache=False,
    1621                                      verbose=False
    1622                                      ):
    1623    
     1743    return (min(normal_cov_new[:,1]),
     1744            normal_cov_new[(argmin(normal_cov_new,axis=0))[1],0])
     1745
     1746
     1747##
     1748# @brief
     1749# @param data_file
     1750# @param alpha_list
     1751# @param mesh_file
     1752# @param boundary_poly
     1753# @param mesh_resolution
     1754# @param north_boundary
     1755# @param south_boundary
     1756# @param east_boundary
     1757# @param west_boundary
     1758# @param plot_name
     1759# @param split_factor
     1760# @param seed_num
     1761# @param cache
     1762# @param verbose
     1763def old_find_optimal_smoothing_parameter(data_file,
     1764                                         alpha_list=None,
     1765                                         mesh_file=None,
     1766                                         boundary_poly=None,
     1767                                         mesh_resolution=100000,
     1768                                         north_boundary=None,
     1769                                         south_boundary=None,
     1770                                         east_boundary=None,
     1771                                         west_boundary=None,
     1772                                         plot_name='all_alphas',
     1773                                         split_factor=0.1,
     1774                                         seed_num=None,
     1775                                         cache=False,
     1776                                         verbose=False):
    16241777    """
    16251778    data_file: must not contain points outside the boundaries defined
    1626            and it either a pts, txt or csv file.
    1627    
      1779               and is either a pts, txt or csv file.
     1780
    16281781    alpha_list: the alpha values to test in a single list
    1629    
     1782
    16301783    mesh_file: name of the created mesh file or if passed in will read it.
    1631             NOTE, if there is a mesh file mesh_resolution,
    1632             north_boundary, south... etc will be ignored.
    1633    
     1784               NOTE, if there is a mesh file mesh_resolution,
     1785               north_boundary, south... etc will be ignored.
     1786
    16341787    mesh_resolution: the maximum area size for a triangle
    1635    
     1788
    16361789    north_boundary... west_boundary: the value of the boundary
    1637    
     1790
    16381791    plot_name: the name for the plot contain the results
    1639    
     1792
    16401793    seed_num: the seed to the random number generator
    1641    
     1794
    16421795    USAGE:
    1643         value, alpha = find_optimal_smoothing_parameter(data_file=fileName, 
     1796        value, alpha = find_optimal_smoothing_parameter(data_file=fileName,
    16441797                                             alpha_list=[0.0001, 0.01, 1],
    16451798                                             mesh_file=None,
     
    16521805                                             seed_num=100000,
    16531806                                             verbose=False)
    1654    
    1655     OUTPUT: returns the minumum normalised covalance calculate AND the 
    1656            alpha that created it. PLUS writes a plot of the results
    1657            
     1807
      1808    OUTPUT: returns the minimum normalised covariance calculated AND the
     1809            alpha that created it. PLUS writes a plot of the results
     1810
     16581811    NOTE: code will not work if the data_file extent is greater than the
    1659     boundary_polygon or the north_boundary...west_boundary
    1660        
     1812          boundary_polygon or the north_boundary...west_boundary
    16611813    """
    16621814
     
    16641816    from anuga.geospatial_data.geospatial_data import Geospatial_data
    16651817    from anuga.pmesh.mesh_interface import create_mesh_from_regions
    1666    
    16671818    from anuga.utilities.numerical_tools import cov
    16681819    from Numeric import array, resize,shape,Float,zeros,take,argsort,argmin
    1669     from anuga.utilities.polygon import is_inside_polygon 
    1670     from anuga.fit_interpolate.benchmark_least_squares import mem_usage 
    1671    
    1672 
    1673     attribute_smoothed='elevation'
     1820    from anuga.utilities.polygon import is_inside_polygon
     1821    from anuga.fit_interpolate.benchmark_least_squares import mem_usage
     1822
     1823
     1824    attribute_smoothed = 'elevation'
    16741825
    16751826    if mesh_file is None:
    1676         mesh_file='temp.msh'
    1677 
    1678         if north_boundary is None or south_boundary is None or \
    1679            east_boundary is None or west_boundary is None:
    1680             no_boundary=True
     1827        mesh_file = 'temp.msh'
     1828
     1829        if north_boundary is None or south_boundary is None \
     1830           or east_boundary is None or west_boundary is None:
     1831            no_boundary = True
    16811832        else:
    1682             no_boundary=False
    1683        
     1833            no_boundary = False
     1834
    16841835        if no_boundary is True:
    1685             msg= 'All boundaries must be defined'
     1836            msg = 'All boundaries must be defined'
    16861837            raise msg
    16871838
     
    16901841                     [west_boundary,north_boundary],
    16911842                     [west_boundary,south_boundary]]
    1692                      
     1843
    16931844        create_mesh_from_regions(poly_topo,
    16941845                                 boundary_tags={'back': [2],
    16951846                                                'side': [1,3],
    16961847                                                'ocean': [0]},
    1697                              maximum_triangle_area=mesh_resolution,
    1698                              filename=mesh_file,
    1699                              use_cache=cache,
    1700                              verbose=verbose)
     1848                                 maximum_triangle_area=mesh_resolution,
     1849                                 filename=mesh_file,
     1850                                 use_cache=cache,
     1851                                 verbose=verbose)
    17011852
    17021853    else: # if mesh file provided
    17031854        #test mesh file exists?
    17041855        if access(mesh_file,F_OK) == 0:
    1705             msg="file %s doesn't exist!" %mesh_file
     1856            msg = "file %s doesn't exist!" % mesh_file
    17061857            raise IOError, msg
    17071858
    17081859    #split topo data
    1709     G = Geospatial_data(file_name = data_file)
     1860    G = Geospatial_data(file_name=data_file)
    17101861    if verbose: print 'start split'
    1711     G_small, G_other = G.split(split_factor,seed_num, verbose=verbose)
     1862    G_small, G_other = G.split(split_factor, seed_num, verbose=verbose)
    17121863    if verbose: print 'finish split'
    1713     points=G_small.get_data_points()
     1864    points = G_small.get_data_points()
    17141865
    17151866    #FIXME: Remove points outside boundary polygon
    17161867#    print 'new point',len(points)
    1717 #   
     1868#
    17181869#    new_points=[]
    17191870#    new_points=array([],typecode=Float)
     
    17261877#    points = new_points
    17271878
    1728    
    17291879    if verbose: print "Number of points in sample to compare: ", len(points)
    1730    
    1731     if alpha_list==None:
     1880
     1881    if alpha_list == None:
    17321882        alphas = [0.001,0.01,100]
    17331883        #alphas = [0.000001, 0.00001, 0.0001, 0.001, 0.01,\
    17341884         #         0.1, 1.0, 10.0, 100.0,1000.0,10000.0]
    1735        
    17361885    else:
    1737         alphas=alpha_list
     1886        alphas = alpha_list
     1887
    17381888    domains = {}
    17391889
    17401890    if verbose: print 'Setup computational domains with different alphas'
    1741 
    1742     print 'memory usage before domains',mem_usage()
    17431891
    17441892    for alpha in alphas:
    17451893        #add G_other data to domains with different alphas
    1746         if verbose:print '\n Calculating domain and mesh for Alpha = ',alpha,'\n'
     1894        if verbose:
     1895            print '\n Calculating domain and mesh for Alpha = ', alpha, '\n'
    17471896        domain = Domain(mesh_file, use_cache=cache, verbose=verbose)
    1748         if verbose:print domain.statistics()
    1749         domain.set_quantity(attribute_smoothed,
    1750                     geospatial_data = G_other,
    1751                     use_cache = cache,
    1752                     verbose = verbose,
    1753                     alpha = alpha)
    1754         domains[alpha]=domain
    1755 
    1756     print 'memory usage after domains',mem_usage()
     1897        if verbose: print domain.statistics()
     1898        domain.set_quantity(attribute_smoothed,
     1899                            geospatial_data=G_other,
     1900                            use_cache=cache,
     1901                            verbose=verbose,
     1902                            alpha=alpha)
     1903        domains[alpha] = domain
    17571904
    17581905    #creates array with columns 1 and 2 are x, y. column 3 is elevation
    1759     #4 onwards is the elevation_predicted using the alpha, which will 
     1906    #4 onwards is the elevation_predicted using the alpha, which will
    17601907    #be compared later against the real removed data
    1761     data=array([],typecode=Float)
    1762 
    1763     data=resize(data,(len(points),3+len(alphas)))
     1908    data = array([], typecode=Float)
     1909
     1910    data = resize(data, (len(points), 3+len(alphas)))
    17641911
    17651912    #gets relative point from sample
    1766     data[:,0]=points[:,0]
    1767     data[:,1]=points[:,1]
    1768     elevation_sample=G_small.get_attributes(attribute_name=attribute_smoothed)
    1769     data[:,2]=elevation_sample
    1770 
    1771     normal_cov=array(zeros([len(alphas),2]),typecode=Float)
    1772 
    1773     if verbose: print 'Determine difference between predicted results and actual data'
    1774     for i,alpha in enumerate(domains):
    1775         if verbose: print'Alpha =',alpha
    1776        
    1777         points_geo=domains[alpha].geo_reference.change_points_geo_ref(points)
    1778         #returns the predicted elevation of the points that were "split" out
     1913    data[:,0] = points[:,0]
     1914    data[:,1] = points[:,1]
     1915    elevation_sample = G_small.get_attributes(attribute_name=attribute_smoothed)
     1916    data[:,2] = elevation_sample
     1917
     1918    normal_cov = array(zeros([len(alphas), 2]), typecode=Float)
     1919
     1920    if verbose:
     1921        print 'Determine difference between predicted results and actual data'
     1922    for i, alpha in enumerate(domains):
     1923        if verbose: print'Alpha =', alpha
     1924
     1925        points_geo = domains[alpha].geo_reference.change_points_geo_ref(points)
     1926        #returns the predicted elevation of the points that were "split" out
    17791927        #of the original data set for one particular alpha
    1780         elevation_predicted=domains[alpha].quantities[attribute_smoothed].\
    1781                             get_values(interpolation_points=points_geo)
    1782  
     1928        elevation_predicted = \
     1929                domains[alpha].quantities[attribute_smoothed].\
     1930                        get_values(interpolation_points=points_geo)
     1931
    17831932        #add predicted elevation to array that starts with x, y, z...
    1784         data[:,i+3]=elevation_predicted
    1785 
    1786         sample_cov= cov(elevation_sample)
    1787         #print elevation_predicted
    1788         ele_cov= cov(elevation_sample-elevation_predicted)
    1789         normal_cov[i,:]= [alpha,ele_cov/sample_cov]
    1790         print 'memory usage during compare',mem_usage()
    1791        
    1792 
    1793         if verbose: print'cov',normal_cov[i][0],'= ',normal_cov[i][1]
    1794 
    1795     normal_cov0=normal_cov[:,0]
    1796     normal_cov_new=take(normal_cov,argsort(normal_cov0))
     1933        data[:,i+3] = elevation_predicted
     1934
     1935        sample_cov = cov(elevation_sample)
     1936        ele_cov = cov(elevation_sample - elevation_predicted)
     1937        normal_cov[i,:] = [alpha,ele_cov / sample_cov]
     1938        print 'memory usage during compare', mem_usage()
     1939        if verbose: print 'cov', normal_cov[i][0], '= ', normal_cov[i][1]
     1940
     1941    normal_cov0 = normal_cov[:,0]
     1942    normal_cov_new = take(normal_cov, argsort(normal_cov0))
    17971943
    17981944    if plot_name is not None:
    17991945        from pylab import savefig,semilogx,loglog
    1800         semilogx(normal_cov_new[:,0],normal_cov_new[:,1])
    1801         loglog(normal_cov_new[:,0],normal_cov_new[:,1])
    1802         savefig(plot_name,dpi=300)
     1946
     1947        semilogx(normal_cov_new[:,0], normal_cov_new[:,1])
     1948        loglog(normal_cov_new[:,0], normal_cov_new[:,1])
     1949        savefig(plot_name, dpi=300)
    18031950    if mesh_file == 'temp.msh':
    18041951        remove(mesh_file)
    1805    
    1806     return min(normal_cov_new[:,1]) , normal_cov_new[(argmin(normal_cov_new,axis=0))[1],0]
    1807 
    1808          
     1952
     1953    return (min(normal_cov_new[:,1]),
     1954            normal_cov_new[(argmin(normal_cov_new, axis=0))[1],0])
     1955
     1956
    18091957if __name__ == "__main__":
    18101958    pass
    1811    
     1959
Note: See TracChangeset for help on using the changeset viewer.