source: anuga_core/source/anuga/geospatial_data/geospatial_data.py @ 7317

Last change on this file since 7317 was 7317, checked in by rwilson, 15 years ago

Replaced 'print' statements with log.critical() calls.

File size: 70.2 KB
Line 
1"""Class Geospatial_data
2
3Manipulation of locations on the planet and associated attributes.
4"""
5
6from sys import maxint
7from os import access, F_OK, R_OK,remove
8from types import DictType
9from warnings import warn
10from string import lower
11from copy import deepcopy
12import copy
13
14from Scientific.IO.NetCDF import NetCDFFile
15import numpy as num
16from numpy.random import randint, seed
17
18from anuga.coordinate_transforms.lat_long_UTM_conversion import UTMtoLL
19from anuga.utilities.numerical_tools import ensure_numeric
20from anuga.coordinate_transforms.geo_reference import Geo_reference, \
21     TitleError, DEFAULT_ZONE, ensure_geo_reference, write_NetCDF_georeference
22from anuga.coordinate_transforms.redfearn import convert_from_latlon_to_utm
23from anuga.utilities.system_tools import clean_line
24from anuga.utilities.anuga_exceptions import ANUGAError
25from anuga.config import points_file_block_line_size as MAX_READ_LINES
26from anuga.config import netcdf_mode_r, netcdf_mode_w, netcdf_mode_a
27from anuga.config import netcdf_float
28import anuga.utilities.log as log
29
30
31DEFAULT_ATTRIBUTE = 'elevation'
32
33
##
# @brief A set of data points with associated attribute values and a
#        georeference describing the origin the points are relative to.
36class Geospatial_data:
37
    ##
    # @brief Create an instance from data points, lat/longs, or a points file.
    # @param data_points Mx2 iterable of tuples or array of x,y coordinates
    #                    (may also be a points file name).
    # @param attributes Associated values for each data point.
    # @param geo_reference Geo_reference object giving the origin of the points.
    # @param default_attribute_name Attribute returned when none is requested.
    # @param file_name Name of a .txt, .csv or .pts points file to load.
    # @param latitudes Vector of latitudes, used instead of data_points.
    # @param longitudes Vector of longitudes, used instead of data_points.
    # @param points_are_lats_longs True if points are lat/long, not UTM.
    # @param max_read_lines Size of block to read, if blocking.
    # @param load_file_now True if blocking but we want to read file now.
    # @param verbose True if this class instance is verbose.
    def __init__(self,
                 data_points=None, # this can also be a points file name
                 attributes=None,
                 geo_reference=None,
                 default_attribute_name=None,
                 file_name=None,
                 latitudes=None,
                 longitudes=None,
                 points_are_lats_longs=False,
                 max_read_lines=None,
                 load_file_now=True,
                 verbose=False):
        """Create instance from data points and associated attributes

        data_points: x,y coordinates in meters. Type must be either a
        sequence of 2-tuples or an Mx2 numeric array of floats.  A file name
        with extension .txt, .csv or .pts can also be passed in here.

        attributes: Associated values for each data point. The type
        must be either a list or an array of length M or a dictionary
        of lists (or arrays) of length M. In the latter case the keys
        in the dictionary represent the attribute names, in the former
        the attribute will get the default name "elevation".

        geo_reference: Object representing the origin of the data
        points. It contains UTM zone, easting and northing and data
        points are assumed to be relative to this origin.
        If geo_reference is None, the default geo ref object is used.

        default_attribute_name: Name of default attribute to be used with
        get_attribute_values. The idea is that the dataset can be
        equipped with information about which attribute to return.
        If None, the default is the "first"

        latitudes, longitudes: Vectors of latitudes and longitudes,
        used to specify location instead of points.

        points_are_lats_longs: Set this as true if the points are actually
        lats and longs, not UTM

        max_read_lines: The number of rows read into memory when using
        blocking to read a file.

        load_file_now:  If true the file is automatically loaded
        into the geospatial instance. Used when blocking.

        file_name: Name of input netCDF file or .txt file. netCDF file must
        have dimensions "points" etc.
        .txt file is a comma separated file with x, y and attribute
        data.

        The first line has the titles of the columns.  The first two
        column titles are checked to see if they start with lat or
        long (not case sensitive).  If so the data is assumed to be
        latitude and longitude, in decimal format and converted to
        UTM.  Otherwise the first two columns are assumed to be the x
        and y, and the title names actually used are ignored.


        The format for a .txt file is:
            1st line:     [column names]
            other lines:  x y [attributes]

            for example:
            x, y, elevation, friction
            0.6, 0.7, 4.9, 0.3
            1.9, 2.8, 5, 0.3
            2.7, 2.4, 5.2, 0.3

        The first two columns have to be x, y or lat, long
        coordinates.


        The format for a Points dictionary is:
          ['pointlist'] a 2 column array describing points. 1st column x,
          2nd column y.
          ['attributelist'], a dictionary of 1D arrays, representing
          attribute values at the point.  The dictionary key is the attribute
          header.
          ['geo_reference'] a Geo_reference object. Use if the point
          information is relative. This is optional.
            eg
            dic['pointlist'] = [[1.0,2.0],[3.0,5.0]]
            dic['attributelist']['elevation'] = [[7.0,5.0]

        verbose: True if this instance should report progress.
        """

        # A string here means a points file name, not actual point data.
        if isinstance(data_points, basestring):
            # assume data_points is really a file name
            file_name = data_points

        self.set_verbose(verbose)
        self.geo_reference = None
        self.file_name = file_name

        # Fall back to the module-wide block size when none is given.
        if max_read_lines is None:
            self.max_read_lines = MAX_READ_LINES
        else:
            self.max_read_lines = max_read_lines

        if file_name is None:
            # In-memory data: optionally convert lat/longs to UTM points first.
            if (latitudes is not None or longitudes is not None
                    or points_are_lats_longs):
                data_points, geo_reference = \
                    _set_using_lat_long(latitudes=latitudes,
                                        longitudes=longitudes,
                                        geo_reference=geo_reference,
                                        data_points=data_points,
                                        points_are_lats_longs=
                                            points_are_lats_longs)
            self.check_data_points(data_points)
            self.set_attributes(attributes)
            self.set_geo_reference(geo_reference)
            self.set_default_attribute_name(default_attribute_name)
        elif load_file_now is True:
            # watch for case where file name and points,
            # attributes etc are provided!!
            # if file name then all provided info will be removed!

            if verbose is True:
                if file_name is not None:
                    log.critical('Loading Geospatial data from file: %s'
                                 % file_name)

            # Loading the file sets data_points, attributes and geo_reference.
            self.import_points_file(file_name, verbose=verbose)

            self.check_data_points(self.data_points)
            self.set_attributes(self.attributes)
            self.set_geo_reference(self.geo_reference)
            self.set_default_attribute_name(default_attribute_name)

        if verbose is True:
            if file_name is not None:
                log.critical('Geospatial data created from file: %s'
                             % file_name)
                if load_file_now is False:
                    log.critical('Data will be loaded blockwise on demand')

                    if file_name.endswith('csv') or file_name.endswith('txt'):
                        pass
                        # This message was misleading.
                        # FIXME (Ole): Are we blocking here or not?
                        # print 'ASCII formats are not that great for '
                        # print 'blockwise reading. Consider storing this'
                        # print 'data as a pts NetCDF format'
197
198    ##
199    # @brief Return length of the points set.
200    def __len__(self):
201        return len(self.data_points)
202
203    ##
204    # @brief Return a string representation of the points set.
205    def __repr__(self):
206        return str(self.get_data_points(absolute=True))
207
208    ##
209    # @brief Check data points.
210    # @param data_points Points data to check and store in instance.
211    # @note Throws ValueError exception if no data.
212    def check_data_points(self, data_points):
213        """Checks data points"""
214
215        if data_points is None:
216            self.data_points = None
217            msg = 'There is no data or file provided!'
218            raise ValueError, msg
219        else:
220            self.data_points = ensure_numeric(data_points)
221            if not (0,) == self.data_points.shape:
222                assert len(self.data_points.shape) == 2
223                assert self.data_points.shape[1] == 2
224
225    ##
226    # @brief Check and assign attributes data.
227    # @param attributes Dictionary or scalar to save as .attributes.
228    # @note Throws exception if unable to convert dict keys to numeric.
229    def set_attributes(self, attributes):
230        """Check and assign attributes dictionary"""
231
232        if attributes is None:
233            self.attributes = None
234            return
235
236        if not isinstance(attributes, DictType):
237            # Convert single attribute into dictionary
238            attributes = {DEFAULT_ATTRIBUTE: attributes}
239
240        # Check input attributes
241        for key in attributes.keys():
242            try:
243                attributes[key] = ensure_numeric(attributes[key])
244            except:
245                msg = ("Attribute '%s' (%s) could not be converted to a"
246                       "numeric vector" % (str(key), str(attributes[key])))
247                raise Exception, msg
248
249        self.attributes = attributes
250
251    ##
252    # @brief Set the georeference of geospatial data.
253    # @param geo_reference The georeference data to set.
254    # @note Will raise exception if param not instance of Geo_reference.
255    def set_geo_reference(self, geo_reference):
256        """Set the georeference of geospatial data.
257
258        It can also be used to change the georeference and will ensure that
259        the absolute coordinate values are unchanged.
260        """
261
262        if geo_reference is None:
263            # Use default - points are in absolute coordinates
264            geo_reference = Geo_reference()
265
266        # Allow for tuple (zone, xllcorner, yllcorner)
267        geo_reference = ensure_geo_reference(geo_reference)
268
269        if not isinstance(geo_reference, Geo_reference):
270            # FIXME (Ole): This exception will be raised even
271            # if geo_reference is None. Is that the intent Duncan?
272            msg = ('Argument geo_reference must be a valid Geo_reference '
273                   'object or None.')
274            raise Expection, msg
275
276        # If a geo_reference already exists, change the point data according to
277        # the new geo reference
278        if  self.geo_reference is not None:
279            self.data_points = self.get_data_points(geo_reference=geo_reference)
280
281        self.geo_reference = geo_reference
282
283    ##
284    # @brief Set default attribute name.
285    # @param default_attribute_name The default to save.
286    def set_default_attribute_name(self, default_attribute_name):
287        self.default_attribute_name = default_attribute_name
288
289    ##
290    # @brief Set the instance verbose flag.
291    # @param verbose The value to save.
292    # @note Will raise exception if param is not True or False.
293    def set_verbose(self, verbose=False):
294        if verbose in [False, True]:
295            self.verbose = verbose
296        else:
297            msg = 'Illegal value: %s' % str(verbose)
298            raise Exception, msg
299
300    ##
301    # @brief Clip geospatial data by a given polygon.
302    # @param polygon The polygon to clip with.
303    # @param closed True if points on clip boundary are not included in result.
304    # @param verbose True if this function is verbose.
305    def clip(self, polygon, closed=True, verbose=False):
306        """Clip geospatial data by a polygon
307
308        Input
309          polygon - Either a list of points, an Nx2 array or
310                    a Geospatial data object.
311          closed - (optional) determine whether points on boundary should be
312          regarded as belonging to the polygon (closed = True)
313          or not (closed = False). Default is True.
314
315        Output
316          New geospatial data object representing points inside
317          specified polygon.
318
319
320        Note - this method is non-destructive and leaves the data in 'self'
321        unchanged
322        """
323
324        from anuga.utilities.polygon import inside_polygon
325
326        if isinstance(polygon, Geospatial_data):
327            # Polygon is an object - extract points
328            polygon = polygon.get_data_points()
329
330        points = self.get_data_points()
331        inside_indices = inside_polygon(points, polygon, closed, verbose)
332
333        clipped_G = self.get_sample(inside_indices)
334
335        return clipped_G
336
337    ##
338    # @brief Clip points data by polygon, return points outside polygon.
339    # @param polygon The polygon to clip with.
340    # @param closed True if points on clip boundary are not included in result.
341    # @param verbose True if this function is verbose.
342    def clip_outside(self, polygon, closed=True, verbose=False):
343        """Clip geospatial date by a polygon, keeping data OUTSIDE of polygon
344
345        Input
346          polygon - Either a list of points, an Nx2 array or
347                    a Geospatial data object.
348          closed - (optional) determine whether points on boundary should be
349          regarded as belonging to the polygon (closed = True)
350          or not (closed = False). Default is True.
351
352        Output
353          Geospatial data object representing point OUTSIDE specified polygon
354        """
355
356        from anuga.utilities.polygon import outside_polygon
357
358        if isinstance(polygon, Geospatial_data):
359            # Polygon is an object - extract points
360            polygon = polygon.get_data_points()
361
362        points = self.get_data_points()
363        outside_indices = outside_polygon(points, polygon, closed,verbose)
364
365        clipped_G = self.get_sample(outside_indices)
366
367        return clipped_G
368
369    ##
370    # @brief Get instance geo_reference data.
371    def get_geo_reference(self):
372        return self.geo_reference
373
374    ##
375    # @brief Get coordinates for all data points as an Nx2 array.
376    # @param absolute If True, return UTM, else relative to xll/yll corners.
377    # @param geo_reference If supplied, points are relative to it.
378    # @param as_lat_long If True, return points as lat/lon.
379    # @param isSouthHemisphere If True, return lat/lon points in S.Hemi.
380    # @return A set of data points, in appropriate form.
381    def get_data_points(self,
382                        absolute=True,
383                        geo_reference=None,
384                        as_lat_long=False,
385                        isSouthHemisphere=True):
386        """Get coordinates for all data points as an Nx2 array
387
388        If absolute is False returned coordinates are relative to the
389        internal georeference's xll and yll corners, otherwise
390        absolute UTM coordinates are returned.
391
392        If a geo_reference is passed the points are returned relative
393        to that geo_reference.
394
395        isSH (isSouthHemisphere) is only used when getting data
396        points "as_lat_long" is True and if FALSE will return lats and
397        longs valid for the Northern Hemisphere.
398
399        Default: absolute is True.
400        """
401
402        if as_lat_long is True:
403            msg = "Points need a zone to be converted into lats and longs"
404            assert self.geo_reference is not None, msg
405            zone = self.geo_reference.get_zone()
406            assert self.geo_reference.get_zone() is not DEFAULT_ZONE, msg
407            lats_longs = []
408            for point in self.get_data_points(True):
409                # UTMtoLL(northing, easting, zone,
410                lat_calced, long_calced = UTMtoLL(point[1], point[0],
411                                                  zone, isSouthHemisphere)
412                lats_longs.append((lat_calced, long_calced)) # to hash
413            return lats_longs
414
415        if absolute is True and geo_reference is None:
416            return self.geo_reference.get_absolute(self.data_points)
417        elif geo_reference is not None:
418            return geo_reference.change_points_geo_ref(self.data_points,
419                                                       self.geo_reference)
420        else:
421            # If absolute is False
422            return self.data_points
423
424    ##
425    # @brief Get value for attribute name.
426    # @param attribute_name Name to get value for.
427    # @note If name passed is None, return default attribute value.
428    def get_attributes(self, attribute_name=None):
429        """Return values for one named attribute.
430
431        If attribute_name is None, default_attribute_name is used
432        """
433
434        if attribute_name is None:
435            if self.default_attribute_name is not None:
436                attribute_name = self.default_attribute_name
437            else:
438                attribute_name = self.attributes.keys()[0]
439                # above line takes the first one from keys
440
441        if self.verbose is True:
442            log.critical('Using attribute %s' % attribute_name)
443            log.critical('Available attributes: %s' % (self.attributes.keys()))
444
445        msg = 'Attribute name %s does not exist in data set' % attribute_name
446        assert self.attributes.has_key(attribute_name), msg
447
448        return self.attributes[attribute_name]
449
450    ##
451    # @brief Get all instance attributes.
452    # @return The instance attribute dictionary, or None if no attributes.
453    def get_all_attributes(self):
454        """Return values for all attributes.
455        The return value is either None or a dictionary (possibly empty).
456        """
457
458        return self.attributes
459
    ##
    # @brief Override __add__() to allow addition of geospatial objects.
    # @param self This object.
    # @param other The second object (may be None).
    # @return A new Geospatial_data object holding both point sets.
    # @note Raises an exception if the two objects' attributes differ.
    def __add__(self, other):
        """Returns the addition of 2 geospatial objects,
        objects are concatenated to the end of each other

        NOTE: doesn't add if objects contain different
        attributes

        Always return absolute points!
        This also means, that if you add None to the object,
        it will be turned into absolute coordinates

        other can be None in which case nothing is added to self.
        """

        # find objects zone and checks if the same
        geo_ref1 = self.get_geo_reference()
        zone1 = geo_ref1.get_zone()

        if other is not None:
            geo_ref2 = other.get_geo_reference()
            zone2 = geo_ref2.get_zone()
            # Raises if the two zones cannot be reconciled.
            geo_ref1.reconcile_zones(geo_ref2)
            # Both point sets are converted to absolute coordinates so they
            # can be concatenated regardless of their local origins.
            new_points = num.concatenate((self.get_data_points(absolute=True),
                                          other.get_data_points(absolute=True)),
                                         axis = 0)

            # Concatenate attributes if any
            if self.attributes is None:
                if other.attributes is not None:
                    msg = ('Geospatial data must have the same '
                           'attributes to allow addition.')
                    raise Exception, msg

                new_attributes = None
            else:
                # Both objects must carry exactly the same attribute keys.
                new_attributes = {}
                for x in self.attributes.keys():
                    if other.attributes.has_key(x):
                        attrib1 = self.attributes[x]
                        attrib2 = other.attributes[x]
                        new_attributes[x] = num.concatenate((attrib1, attrib2),
                                                            axis=0) #??default#
                    else:
                        msg = ('Geospatial data must have the same '
                               'attributes to allow addition.')
                        raise Exception, msg
        else:
            # other is None: nothing to append, keep own (absolute) points.
            new_points = self.get_data_points(absolute=True)
            new_attributes = self.attributes

        # Instantiate new data object and return absolute coordinates
        new_geo_ref = Geo_reference(geo_ref1.get_zone(), 0.0, 0.0)
        return Geospatial_data(new_points, new_attributes, new_geo_ref)
519
520    ##
521    # @brief Override the addition case where LHS isn't geospatial object.
522    # @param self This object.
523    # @param other The second object.
524    # @return The new geospatial object.
525    def __radd__(self, other):
526        """Handle cases like None + Geospatial_data(...)"""
527
528        return self + other
529
530################################################################################
531#  IMPORT/EXPORT POINTS FILES
532################################################################################
533
    ##
    # @brief Import a .txt, .csv or .pts points data file.
    # @param file_name Path of the file to load.
    # @param delimiter Unused; kept for backward compatibility.
    # @param verbose True if this function is to be verbose.
    # @note Raises IOError or SyntaxError if there is a problem.
    def import_points_file(self, file_name, delimiter=None, verbose=False):
        """ load an .txt, .csv or .pts file

        Note: will throw an IOError/SyntaxError if it can't load the file.
        Catch these!

        Post condition: self.attributes dictionary has been set
        """

        # access() returning 0 means the file does not exist / is unreadable.
        if access(file_name, F_OK) == 0 :
            msg = 'File %s does not exist or is not accessible' % file_name
            raise IOError, msg

        attributes = {}
        # Dispatch on the file extension (last four characters).
        if file_name[-4:] == ".pts":
            try:
                data_points, attributes, geo_reference = \
                             _read_pts_file(file_name, verbose)
            except IOError, e:
                msg = 'Could not open file %s ' % file_name
                raise IOError, msg
        elif file_name[-4:] == ".txt" or file_name[-4:]== ".csv":
            try:
                data_points, attributes, geo_reference = \
                             _read_csv_file(file_name, verbose)
            except IOError, e:
                # This should only be if a file is not found
                msg = ('Could not open file %s. Check the file location.'
                       % file_name)
                raise IOError, msg
            except SyntaxError, e:
                # This should only be if there is a format error
                # NOTE(review): Error_message is assumed to be defined
                # elsewhere in this module - verify.
                msg = ('Problem with format of file %s.\n%s'
                       % (file_name, Error_message['IOError']))
                raise SyntaxError, msg
        else:
            msg = 'Extension %s is unknown' % file_name[-4:]
            raise IOError, msg

        # Loading succeeded: replace any existing data on this instance.
        self.data_points = data_points
        self.attributes = attributes
        self.geo_reference = geo_reference
582
    ##
    # @brief Write points data to a file (.csv, .txt, .pts or .urs).
    # @param file_name Path of file to write, including the extension.
    # @param absolute If True, write absolute points with a zeroed georeference.
    # @param as_lat_long If True, write points as lat/long (.csv/.txt only).
    # @param isSouthHemisphere If True, lat/long values are for the S.Hemi.
    # @note Raises IOError for an unknown file extension.
    def export_points_file(self, file_name, absolute=True,
                           as_lat_long=False, isSouthHemisphere=True):
        """write a points file as a text (.csv) or binary (.pts) file

        file_name is the file name, including the extension
        The point_dict is defined at the top of this file.

        If absolute is True data the xll and yll are added to the points value
        and the xll and yll of the geo_reference are set to 0.

        If absolute is False data points at returned as relative to the xll
        and yll and geo_reference remains unaffected

        isSouthHemisphere: is only used when getting data
        points "as_lat_long" is True and if FALSE will return lats and
        longs valid for the Northern Hemisphere.
        """

        if (file_name[-4:] == ".pts"):
            if absolute is True:
                # Write absolute points; zero the written georeference's
                # offsets so the file is self-contained.
                geo_ref = deepcopy(self.geo_reference)
                geo_ref.xllcorner = 0
                geo_ref.yllcorner = 0
                _write_pts_file(file_name,
                                self.get_data_points(absolute),
                                self.get_all_attributes(),
                                geo_ref)
            else:
                # Relative points keep the instance's own georeference.
                _write_pts_file(file_name,
                                self.get_data_points(absolute),
                                self.get_all_attributes(),
                                self.get_geo_reference())
        elif file_name[-4:] == ".txt" or file_name[-4:] == ".csv":
            # Text formats carry no georeference, so only absolute data
            # can be written.
            msg = "ERROR: trying to write a .txt file with relative data."
            assert absolute, msg
            _write_csv_file(file_name,
                            self.get_data_points(absolute=True,
                                                 as_lat_long=as_lat_long,
                                           isSouthHemisphere=isSouthHemisphere),
                            self.get_all_attributes(),
                            as_lat_long=as_lat_long)
        elif file_name[-4:] == ".urs" :
            # .urs files are always written as lat/long pairs.
            msg = "ERROR: Can not write a .urs file as a relative file."
            assert absolute, msg
            _write_urs_file(file_name,
                            self.get_data_points(as_lat_long=True,
                                           isSouthHemisphere=isSouthHemisphere))
        else:
            msg = 'Unknown file type %s ' %file_name
            raise IOError, msg
639
640    ##
641    # @brief Get a subset of data that is referred to by 'indices'.
642    # @param indices A list of indices to select data subset with.
643    # @return A geospatial object containing data subset.
644    def get_sample(self, indices):
645        """ Returns a object which is a subset of the original
646        and the data points and attributes in this new object refer to
647        the indices provided
648
649        Input
650            indices- a list of integers that represent the new object
651        Output
652            New geospatial data object representing points specified by
653            the indices
654        """
655
656        # FIXME: add the geo_reference to this
657        points = self.get_data_points()
658        sampled_points = num.take(points, indices, axis=0)
659
660        attributes = self.get_all_attributes()
661
662        sampled_attributes = {}
663        if attributes is not None:
664            for key, att in attributes.items():
665                sampled_attributes[key] = num.take(att, indices, axis=0)
666
667        return Geospatial_data(sampled_points, sampled_attributes)
668
    ##
    # @brief Randomly split this object into two disjoint objects.
    # @param factor Fraction of the points placed in the first result object.
    # @param seed_num Random seed - used mainly for unit tests.
    # @param verbose True if this function is to be verbose.
    # @note Points in each result object are selected randomly.
    def split(self, factor=0.5, seed_num=None, verbose=False):
        """Returns two geospatial_data object, first is the size of the 'factor'
        smaller the original and the second is the remainder. The two
        new objects are disjoint sets of each other.

        Points of the two new object have selected RANDOMLY.

        This method create two lists of indices which are passed into
        get_sample.  The lists are created using random numbers, and
        they are unique sets eg.  total_list(1,2,3,4,5,6,7,8,9)
        random_list(1,3,6,7,9) and remainder_list(0,2,4,5,8)

        Input -  the factor which to split the object, if 0.1 then 10% of the
                 together object will be returned

        Output - two geospatial_data objects that are disjoint sets of the
                 original
        """

        i = 0
        self_size = len(self)
        random_list = []
        remainder_list = []
        # Number of points that will go into the first (random) object.
        new_size = round(factor * self_size)

        # Find unique random numbers
        if verbose: log.critical("make unique random number list "
                                 "and get indices")

        total = num.array(range(self_size), num.int)    #array default#
        total_list = total.tolist()

        if verbose: log.critical("total list len=%d" % len(total_list))

        # There will be repeated random numbers however will not be a
        # problem as they are being 'pop'ed out of array so if there
        # are two numbers the same they will pop different indices,
        # still basically random
        ## create list of non-unique random numbers
        if verbose: log.critical("create random numbers list %s long"
                                 % str(new_size))

        # Set seed if provided, mainly important for unit test!
        # plus recalculate seed when no seed provided.
        if seed_num is not None:
            seed(seed_num)
        else:
            seed()

        random_num = randint(0, self_size-1, (int(new_size),))
        random_num = random_num.tolist()

        # need to sort and reverse so the pop() works correctly
        # (popping from the highest index down keeps lower indices valid)
        random_num.sort()
        random_num.reverse()

        if verbose: log.critical("make random number list and get indices")

        j = 0
        k = 1
        remainder_list = total_list[:]

        # pops array index (random_num) from remainder_list
        # (which starts as the total_list and appends to random_list)
        random_num_len = len(random_num)
        for i in random_num:
            random_list.append(remainder_list.pop(i))
            j += 1
            # log progress roughly every tenth of the way through
            if verbose and round(random_num_len/10*k) == j:
                log.critical('(%s/%s)' % (j, random_num_len))
                k += 1

        # FIXME: move to tests, it might take a long time
        # then create an array of random length between 500 and 1000,
        # and use a random factor between 0 and 1
        # setup for assertion
        test_total = random_list[:]
        test_total.extend(remainder_list)
        test_total.sort()
        msg = ('The two random lists made from the original list when added '
               'together DO NOT equal the original list')
        assert total_list == test_total, msg

        # Get new samples
        if verbose: log.critical("get values of indices for random list")
        G1 = self.get_sample(random_list)
        if verbose: log.critical("get values of indices for "
                                 "opposite of random list")
        G2 = self.get_sample(remainder_list)

        return G1, G2
767
768    ##
769    # @brief Allow iteration over this object.
770    def __iter__(self):
771        """Read in the header, number_of_points and save the
772        file pointer position
773        """
774
775        # FIXME - what to do if the file isn't there
776
777        # FIXME (Ole): Shouldn't this go into the constructor?
778        # This method acts like the constructor when blocking.
779        # ... and shouldn't it be called block_size?
780        #
781        if self.max_read_lines is None:
782            self.max_read_lines = MAX_READ_LINES
783
784        if self.file_name[-4:] == ".pts":
785            # See if the file is there.  Throw a QUIET IO error if it isn't
786            fd = open(self.file_name,'r')
787            fd.close()
788
789            # Throws prints to screen if file not present
790            self.fid = NetCDFFile(self.file_name, netcdf_mode_r)
791
792            (self.blocking_georef,
793             self.blocking_keys,
794             self.number_of_points) = _read_pts_file_header(self.fid,
795                                                            self.verbose)
796            self.start_row = 0
797            self.last_row = self.number_of_points
798            self.show_verbose = 0
799            self.verbose_block_size = (self.last_row + 10)/10
800            self.block_number = 0
801            self.number_of_blocks = self.number_of_points/self.max_read_lines
802            # This computes the number of full blocks. The last block may be
803            # smaller and won't be included in this estimate.
804
805            if self.verbose is True:
806                log.critical('Reading %d points (in ~%d blocks) from file %s. '
807                             % (self.number_of_points, self.number_of_blocks,
808                                self.file_name))
809                log.critical('Each block consists of %d data points'
810                             % self.max_read_lines)
811        else:
812            # Assume the file is a csv file
813            file_pointer = open(self.file_name)
814            self.header, self.file_pointer = _read_csv_file_header(file_pointer)
815            self.blocking_georef = None # Used for reconciling zones
816
817        return self
818
819    ##
820    # @brief Read another block into the instance.
    def next(self):
        """Read a block, instantiate a new Geospatial_data object from it
        and return it.

        Raises StopIteration when the file is exhausted; the blocking
        state created by __iter__ is torn down at that point.
        """

        if self.file_name[-4:] == ".pts":
            if self.start_row == self.last_row:
                # Read the end of the file last iteration
                # Remove blocking attributes
                self.fid.close()
                del self.max_read_lines
                del self.blocking_georef
                del self.last_row
                del self.start_row
                del self.blocking_keys
                del self.fid
                raise StopIteration
            # Clamp the block so it never reads past the last row.
            fin_row = self.start_row + self.max_read_lines
            if fin_row > self.last_row:
                fin_row = self.last_row

            if self.verbose is True:
                # Report progress roughly every verbose_block_size rows.
                if (self.show_verbose >= self.start_row
                    and self.show_verbose < fin_row):
                    log.critical('Reading block %d (points %d to %d) out of %d'
                                 % (self.block_number, self.start_row,
                                    fin_row, self.number_of_blocks))

                    self.show_verbose += max(self.max_read_lines,
                                             self.verbose_block_size)

            # Read next block
            pointlist, att_dict, = _read_pts_file_blocking(self.fid,
                                                           self.start_row,
                                                           fin_row,
                                                           self.blocking_keys)

            geo = Geospatial_data(pointlist, att_dict, self.blocking_georef)
            self.start_row = fin_row

            self.block_number += 1
        else:
            # Assume the file is a csv file
            try:
                (pointlist,
                 att_dict,
                 geo_ref,
                 self.file_pointer) = _read_csv_file_blocking(self.file_pointer,
                                                              self.header[:],
                                                              max_read_lines=
                                                           self.max_read_lines,
                                                              verbose=
                                                                  self.verbose)

                # Check that the zones haven't changed.
                if geo_ref is not None:
                    geo_ref.reconcile_zones(self.blocking_georef)
                    self.blocking_georef = geo_ref
                elif self.blocking_georef is not None:
                    msg = ('Geo reference given, then not given.'
                           ' This should not happen.')
                    raise ValueError, msg
                geo = Geospatial_data(pointlist, att_dict, geo_ref)
            except StopIteration:
                # End of file: release blocking state, then re-raise.
                self.file_pointer.close()
                del self.header
                del self.file_pointer
                raise StopIteration
            except ANUGAError:
                # Propagate after closing the open file.
                self.file_pointer.close()
                del self.header
                del self.file_pointer
                raise
            except SyntaxError:
                self.file_pointer.close()
                del self.header
                del self.file_pointer
                # This should only be if there is a format error
                msg = ('Could not open file %s.\n%s'
                       % (self.file_name, Error_message['IOError']))
                raise SyntaxError, msg
        return geo
901
902##################### Error messages ###########
903Error_message = {}
904Em = Error_message
905Em['IOError'] = ('NOTE: The format for a comma separated .txt/.csv file is:\n'
906                 '        1st line:     [column names]\n'
907                 '        other lines:  [x value], [y value], [attributes]\n'
908                 '\n'
909                 '           for example:\n'
910                 '           x, y, elevation, friction\n'
911                 '           0.6, 0.7, 4.9, 0.3\n'
912                 '           1.9, 2.8, 5, 0.3\n'
913                 '           2.7, 2.4, 5.2, 0.3\n'
914                 '\n'
915                 'The first two columns are assumed to be x, y coordinates.\n'
916                 'The attribute values must be numeric.\n')
917
918##
919# @brief ??
920# @param latitudes ??
921# @param longitudes ??
922# @param geo_reference ??
923# @param data_points ??
924# @param points_are_lats_longs ??
925# @note IS THIS USED???
def _set_using_lat_long(latitudes,
                        longitudes,
                        geo_reference,
                        data_points,
                        points_are_lats_longs):
    """If the points has lat long info, assume it is in (lat, long) order.

    Validates the combination of lat/long style arguments, then converts
    the latitudes and longitudes to UTM data points.

    Returns a tuple (data_points, Geo_reference) where the geo reference
    carries the UTM zone computed during conversion.

    Raises ValueError for any inconsistent argument combination.
    """

    # Lat/long input is mutually exclusive with an explicit georeference.
    if geo_reference is not None:
        raise ValueError('A georeference is specified yet latitude and '
                         'longitude are also specified!')

    # ... and with data points that are already x/y pairs.
    if data_points is not None and not points_are_lats_longs:
        raise ValueError('Data points are specified yet latitude and '
                         'longitude are also specified.')

    if points_are_lats_longs:
        # Data points are (lat, long) pairs - split them into columns.
        if data_points is None:
            raise ValueError("Data points are not specified.")
        lats_longs = ensure_numeric(data_points)
        latitudes = num.ravel(lats_longs[:, 0:1])
        longitudes = num.ravel(lats_longs[:, 1:])

    if latitudes is None and longitudes is None:
        raise ValueError("Latitudes and Longitudes are not specified.")
    if latitudes is None:
        raise ValueError("Longitudes are specified yet latitudes aren't.")
    if longitudes is None:
        raise ValueError("Latitudes are specified yet longitudes aren't.")

    # Convert to UTM; the conversion also determines the zone.
    data_points, zone = convert_from_latlon_to_utm(latitudes=latitudes,
                                                   longitudes=longitudes)

    return data_points, Geo_reference(zone=zone)
966
967
968##
969# @brief Read a .pts data file.
970# @param file_name Path to file to read.
971# @param verbose True if this function is to be verbose.
972# @return (pointlist, attributes, geo_reference)
def _read_pts_file(file_name, verbose=False):
    """Read .pts NetCDF file

    Return a (pointlist, attributes, geo_reference) tuple, e.g.
    pointlist = [[1.0,2.0],[3.0,5.0]]
    attributes['elevation'] = [7.0,5.0]
    geo_reference is a Geo_reference instance, or None when the file
    carries no georeference attributes.

    Raises IOError if the file is missing or lacks a 'points' variable.
    """

    if verbose: log.critical('Reading %s' % file_name)

    # See if the file is there.  Throw a QUIET IO error if it isn't
    fd = open(file_name, 'r')
    fd.close()

    # Throws prints to screen if file not present
    fid = NetCDFFile(file_name, netcdf_mode_r)

    pointlist = num.array(fid.variables['points'])
    # list() guards against mappings whose keys() is not a plain list.
    keys = list(fid.variables.keys())

    if verbose: log.critical('Got %d variables: %s' % (len(keys), keys))

    # Every remaining variable is treated as an attribute.
    # BUG FIX: list.remove raises ValueError (not IOError) when the item
    # is missing, so the original 'except IOError' could never trigger.
    try:
        keys.remove('points')
    except ValueError:
        fid.close()
        msg = "Expected keyword 'points' but could not find it"
        raise IOError(msg)

    attributes = {}
    for key in keys:
        if verbose: log.critical("reading attribute '%s'" % key)

        attributes[key] = num.array(fid.variables[key])

    # Files without georeference attributes yield geo_reference = None.
    try:
        geo_reference = Geo_reference(NetCDFObject=fid)
    except AttributeError:
        geo_reference = None

    fid.close()

    return pointlist, attributes, geo_reference
1017
1018
1019##
1020# @brief Read a .csv data file.
1021# @param file_name Path to the .csv file to read.
1022# @param verbose True if this function is to be verbose.
def _read_csv_file(file_name, verbose=False):
    """Read .csv file

    Return a tuple (pointlist, att_dict, geo_ref), e.g.
    pointlist = [[1.0,2.0],[3.0,5.0]]
    att_dict['elevation'] = [7.0,5.0]
    """

    file_pointer = open(file_name)
    header, file_pointer = _read_csv_file_header(file_pointer)

    # Read the whole body as a single huge 'block'.
    # If the file is bigger than this, block..
    # FIXME (Ole) What's up here?
    try:
        result = _read_csv_file_blocking(file_pointer,
                                         header,
                                         max_read_lines=1e30)
        pointlist, att_dict, geo_ref, file_pointer = result
    except ANUGAError:
        file_pointer.close()
        raise

    file_pointer.close()

    return pointlist, att_dict, geo_ref
1050
1051
1052##
1053# @brief Read a .csv file header.
1054# @param file_pointer Open descriptor of the file to read.
1055# @param delimiter Header line delimiter string, split on this string.
1056# @param verbose True if this function is to be verbose.
1057# @return A tuple of (<cleaned header string>, <input file_pointer>)
1058
# Field separator shared by the csv reading/writing helpers below.
CSV_DELIMITER = ','

def _read_csv_file_header(file_pointer,
                          delimiter=CSV_DELIMITER,
                          verbose=False):
    """Read the header of a .csv file
    Return a list of the header names
    """

    header = clean_line(file_pointer.readline(), delimiter)

    return header, file_pointer
1072
1073##
1074# @brief Read a .csv file, with blocking.
1075# @param file_pointer Open descriptor of the file to read.
1076# @param header List of already read .csv header fields.
1077# @param delimiter Delimiter string header was split on.
1078# @param max_read_lines The max number of lines to read before blocking.
1079# @param verbose True if this function is to be verbose.
1080# @note Will throw IndexError, SyntaxError exceptions.
def _read_csv_file_blocking(file_pointer,
                            header,
                            delimiter=CSV_DELIMITER,
                            max_read_lines=MAX_READ_LINES,
                            verbose=False):
    """Read the body of a .csv file.

    header: The list header of the csv file, with the x and y labels.

    Returns (pointlist, att_dict, geo_ref, file_pointer); geo_ref is
    only set when the first two header labels mark lat/long columns.

    Raises SyntaxError on malformed input, StopIteration when there are
    no points left to read.
    """

    points = []
    att_dict = {}

    # This is to remove the x and y headers.
    header = header[:]
    try:
        x_header = header.pop(0)
        y_header = header.pop(0)
    except IndexError:
        # if there are not two columns this will occur.
        # eg if it is a space seperated file
        raise SyntaxError

    read_lines = 0
    while read_lines < max_read_lines:
        line = file_pointer.readline()
        numbers = clean_line(line, delimiter)
        if len(numbers) <= 1:
            break               # EOF or a line with no data
        if line[0] == '#':
            continue            # skip comment lines

        read_lines += 1

        try:
            x = float(numbers[0])
            y = float(numbers[1])
            points.append([x, y])
            numbers.pop(0)
            numbers.pop(0)
            if len(header) != len(numbers):
                file_pointer.close()
                msg = ('File load error. '
                       'There might be a problem with the file header.')
                raise SyntaxError(msg)
            for i, n in enumerate(numbers):
                # BUG FIX: 'n.strip()' previously discarded its result
                # (strings are immutable), so empty/whitespace fields
                # were never actually skipped by the check below.
                n = n.strip()
                if n != '\n' and n != '':
                    att_dict.setdefault(header[i], []).append(float(n))
        except ValueError:
            raise SyntaxError

    if points == []:
        raise StopIteration

    # num.float was an alias of the builtin float (removed in NumPy 1.24)
    pointlist = num.array(points, float)
    for key in att_dict.keys():
        att_dict[key] = num.array(att_dict[key], float)

    # Do stuff here so the info is in lat's and longs
    geo_ref = None
    x_header = x_header[:3].lower()
    y_header = y_header[:3].lower()
    if (x_header == 'lon' or x_header == 'lat') \
       and (y_header == 'lon' or y_header == 'lat'):
        if x_header == 'lon':
            longitudes = num.ravel(pointlist[:, 0:1])
            latitudes = num.ravel(pointlist[:, 1:])
        else:
            latitudes = num.ravel(pointlist[:, 0:1])
            longitudes = num.ravel(pointlist[:, 1:])

        pointlist, geo_ref = _set_using_lat_long(latitudes,
                                                 longitudes,
                                                 geo_reference=None,
                                                 data_points=None,
                                                 points_are_lats_longs=False)

    return pointlist, att_dict, geo_ref, file_pointer
1160
1161
1162##
1163# @brief Read a .pts file header.
1164# @param fid Handle to the open .pts file.
1165# @param verbose True if the function is to be verbose.
1166# @return (geo_reference, keys, fid.dimensions['number_of_points'])
1167# @note Will throw IOError and AttributeError exceptions.
def _read_pts_file_header(fid, verbose=False):
    '''Read the geo_reference and number_of_points from a .pts file.

    fid: an open NetCDF file object.

    Returns (geo_reference, keys, number_of_points) where keys lists the
    attribute variable names (all variables except 'points') and
    geo_reference is None when the file carries no georeference.

    Raises IOError if the mandatory 'points' variable is missing.
    '''

    # list() guards against mappings whose keys() is not a plain list.
    keys = list(fid.variables.keys())

    # BUG FIX: list.remove raises ValueError (not IOError) when the item
    # is absent, so the original 'except IOError' could never trigger.
    try:
        keys.remove('points')
    except ValueError:
        fid.close()
        msg = "Expected keyword 'points' but could not find it."
        raise IOError(msg)

    if verbose: log.critical('Got %d variables: %s' % (len(keys), keys))

    # Missing georeference attributes cause AttributeError -> None.
    try:
        geo_reference = Geo_reference(NetCDFObject=fid)
    except AttributeError:
        geo_reference = None

    return geo_reference, keys, fid.dimensions['number_of_points']
1187
1188
1189##
1190# @brief Read the body of a .pts file, with blocking.
1191# @param fid Handle to already open file.
1192# @param start_row Start row index of points to return.
1193# @param fin_row End row index of points to return.
1194# @param keys Iterable of keys to return.
1195# @return Tuple of (pointlist, attributes).
1196def _read_pts_file_blocking(fid, start_row, fin_row, keys):
1197    '''Read the body of a .pts file.'''
1198
1199    pointlist = num.array(fid.variables['points'][start_row:fin_row])
1200
1201    attributes = {}
1202    for key in keys:
1203        attributes[key] = num.array(fid.variables[key][start_row:fin_row])
1204
1205    return pointlist, attributes
1206
1207
1208##
1209# @brief Write a .pts data file.
1210# @param file_name Path to the file to write.
1211# @param write_data_points Data points to write.
1212# @param write_attributes Attributes to write.
1213# @param write_geo_reference Georef to write.
def _write_pts_file(file_name,
                    write_data_points,
                    write_attributes=None,
                    write_geo_reference=None):
    """Write a .pts NetCDF file.

    file_name: path of the file to create.
    write_data_points: Nx2 array of point coordinates.
    write_attributes: optional dict of attribute name -> N values.
    write_geo_reference: optional georeference stored in the file.

    WARNING: This function mangles the point_atts data structure.
    FIXME: (DSG) This format has issues - there can't be an attribute
    called 'points'; consider a format change.
    """

    # NetCDF file definition - create new file
    outfile = NetCDFFile(file_name, netcdf_mode_w)
    outfile.institution = 'Geoscience Australia'
    outfile.description = ('NetCDF format for compact and portable storage '
                           'of spatial point data')

    # Dimension definitions
    number_of_points = write_data_points.shape[0]
    outfile.createDimension('number_of_points', number_of_points)
    outfile.createDimension('number_of_dimensions', 2) # This is 2d data

    # The points themselves
    outfile.createVariable('points', netcdf_float,
                           ('number_of_points', 'number_of_dimensions'))
    outfile.variables['points'][:] = write_data_points

    # One NetCDF variable per attribute
    if write_attributes is not None:
        for key in write_attributes.keys():
            outfile.createVariable(key, netcdf_float, ('number_of_points',))
            outfile.variables[key][:] = write_attributes[key]

    if write_geo_reference is not None:
        write_NetCDF_georeference(write_geo_reference, outfile)

    outfile.close()
1264
1265
1266##
1267# @brief Write a .csv data file.
1268# @param file_name Path to the file to write.
1269# @param write_data_points Data points to write.
1270# @param write_attributes Attributes to write.
1271# @param as_lat_long True if points are lat/lon, else x/y.
1272# @param delimiter The CSV delimiter to use.
1273def _write_csv_file(file_name,
1274                    write_data_points,
1275                    write_attributes=None,
1276                    as_lat_long=False,
1277                    delimiter=','):
1278    """Write a .csv file."""
1279
1280    points = write_data_points
1281    pointattributes = write_attributes
1282
1283    fd = open(file_name, 'w')
1284
1285    if as_lat_long:
1286        titlelist = "latitude" + delimiter + "longitude"  + delimiter
1287    else:
1288        titlelist = "x" + delimiter + "y"  + delimiter
1289
1290    if pointattributes is not None:
1291        for title in pointattributes.keys():
1292            titlelist = titlelist + title + delimiter
1293        titlelist = titlelist[0:-len(delimiter)] # remove the last delimiter
1294
1295    fd.write(titlelist + "\n")
1296
1297    # <x/lat> <y/long> [attributes]
1298    for i, vert in enumerate( points):
1299        if pointattributes is not None:
1300            attlist = ","
1301            for att in pointattributes.keys():
1302                attlist = attlist + str(pointattributes[att][i]) + delimiter
1303            attlist = attlist[0:-len(delimiter)] # remove the last delimiter
1304            attlist.strip()
1305        else:
1306            attlist = ''
1307
1308        fd.write(str(vert[0]) + delimiter + str(vert[1]) + attlist + "\n")
1309
1310    fd.close()
1311
1312
1313##
1314# @brief Write a URS file.
1315# @param file_name The path of the file to write.
1316# @param points
1317# @param delimiter
1318def _write_urs_file(file_name, points, delimiter=' '):
1319    """Write a URS format file.
1320    export a file, file_name, with the urs format
1321    the data points are in lats and longs
1322    """
1323
1324    fd = open(file_name, 'w')
1325
1326    # first line is # points
1327    fd.write(str(len(points)) + "\n")
1328
1329    # <lat> <long> <id#>
1330    for i, vert in enumerate( points):
1331        fd.write(str(round(vert[0],7)) + delimiter +
1332                 str(round(vert[1],7)) + delimiter + str(i) + "\n")
1333
1334    fd.close()
1335
1336
1337##
1338# @brief ??
1339# @param point_atts ??
1340# @return ??
1341def _point_atts2array(point_atts):
1342    point_atts['pointlist'] = num.array(point_atts['pointlist'], num.float)
1343
1344    for key in point_atts['attributelist'].keys():
1345        point_atts['attributelist'][key] = \
1346                num.array(point_atts['attributelist'][key], num.float)
1347
1348    return point_atts
1349
1350
1351##
1352# @brief Convert geospatial object to a points dictionary.
1353# @param geospatial_data The geospatial object to convert.
1354# @return A points dictionary.
def geospatial_data2points_dictionary(geospatial_data):
    """Convert geospatial data to points_dictionary

    The returned dictionary has keys 'pointlist', 'attributelist'
    (a dict of attribute name -> values) and 'geo_reference'.
    """

    attributelist = {}
    for attribute_name in geospatial_data.attributes.keys():
        attributelist[attribute_name] = \
                geospatial_data.attributes[attribute_name]

    return {'pointlist': geospatial_data.data_points,
            'attributelist': attributelist,
            'geo_reference': geospatial_data.geo_reference}
1370
1371
1372##
1373# @brief Convert a points dictionary to a geospatial object.
1374# @param points_dictionary A points dictionary to convert.
def points_dictionary2geospatial_data(points_dictionary):
    """Convert points_dictionary to geospatial data object

    The dictionary must contain the keys 'pointlist' and
    'attributelist'; 'geo_reference' is optional (defaults to None).

    Returns a Geospatial_data object.
    """

    # dict.has_key() was removed in Python 3; the 'in' operator is the
    # equivalent form supported by both Python 2 and 3.
    msg = "Points dictionary must have key 'pointlist'"
    assert 'pointlist' in points_dictionary, msg

    msg = "Points dictionary must have key 'attributelist'"
    assert 'attributelist' in points_dictionary, msg

    geo = points_dictionary.get('geo_reference', None)

    return Geospatial_data(points_dictionary['pointlist'],
                           points_dictionary['attributelist'],
                           geo_reference=geo)
1392
1393
1394##
1395# @brief Ensure that points are in absolute coordinates.
1396# @param points A list or array of points to check, or geospatial object.
1397# @param geo_reference If supplied,
1398# @return ??
def ensure_absolute(points, geo_reference=None):
    """Ensure that points are in absolute coordinates.

    points: list/array of [x, y] coordinate pairs, a Geospatial_data
            object, or the name of a points file.
    geo_reference: optional Geo_reference object or 3-tuple of
                   (UTM zone, easting, northing); when given, the
                   points are taken to be relative to it.

    Returns a numeric array of absolute points.
    """

    # A string argument names a points file.
    if isinstance(points, basestring):
        points = Geospatial_data(file_name=points)

    if isinstance(points, Geospatial_data):
        points = points.get_data_points(absolute=True)
        msg = 'Use a Geospatial_data object or a mesh origin, not both.'
        assert geo_reference == None, msg
    else:
        points = ensure_numeric(copy.copy(points), float)

    # Normalise geo_reference and convert points to absolute.
    if geo_reference is not None:
        if isinstance(geo_reference, Geo_reference):
            geo = geo_reference
        else:
            geo = Geo_reference(geo_reference[0],
                                geo_reference[1],
                                geo_reference[2])
        points = geo.get_absolute(points)

    return points
1440
1441
1442##
1443# @brief
1444# @param points
1445# @param geo_reference
1446# @return A geospatial object.
def ensure_geospatial(points, geo_reference=None):
    """Convert various data formats to a Geospatial_data instance.

    points: list/array of [x, y] coordinate pairs, or an existing
            Geospatial_data object (returned unchanged).
    geo_reference: optional Geo_reference object or 3-tuple of
                   (UTM zone, easting, northing) the points are
                   relative to.

    Returns a Geospatial_data object.
    """

    # An existing geospatial object passes straight through.
    if isinstance(points, Geospatial_data):
        msg = "Use a Geospatial_data object or a mesh origin, not both."
        assert geo_reference is None, msg
        return points

    # Otherwise assume a list/array of absolute points.
    points = ensure_numeric(points, float)

    # Normalise the geo reference to a Geo_reference object (or None).
    if geo_reference is None:
        geo = None
    elif isinstance(geo_reference, Geo_reference):
        geo = geo_reference
    else:
        geo = Geo_reference(geo_reference[0],
                            geo_reference[1],
                            geo_reference[2])

    # Create Geospatial_data object with appropriate geo reference and return
    return Geospatial_data(data_points=points, geo_reference=geo)
1484
1485
1486##
1487# @brief
1488# @param data_file
1489# @param alpha_list
1490# @param mesh_file
1491# @param boundary_poly
1492# @param mesh_resolution
1493# @param north_boundary
1494# @param south_boundary
1495# @param east_boundary
1496# @param west_boundary
1497# @param plot_name
1498# @param split_factor
1499# @param seed_num
1500# @param cache
1501# @param verbose
1502def find_optimal_smoothing_parameter(data_file,
1503                                     alpha_list=None,
1504                                     mesh_file=None,
1505                                     boundary_poly=None,
1506                                     mesh_resolution=100000,
1507                                     north_boundary=None,
1508                                     south_boundary=None,
1509                                     east_boundary=None,
1510                                     west_boundary=None,
1511                                     plot_name='all_alphas',
1512                                     split_factor=0.1,
1513                                     seed_num=None,
1514                                     cache=False,
1515                                     verbose=False):
1516    """Removes a small random sample of points from 'data_file'.
1517    Then creates models with different alpha values from 'alpha_list' and
1518    cross validates the predicted value to the previously removed point data.
1519    Returns the alpha value which has the smallest covariance.
1520
1521    data_file: must not contain points outside the boundaries defined
1522               and it must be either a pts, txt or csv file.
1523
1524    alpha_list: the alpha values to test in a single list
1525
1526    mesh_file: name of the created mesh file or if passed in will read it.
1527               NOTE, if there is a mesh file mesh_resolution,
1528               north_boundary, south... etc will be ignored.
1529
1530    mesh_resolution: the maximum area size for a triangle
1531
1532    north_boundary... west_boundary: the value of the boundary
1533
1534    plot_name: the name for the plot contain the results
1535
1536    seed_num: the seed to the random number generator
1537
1538    USAGE:
1539        value, alpha = find_optimal_smoothing_parameter(data_file=fileName,
1540                                             alpha_list=[0.0001, 0.01, 1],
1541                                             mesh_file=None,
1542                                             mesh_resolution=3,
1543                                             north_boundary=5,
1544                                             south_boundary=-5,
1545                                             east_boundary=5,
1546                                             west_boundary=-5,
1547                                             plot_name='all_alphas',
1548                                             seed_num=100000,
1549                                             verbose=False)
1550
1551    OUTPUT: returns the minumum normalised covalance calculate AND the
1552           alpha that created it. PLUS writes a plot of the results
1553
1554    NOTE: code will not work if the data_file extent is greater than the
1555    boundary_polygon or any of the boundaries, eg north_boundary...west_boundary
1556    """
1557
1558    from anuga.shallow_water import Domain
1559    from anuga.geospatial_data.geospatial_data import Geospatial_data
1560    from anuga.pmesh.mesh_interface import create_mesh_from_regions
1561    from anuga.utilities.numerical_tools import cov
1562    from anuga.utilities.polygon import is_inside_polygon
1563    from anuga.fit_interpolate.benchmark_least_squares import mem_usage
1564
1565    attribute_smoothed = 'elevation'
1566
1567    if mesh_file is None:
1568        if verbose: log.critical("building mesh")
1569        mesh_file = 'temp.msh'
1570
1571        if (north_boundary is None or south_boundary is None
1572            or east_boundary is None or west_boundary is None):
1573            no_boundary = True
1574        else:
1575            no_boundary = False
1576
1577        if no_boundary is True:
1578            msg = 'All boundaries must be defined'
1579            raise Expection, msg
1580
1581        poly_topo = [[east_boundary, south_boundary],
1582                     [east_boundary, north_boundary],
1583                     [west_boundary, north_boundary],
1584                     [west_boundary, south_boundary]]
1585
1586        create_mesh_from_regions(poly_topo,
1587                                 boundary_tags={'back': [2],
1588                                                'side': [1,3],
1589                                                'ocean': [0]},
1590                                 maximum_triangle_area=mesh_resolution,
1591                                 filename=mesh_file,
1592                                 use_cache=cache,
1593                                 verbose=verbose)
1594
1595    else: # if mesh file provided
1596        # test mesh file exists?
1597        if verbose: "reading from file: %s" % mesh_file
1598        if access(mesh_file,F_OK) == 0:
1599            msg = "file %s doesn't exist!" % mesh_file
1600            raise IOError, msg
1601
1602    # split topo data
1603    if verbose: log.critical('Reading elevation file: %s' % data_file)
1604    G = Geospatial_data(file_name = data_file)
1605    if verbose: log.critical('Start split')
1606    G_small, G_other = G.split(split_factor, seed_num, verbose=verbose)
1607    if verbose: log.critical('Finish split')
1608    points = G_small.get_data_points()
1609
1610    if verbose: log.critical("Number of points in sample to compare: %d"
1611                             % len(points))
1612
1613    if alpha_list == None:
1614        alphas = [0.001,0.01,100]
1615        #alphas = [0.000001, 0.00001, 0.0001, 0.001, 0.01,
1616        #          0.1, 1.0, 10.0, 100.0,1000.0,10000.0]
1617    else:
1618        alphas = alpha_list
1619
1620    # creates array with columns 1 and 2 are x, y. column 3 is elevation
1621    # 4 onwards is the elevation_predicted using the alpha, which will
1622    # be compared later against the real removed data
1623    data = num.array([], dtype=num.float)
1624
1625    data = num.resize(data, (len(points), 3+len(alphas)))
1626
1627    # gets relative point from sample
1628    data[:,0] = points[:,0]
1629    data[:,1] = points[:,1]
1630    elevation_sample = G_small.get_attributes(attribute_name=attribute_smoothed)
1631    data[:,2] = elevation_sample
1632
1633    normal_cov = num.array(num.zeros([len(alphas), 2]), dtype=num.float)
1634
1635    if verbose: log.critical('Setup computational domains with '
1636                             'different alphas')
1637
1638    for i, alpha in enumerate(alphas):
1639        # add G_other data to domains with different alphas
1640        if verbose:
1641            log.critical('Calculating domain and mesh for Alpha=%s'
1642                         % str(alpha))
1643        domain = Domain(mesh_file, use_cache=cache, verbose=verbose)
1644        if verbose: log.critical(domain.statistics())
1645        domain.set_quantity(attribute_smoothed,
1646                            geospatial_data=G_other,
1647                            use_cache=cache,
1648                            verbose=verbose,
1649                            alpha=alpha)
1650
1651        # Convert points to geospatial data for use with get_values below
1652        points_geo = Geospatial_data(points, domain.geo_reference)
1653
1654        # returns the predicted elevation of the points that were "split" out
1655        # of the original data set for one particular alpha
1656        if verbose: log.critical('Get predicted elevation for location '
1657                                 'to be compared')
1658        elevation_predicted = \
1659                domain.quantities[attribute_smoothed].\
1660                    get_values(interpolation_points=points_geo)
1661
1662        # add predicted elevation to array that starts with x, y, z...
1663        data[:,i+3] = elevation_predicted
1664
1665        sample_cov = cov(elevation_sample)
1666        ele_cov = cov(elevation_sample - elevation_predicted)
1667        normal_cov[i,:] = [alpha, ele_cov / sample_cov]
1668
1669        if verbose:
1670            log.critical('Covariance for alpha %s=%s'
1671                         % (normal_cov[i][0], normal_cov[i][1]))
1672            log.critical('--------------------------------------------')
1673
1674    normal_cov0 = normal_cov[:,0]
1675    normal_cov_new = num.take(normal_cov, num.argsort(normal_cov0), axis=0)
1676
1677    if plot_name is not None:
1678        from pylab import savefig, semilogx, loglog
1679
1680        semilogx(normal_cov_new[:,0], normal_cov_new[:,1])
1681        loglog(normal_cov_new[:,0], normal_cov_new[:,1])
1682        savefig(plot_name, dpi=300)
1683
1684    if mesh_file == 'temp.msh':
1685        remove(mesh_file)
1686
1687    if verbose:
1688        log.critical('Final results:')
1689        for i, alpha in enumerate(alphas):
1690            log.critical('covariance for alpha %s = %s '
1691                         % (normal_cov[i][0], normal_cov[i][1]))
1692        log.critical('Optimal alpha is: %s '
1693                     % normal_cov_new[(num.argmin(normal_cov_new, axis=0))[1], 0])
1694
1695    # covariance and optimal alpha
1696    return (min(normal_cov_new[:,1]),
1697            normal_cov_new[(num.argmin(normal_cov_new,axis=0))[1],0])
1698
1699
1700##
1701# @brief
1702# @param data_file
1703# @param alpha_list
1704# @param mesh_file
1705# @param boundary_poly
1706# @param mesh_resolution
1707# @param north_boundary
1708# @param south_boundary
1709# @param east_boundary
1710# @param west_boundary
1711# @param plot_name
1712# @param split_factor
1713# @param seed_num
1714# @param cache
1715# @param verbose
def old_find_optimal_smoothing_parameter(data_file,
                                         alpha_list=None,
                                         mesh_file=None,
                                         boundary_poly=None,
                                         mesh_resolution=100000,
                                         north_boundary=None,
                                         south_boundary=None,
                                         east_boundary=None,
                                         west_boundary=None,
                                         plot_name='all_alphas',
                                         split_factor=0.1,
                                         seed_num=None,
                                         cache=False,
                                         verbose=False):
    """Find the alpha with the minimum normalised covariance (deprecated).

    Superseded by find_optimal_smoothing_parameter(); kept for reference.
    Unlike the new version, this builds one Domain per alpha up-front
    (held in a dict), which is more memory-hungry.

    data_file: must not contain points outside the boundaries defined
               and it either a pts, txt or csv file.

    alpha_list: the alpha values to test in a single list

    mesh_file: name of the created mesh file or if passed in will read it.
               NOTE, if there is a mesh file mesh_resolution,
               north_boundary, south... etc will be ignored.

    mesh_resolution: the maximum area size for a triangle

    north_boundary... west_boundary: the value of the boundary

    plot_name: the name for the plot contain the results

    seed_num: the seed to the random number generator

    USAGE:
        value, alpha = find_optimal_smoothing_parameter(data_file=fileName,
                                             alpha_list=[0.0001, 0.01, 1],
                                             mesh_file=None,
                                             mesh_resolution=3,
                                             north_boundary=5,
                                             south_boundary=-5,
                                             east_boundary=5,
                                             west_boundary=-5,
                                             plot_name='all_alphas',
                                             seed_num=100000,
                                             verbose=False)

    OUTPUT: returns the minimum normalised covariance calculated AND the
            alpha that created it. PLUS writes a plot of the results

    NOTE: code will not work if the data_file extent is greater than the
          boundary_polygon or the north_boundary...west_boundary
    """

    from anuga.shallow_water import Domain
    from anuga.geospatial_data.geospatial_data import Geospatial_data
    from anuga.pmesh.mesh_interface import create_mesh_from_regions
    from anuga.utilities.numerical_tools import cov
    from anuga.fit_interpolate.benchmark_least_squares import mem_usage

    attribute_smoothed = 'elevation'

    if mesh_file is None:
        mesh_file = 'temp.msh'

        # All four boundary values are required to build the mesh.
        if (north_boundary is None or south_boundary is None
            or east_boundary is None or west_boundary is None):
            msg = 'All boundaries must be defined'
            # Was 'raise Expection, msg' - a typo that raised NameError
            # instead of the intended message.
            raise Exception(msg)

        # Rectangular boundary polygon; tags index its edges.
        poly_topo = [[east_boundary, south_boundary],
                     [east_boundary, north_boundary],
                     [west_boundary, north_boundary],
                     [west_boundary, south_boundary]]

        create_mesh_from_regions(poly_topo,
                                 boundary_tags={'back': [2],
                                                'side': [1,3],
                                                'ocean': [0]},
                                 maximum_triangle_area=mesh_resolution,
                                 filename=mesh_file,
                                 use_cache=cache,
                                 verbose=verbose)

    else: # if mesh file provided
        # test mesh file exists?
        if access(mesh_file, F_OK) == 0:
            msg = "file %s doesn't exist!" % mesh_file
            raise IOError(msg)

    # Split topo data: G_small is withheld for comparison against the
    # predictions, G_other is what each domain is fitted to.
    G = Geospatial_data(file_name=data_file)
    if verbose: log.critical('start split')
    G_small, G_other = G.split(split_factor, seed_num, verbose=verbose)
    if verbose: log.critical('finish split')
    points = G_small.get_data_points()

    if verbose: log.critical("Number of points in sample to compare: %d"
                             % len(points))

    if alpha_list is None:
        alphas = [0.001, 0.01, 100]
        #alphas = [0.000001, 0.00001, 0.0001, 0.001, 0.01,
        #          0.1, 1.0, 10.0, 100.0,1000.0,10000.0]
    else:
        alphas = alpha_list

    domains = {}

    if verbose: log.critical('Setup computational domains with '
                             'different alphas')

    for alpha in alphas:
        # add G_other data to domains with different alphas
        if verbose:
            log.critical('Calculating domain and mesh for Alpha = %s' %
                         str(alpha))
        domain = Domain(mesh_file, use_cache=cache, verbose=verbose)
        if verbose: log.critical(domain.statistics())
        domain.set_quantity(attribute_smoothed,
                            geospatial_data=G_other,
                            use_cache=cache,
                            verbose=verbose,
                            alpha=alpha)
        domains[alpha] = domain

    # creates array with columns 1 and 2 are x, y. column 3 is elevation
    # 4 onwards is the elevation_predicted using the alpha, which will
    # be compared later against the real removed data
    data = num.array([], dtype=num.float)

    data = num.resize(data, (len(points), 3+len(alphas)))

    # gets relative point from sample
    data[:,0] = points[:,0]
    data[:,1] = points[:,1]
    elevation_sample = G_small.get_attributes(attribute_name=attribute_smoothed)
    data[:,2] = elevation_sample

    # Column 0: alpha, column 1: normalised covariance for that alpha.
    normal_cov = num.array(num.zeros([len(alphas), 2]), dtype=num.float)

    if verbose:
        log.critical('Determine difference between predicted results '
                     'and actual data')

    # Iterate the alphas list (not the domains dict) so the column order in
    # 'data' and row order in 'normal_cov' are deterministic.
    for i, alpha in enumerate(alphas):
        # Was a bare 'print' statement; converted to log.critical() to
        # match the rest of the module (changeset r7317).
        if verbose: log.critical('Alpha = %s' % str(alpha))

        points_geo = domains[alpha].geo_reference.change_points_geo_ref(points)
        # returns the predicted elevation of the points that were "split" out
        # of the original data set for one particular alpha
        elevation_predicted = \
                domains[alpha].quantities[attribute_smoothed].\
                        get_values(interpolation_points=points_geo)

        # add predicted elevation to array that starts with x, y, z...
        data[:,i+3] = elevation_predicted

        # Normalised covariance: cov of the residual over cov of the sample.
        sample_cov = cov(elevation_sample)
        ele_cov = cov(elevation_sample - elevation_predicted)
        normal_cov[i,:] = [alpha, ele_cov / sample_cov]
        log.critical('memory usage during compare: %s' % str(mem_usage()))
        if verbose: log.critical('cov %s = %s'
                                 % (normal_cov[i][0], normal_cov[i][1]))

    # Sort rows by alpha so the plot's x axis is monotonic.
    normal_cov0 = normal_cov[:,0]
    normal_cov_new = num.take(normal_cov, num.argsort(normal_cov0), axis=0)

    if plot_name is not None:
        from pylab import savefig, semilogx, loglog

        semilogx(normal_cov_new[:,0], normal_cov_new[:,1])
        loglog(normal_cov_new[:,0], normal_cov_new[:,1])
        savefig(plot_name, dpi=300)

    # Only remove the mesh file if we created the temporary one above.
    if mesh_file == 'temp.msh':
        remove(mesh_file)

    # (minimum normalised covariance, alpha that produced it)
    return (min(normal_cov_new[:,1]),
            normal_cov_new[(num.argmin(normal_cov_new, axis=0))[1],0])
1899
1900
if __name__ == "__main__":
    # This module is intended for import only; there is no
    # command-line behaviour.
    pass
1903
Note: See TracBrowser for help on using the repository browser.