source: branches/numpy/anuga/geospatial_data/geospatial_data.py @ 6410

Last change on this file since 6410 was 6410, checked in by rwilson, 15 years ago

numpy changes.

File size: 69.8 KB
Line 
1"""Class Geospatial_data - Manipulation of locations on the planet and
2associated attributes.
3
4"""
5
6from sys import maxint
7from os import access, F_OK, R_OK,remove
8from types import DictType
9from warnings import warn
10from string import lower
11from RandomArray import randint, seed, get_seed
12from copy import deepcopy
13from Scientific.IO.NetCDF import NetCDFFile
14
15import numpy as num
16
17from anuga.coordinate_transforms.lat_long_UTM_conversion import UTMtoLL
18from anuga.utilities.numerical_tools import ensure_numeric
19from anuga.coordinate_transforms.geo_reference import Geo_reference, \
20     TitleError, DEFAULT_ZONE, ensure_geo_reference, write_NetCDF_georeference
21from anuga.coordinate_transforms.redfearn import convert_from_latlon_to_utm
22from anuga.utilities.system_tools import clean_line
23from anuga.utilities.anuga_exceptions import ANUGAError
24from anuga.config import points_file_block_line_size as MAX_READ_LINES
25from anuga.config import netcdf_mode_r, netcdf_mode_w, netcdf_mode_a
26from anuga.config import netcdf_float
27
28DEFAULT_ATTRIBUTE = 'elevation'
29
30
31##
32# @brief ??
33class Geospatial_data:
34
    ##
    # @brief Create a Geospatial_data instance from points and/or a file.
    # @param data_points Mx2 iterable of tuples or array of x,y coordinates
    #                    (may also be a points file name).
    # @param attributes Associated values for each data point.
    # @param geo_reference Geo_reference object giving the points' origin.
    # @param default_attribute_name Attribute returned when none is named.
    # @param file_name Name of a .txt, .csv or .pts input file.
    # @param latitudes Vector of latitudes (alternative to data_points).
    # @param longitudes Vector of longitudes (alternative to data_points).
    # @param points_are_lats_longs True if points are lat/long, not UTM.
    # @param max_read_lines Size of block to read, if blocking.
    # @param load_file_now True if blocking but we want to read file now.
    # @param verbose True if this class instance is verbose.
    def __init__(self,
                 data_points=None, # this can also be a points file name
                 attributes=None,
                 geo_reference=None,
                 default_attribute_name=None,
                 file_name=None,
                 latitudes=None,
                 longitudes=None,
                 points_are_lats_longs=False,
                 max_read_lines=None,
                 load_file_now=True,
                 verbose=False):
        """Create instance from data points and associated attributes

        data_points: x,y coordinates in meters. Type must be either a
        sequence of 2-tuples or an Mx2 numeric array of floats.  A file name
        with extension .txt, .csv or .pts can also be passed in here.

        attributes: Associated values for each data point. The type
        must be either a list or an array of length M or a dictionary
        of lists (or arrays) of length M. In the latter case the keys
        in the dictionary represent the attribute names, in the former
        the attribute will get the default name "elevation".

        geo_reference: Object representing the origin of the data
        points. It contains UTM zone, easting and northing and data
        points are assumed to be relative to this origin.
        If geo_reference is None, the default geo ref object is used.

        default_attribute_name: Name of default attribute to be used with
        get_attribute_values. The idea is that the dataset can be
        equipped with information about which attribute to return.
        If None, the default is the "first"

        latitudes, longitudes: Vectors of latitudes and longitudes,
        used to specify location instead of points.

        points_are_lats_longs: Set this as true if the points are actually
        lats and longs, not UTM

        max_read_lines: The number of rows read into memory when using
        blocking to read a file.

        load_file_now:  If true the file is automatically loaded
        into the geospatial instance. Used when blocking.

        file_name: Name of input netCDF file or .txt file. netCDF file must
        have dimensions "points" etc.
        .txt file is a comma separated file with x, y and attribute
        data.

        The first line has the titles of the columns.  The first two
        column titles are checked to see if they start with lat or
        long (not case sensitive).  If so the data is assumed to be
        latitude and longitude, in decimal format and converted to
        UTM.  Otherwise the first two columns are assumed to be the x
        and y, and the title names actually used are ignored.


        The format for a .txt file is:
            1st line:     [column names]
            other lines:  x y [attributes]

            for example:
            x, y, elevation, friction
            0.6, 0.7, 4.9, 0.3
            1.9, 2.8, 5, 0.3
            2.7, 2.4, 5.2, 0.3

        The first two columns have to be x, y or lat, long
        coordinates.


        The format for a Points dictionary is:
          ['pointlist'] a 2 column array describing points. 1st column x,
          2nd column y.
          ['attributelist'], a dictionary of 1D arrays, representing
          attribute values at the point.  The dictionary key is the attribute
          header.
          ['geo_reference'] a Geo_reference object. Use if the point
          information is relative. This is optional.
            eg
            dic['pointlist'] = [[1.0,2.0],[3.0,5.0]]
            dic['attributelist']['elevation'] = [7.0,5.0]

        verbose: True if this instance should print progress messages.
        """

        if isinstance(data_points, basestring):
            # assume data_points is really a file name
            file_name = data_points

        self.set_verbose(verbose)
        self.geo_reference = None # create the attribute
        self.file_name = file_name

        # Fall back to the configured block size when none is given
        if max_read_lines is None:
            self.max_read_lines = MAX_READ_LINES
        else:
            self.max_read_lines = max_read_lines

        if file_name is None:
            if latitudes is not None \
               or longitudes is not None \
               or points_are_lats_longs:
                # presumably converts lat/long input into UTM points plus a
                # georeference - helper not visible in this chunk, confirm
                data_points, geo_reference = \
                    _set_using_lat_long(latitudes=latitudes,
                                        longitudes=longitudes,
                                        geo_reference=geo_reference,
                                        data_points=data_points,
                                        points_are_lats_longs=\
                                        points_are_lats_longs)
            self.check_data_points(data_points)
            self.set_attributes(attributes)
            self.set_geo_reference(geo_reference)
            self.set_default_attribute_name(default_attribute_name)
        elif load_file_now is True:
            # watch for case where file name and points,
            # attributes etc are provided!!
            # if file name then all provided info will be removed!

            if verbose is True:
                if file_name is not None:
                    print 'Loading Geospatial data from file: %s' % file_name

            self.import_points_file(file_name, verbose=verbose)

            self.check_data_points(self.data_points)
            self.set_attributes(self.attributes)
            self.set_geo_reference(self.geo_reference)
            self.set_default_attribute_name(default_attribute_name)

        if verbose is True:
            if file_name is not None:
                print 'Geospatial data created from file: %s' % file_name
                if load_file_now is False:
                    print 'Data will be loaded blockwise on demand'

                    if file_name.endswith('csv') or file_name.endswith('txt'):
                        pass
                        # This message was misleading.
                        # FIXME (Ole): Are we blocking here or not?
                        # print 'ASCII formats are not that great for '
                        # print 'blockwise reading. Consider storing this'
                        # print 'data as a pts NetCDF format'
193
194    ##
195    # @brief Return length of the points set.
196    def __len__(self):
197        return len(self.data_points)
198
199    ##
200    # @brief Return a string representation of the points set.
201    def __repr__(self):
202        return str(self.get_data_points(absolute=True))
203
204    ##
205    # @brief Check data points.
206    # @param data_points Points data to check and store in instance.
207    # @note Throws ValueError exception if no data.
208    def check_data_points(self, data_points):
209        """Checks data points"""
210
211        if data_points is None:
212            self.data_points = None
213            msg = 'There is no data or file provided!'
214            raise ValueError, msg
215
216        else:
217            self.data_points = ensure_numeric(data_points)
218            if not (0,) == self.data_points.shape:
219                assert len(self.data_points.shape) == 2
220                assert self.data_points.shape[1] == 2
221
222    ##
223    # @brief Check and assign attributes data.
224    # @param attributes Dictionary or scalar to save as .attributes.
225    # @note Throws exception if unable to convert dict keys to numeric.
226    def set_attributes(self, attributes):
227        """Check and assign attributes dictionary"""
228
229        if attributes is None:
230            self.attributes = None
231            return
232
233        if not isinstance(attributes, DictType):
234            # Convert single attribute into dictionary
235            attributes = {DEFAULT_ATTRIBUTE: attributes}
236
237        # Check input attributes
238        for key in attributes.keys():
239            try:
240                attributes[key] = ensure_numeric(attributes[key])
241            except:
242                msg = ("Attribute '%s' (%s) could not be converted to a"
243                       "numeric vector" % (str(key), str(attributes[key])))
244                raise Exception, msg
245
246        self.attributes = attributes
247
248    ##
249    # @brief Set the georeference of geospatial data.
250    # @param geo_reference The georeference data to set.
251    # @note Will raise exception if param not instance of Geo_reference.
252    def set_geo_reference(self, geo_reference):
253        """Set the georeference of geospatial data.
254
255        It can also be used to change the georeference and will ensure that
256        the absolute coordinate values are unchanged.
257        """
258
259        from anuga.coordinate_transforms.geo_reference import Geo_reference
260
261        if geo_reference is None:
262            # Use default - points are in absolute coordinates
263            geo_reference = Geo_reference()
264
265        # Allow for tuple (zone, xllcorner, yllcorner)
266        geo_reference = ensure_geo_reference(geo_reference)
267
268        if not isinstance(geo_reference, Geo_reference):
269            # FIXME (Ole): This exception will be raised even
270            # if geo_reference is None. Is that the intent Duncan?
271            msg = 'Argument geo_reference must be a valid Geo_reference '
272            msg += 'object or None.'
273            raise Expection, msg
274
275        # If a geo_reference already exists, change the point data according to
276        # the new geo reference
277        if  self.geo_reference is not None:
278            self.data_points = self.get_data_points(geo_reference=geo_reference)
279
280        self.geo_reference = geo_reference
281
282    ##
283    # @brief Set default attribute name.
284    # @param default_attribute_name The default to save.
285    def set_default_attribute_name(self, default_attribute_name):
286        self.default_attribute_name = default_attribute_name
287
288    ##
289    # @brief Set the instance verbose flag.
290    # @param verbose The value to save.
291    # @note Will raise exception if param is not True or False.
292    def set_verbose(self, verbose=False):
293        if verbose in [False, True]:
294            self.verbose = verbose
295        else:
296            msg = 'Illegal value: %s' % str(verbose)
297            raise Exception, msg
298
299    ##
300    # @brief Clip geospatial data by a given polygon.
301    # @param polygon The polygon to clip with.
302    # @param closed True if points on clip boundary are not included in result.
303    # @param verbose True if this function is verbose.
304    def clip(self, polygon, closed=True, verbose=False):
305        """Clip geospatial data by a polygon
306
307        Input
308          polygon - Either a list of points, an Nx2 array or
309                    a Geospatial data object.
310          closed - (optional) determine whether points on boundary should be
311          regarded as belonging to the polygon (closed = True)
312          or not (closed = False). Default is True.
313
314        Output
315          New geospatial data object representing points inside
316          specified polygon.
317
318
319        Note - this method is non-destructive and leaves the data in 'self'
320        unchanged
321        """
322
323        from anuga.utilities.polygon import inside_polygon
324
325        if isinstance(polygon, Geospatial_data):
326            # Polygon is an object - extract points
327            polygon = polygon.get_data_points()
328
329        points = self.get_data_points()
330        inside_indices = inside_polygon(points, polygon, closed, verbose)
331
332        clipped_G = self.get_sample(inside_indices)
333
334        return clipped_G
335
336    ##
337    # @brief Clip points data by polygon, return points outside polygon.
338    # @param polygon The polygon to clip with.
339    # @param closed True if points on clip boundary are not included in result.
340    # @param verbose True if this function is verbose.
341    def clip_outside(self, polygon, closed=True, verbose=False):
342        """Clip geospatial date by a polygon, keeping data OUTSIDE of polygon
343
344        Input
345          polygon - Either a list of points, an Nx2 array or
346                    a Geospatial data object.
347          closed - (optional) determine whether points on boundary should be
348          regarded as belonging to the polygon (closed = True)
349          or not (closed = False). Default is True.
350
351        Output
352          Geospatial data object representing point OUTSIDE specified polygon
353        """
354
355        from anuga.utilities.polygon import outside_polygon
356
357        if isinstance(polygon, Geospatial_data):
358            # Polygon is an object - extract points
359            polygon = polygon.get_data_points()
360
361        points = self.get_data_points()
362        outside_indices = outside_polygon(points, polygon, closed,verbose)
363
364        clipped_G = self.get_sample(outside_indices)
365
366        return clipped_G
367
368    ##
369    # @brief Get instance geo_reference data.
370    def get_geo_reference(self):
371        return self.geo_reference
372
373    ##
374    # @brief Get coordinates for all data points as an Nx2 array.
375    # @param absolute If True, return UTM, else relative to xll/yll corners.
376    # @param geo_reference If supplied, points are relative to it.
377    # @param as_lat_long If True, return points as lat/lon.
378    # @param isSouthHemisphere If True, return lat/lon points in S.Hemi.
379    # @return A set of data points, in appropriate form.
380    def get_data_points(self, absolute=True, geo_reference=None,
381                        as_lat_long=False, isSouthHemisphere=True):
382        """Get coordinates for all data points as an Nx2 array
383
384        If absolute is False returned coordinates are relative to the
385        internal georeference's xll and yll corners, otherwise
386        absolute UTM coordinates are returned.
387
388        If a geo_reference is passed the points are returned relative
389        to that geo_reference.
390
391        isSH (isSouthHemisphere) is only used when getting data
392        points "as_lat_long" is True and if FALSE will return lats and
393        longs valid for the Northern Hemisphere.
394
395        Default: absolute is True.
396        """
397
398        if as_lat_long is True:
399            msg = "Points need a zone to be converted into lats and longs"
400            assert self.geo_reference is not None, msg
401            zone = self.geo_reference.get_zone()
402            assert self.geo_reference.get_zone() is not DEFAULT_ZONE, msg
403            lats_longs = []
404            for point in self.get_data_points(True):
405                # UTMtoLL(northing, easting, zone,
406                lat_calced, long_calced = UTMtoLL(point[1], point[0],
407                                                  zone, isSouthHemisphere)
408                lats_longs.append((lat_calced, long_calced)) # to hash
409            return lats_longs
410
411        if absolute is True and geo_reference is None:
412            return self.geo_reference.get_absolute(self.data_points)
413        elif geo_reference is not None:
414            return geo_reference.change_points_geo_ref(self.data_points,
415                                                       self.geo_reference)
416        else:
417            # If absolute is False
418            return self.data_points
419
420    ##
421    # @brief Get value for attribute name.
422    # @param attribute_name Name to get value for.
423    # @note If name passed is None, return default attribute value.
424    def get_attributes(self, attribute_name=None):
425        """Return values for one named attribute.
426
427        If attribute_name is None, default_attribute_name is used
428        """
429
430        if attribute_name is None:
431            if self.default_attribute_name is not None:
432                attribute_name = self.default_attribute_name
433            else:
434                attribute_name = self.attributes.keys()[0]
435                # above line takes the first one from keys
436
437        if self.verbose is True:
438            print 'Using attribute %s' %attribute_name
439            print 'Available attributes: %s' %(self.attributes.keys())
440
441        msg = 'Attribute name %s does not exist in data set' % attribute_name
442        assert self.attributes.has_key(attribute_name), msg
443
444        return self.attributes[attribute_name]
445
446    ##
447    # @brief Get all instance attributes.
448    # @return The instance attribute dictionary, or None if no attributes.
449    def get_all_attributes(self):
450        """Return values for all attributes.
451        The return value is either None or a dictionary (possibly empty).
452        """
453
454        return self.attributes
455
456    ##
457    # @brief Override __add__() to allow addition of geospatial objects.
458    # @param self This object.
459    # @param other The second object.
460    # @return The new geospatial object.
461    def __add__(self, other):
462        """Returns the addition of 2 geospatical objects,
463        objects are concatencated to the end of each other
464
465        NOTE: doesn't add if objects contain different
466        attributes
467
468        Always return absolute points!
469        This also means, that if you add None to the object,
470        it will be turned into absolute coordinates
471
472        other can be None in which case nothing is added to self.
473        """
474
475        # find objects zone and checks if the same
476        geo_ref1 = self.get_geo_reference()
477        zone1 = geo_ref1.get_zone()
478
479        if other is not None:
480            geo_ref2 = other.get_geo_reference()
481            zone2 = geo_ref2.get_zone()
482            geo_ref1.reconcile_zones(geo_ref2)
483            new_points = num.concatenate((self.get_data_points(absolute=True),
484                                          other.get_data_points(absolute=True)),
485                                         axis = 0)
486
487            # Concatenate attributes if any
488            if self.attributes is None:
489                if other.attributes is not None:
490                    msg = 'Geospatial data must have the same '
491                    msg += 'attributes to allow addition.'
492                    raise Exception, msg
493
494                new_attributes = None
495            else:
496                new_attributes = {}
497                for x in self.attributes.keys():
498                    if other.attributes.has_key(x):
499                        attrib1 = self.attributes[x]
500                        attrib2 = other.attributes[x]
501                        new_attributes[x] = num.concatenate((attrib1, attrib2))
502                    else:
503                        msg = 'Geospatial data must have the same \n'
504                        msg += 'attributes to allow addition.'
505                        raise Exception, msg
506        else:
507            # other is None:
508            new_points = self.get_data_points(absolute=True)
509            new_attributes = self.attributes
510
511        # Instantiate new data object and return absolute coordinates
512        new_geo_ref = Geo_reference(geo_ref1.get_zone(), 0.0, 0.0)
513        return Geospatial_data(new_points, new_attributes, new_geo_ref)
514
515    ##
516    # @brief Override the addition case where LHS isn't geospatial object.
517    # @param self This object.
518    # @param other The second object.
519    # @return The new geospatial object.
520    def __radd__(self, other):
521        """Handle cases like None + Geospatial_data(...)"""
522
523        return self + other
524
525    ###
526    #  IMPORT/EXPORT POINTS FILES
527    ###
528
529    ##
530    # @brief Import a .txt, .csv or .pts points data file.
531    # @param file_name
532    # @param delimiter
533    # @param verbose True if this function is to be verbose.
534    # @note Will throw IOError or SyntaxError if there is a problem.
535    def import_points_file(self, file_name, delimiter=None, verbose=False):
536        """ load an .txt, .csv or .pts file
537
538        Note: will throw an IOError/SyntaxError if it can't load the file.
539        Catch these!
540
541        Post condition: self.attributes dictionary has been set
542        """
543
544        if access(file_name, F_OK) == 0 :
545            msg = 'File %s does not exist or is not accessible' % file_name
546            raise IOError, msg
547
548        attributes = {}
549        if file_name[-4:] == ".pts":
550            try:
551                data_points, attributes, geo_reference = \
552                             _read_pts_file(file_name, verbose)
553            except IOError, e:
554                msg = 'Could not open file %s ' % file_name
555                raise IOError, msg
556        elif file_name[-4:] == ".txt" or file_name[-4:]== ".csv":
557            try:
558                data_points, attributes, geo_reference = \
559                             _read_csv_file(file_name, verbose)
560            except IOError, e:
561                # This should only be if a file is not found
562                msg = 'Could not open file %s. ' % file_name
563                msg += 'Check the file location.'
564                raise IOError, msg
565            except SyntaxError, e:
566                # This should only be if there is a format error
567                msg = 'Problem with format of file %s. \n' %file_name
568                msg += Error_message['IOError']
569                raise SyntaxError, msg
570        else:
571            msg = 'Extension %s is unknown' %file_name[-4:]
572            raise IOError, msg
573
574        self.data_points = data_points
575        self.attributes = attributes
576        self.geo_reference = geo_reference
577
578    ##
579    # @brief Write points data to a file (.csv or .pts).
580    # @param file_name Path to file to write.
581    # @param absolute ??
582    # @param as_lat_long ??
583    # @param isSouthHemisphere ??
584    def export_points_file(self, file_name, absolute=True,
585                           as_lat_long=False, isSouthHemisphere=True):
586        """write a points file as a text (.csv) or binary (.pts) file
587
588        file_name is the file name, including the extension
589        The point_dict is defined at the top of this file.
590
591        If absolute is True data the xll and yll are added to the points value
592        and the xll and yll of the geo_reference are set to 0.
593
594        If absolute is False data points at returned as relative to the xll
595        and yll and geo_reference remains uneffected
596
597        isSouthHemisphere: is only used when getting data
598        points "as_lat_long" is True and if FALSE will return lats and
599        longs valid for the Northern Hemisphere.
600        """
601
602        if (file_name[-4:] == ".pts"):
603            if absolute is True:
604                geo_ref = deepcopy(self.geo_reference)
605                geo_ref.xllcorner = 0
606                geo_ref.yllcorner = 0
607                _write_pts_file(file_name,
608                                self.get_data_points(absolute),
609                                self.get_all_attributes(),
610                                geo_ref)
611            else:
612                _write_pts_file(file_name,
613                                self.get_data_points(absolute),
614                                self.get_all_attributes(),
615                                self.get_geo_reference())
616
617        elif file_name[-4:] == ".txt" or file_name[-4:] == ".csv":
618            msg = "ERROR: trying to write a .txt file with relative data."
619            assert absolute, msg
620            _write_csv_file(file_name,
621                            self.get_data_points(absolute=True,
622                                                 as_lat_long=as_lat_long,
623                                  isSouthHemisphere=isSouthHemisphere),
624                            self.get_all_attributes(),
625                            as_lat_long=as_lat_long)
626
627        elif file_name[-4:] == ".urs" :
628            msg = "ERROR: Can not write a .urs file as a relative file."
629            assert absolute, msg
630            _write_urs_file(file_name,
631                            self.get_data_points(as_lat_long=True,
632                                  isSouthHemisphere=isSouthHemisphere))
633
634        else:
635            msg = 'Unknown file type %s ' %file_name
636            raise IOError, msg
637
638    ##
639    # @brief Get a subset of data that is referred to by 'indices'.
640    # @param indices A list of indices to select data subset with.
641    # @return A geospatial object containing data subset.
642    def get_sample(self, indices):
643        """ Returns a object which is a subset of the original
644        and the data points and attributes in this new object refer to
645        the indices provided
646
647        Input
648            indices- a list of integers that represent the new object
649        Output
650            New geospatial data object representing points specified by
651            the indices
652        """
653
654        # FIXME: add the geo_reference to this
655        points = self.get_data_points()
656        sampled_points = num.take(points, indices, axis=0)
657
658        attributes = self.get_all_attributes()
659
660        sampled_attributes = {}
661        if attributes is not None:
662            for key, att in attributes.items():
663                sampled_attributes[key] = num.take(att, indices, axis=0)
664
665        return Geospatial_data(sampled_points, sampled_attributes)
666
    ##
    # @brief Split one geospatial object into two disjoint objects.
    # @param factor Relative size (0..1) of the first result object.
    # @param seed_num Random seed - used mainly for unit test repeatability.
    # @param verbose True if this function is to be verbose.
    # @note Points in each result object are selected randomly.
    def split(self, factor=0.5, seed_num=None, verbose=False):
        """Returns two geospatial_data object, first is the size of the 'factor'
        smaller the original and the second is the remainder. The two
        new objects are disjoint sets of each other.

        Points of the two new object have selected RANDOMLY.

        This method create two lists of indices which are passed into
        get_sample.  The lists are created using random numbers, and
        they are unique sets eg.  total_list(1,2,3,4,5,6,7,8,9)
        random_list(1,3,6,7,9) and remainder_list(0,2,4,5,8)

        Input -  the factor which to split the object, if 0.1 then 10% of the
                 together object will be returned

        Output - two geospatial_data objects that are disjoint sets of the
                 original
        """

        i = 0
        self_size = len(self)
        random_list = []
        remainder_list = []
        new_size = round(factor*self_size)

        # Find unique random numbers
        if verbose: print "make unique random number list and get indices"

        total = num.array(range(self_size), num.int)    #array default#
        total_list = total.tolist()

        if verbose: print "total list len", len(total_list)

        # There will be repeated random numbers however will not be a
        # problem as they are being 'pop'ed out of array so if there
        # are two numbers the same they will pop different indices,
        # still basically random
        ## create list of non-unique random numbers
        if verbose: print "create random numbers list %s long" %new_size

        # Set seed if provided, mainly important for unit test!
        # plus recalculate seed when no seed provided.
        if seed_num != None:
            seed(seed_num, seed_num)
        else:
            seed()

        if verbose: print "seed:", get_seed()

        # randint comes from the (long-deprecated) RandomArray module
        # imported at the top of this file
        random_num = randint(0, self_size-1, (int(new_size),))
        random_num = random_num.tolist()

        # need to sort and reverse so the pop() works correctly
        # (popping from the high end keeps the lower indices valid)
        random_num.sort()
        random_num.reverse()

        if verbose: print "make random number list and get indices"

        j = 0
        k = 1
        remainder_list = total_list[:]

        # pops array index (random_num) from remainder_list
        # (which starts as the
        # total_list and appends to random_list
        random_num_len = len(random_num)
        for i in random_num:
            random_list.append(remainder_list.pop(i))
            j += 1
            # prints progress
            if verbose and round(random_num_len/10*k) == j:
                print '(%s/%s)' % (j, random_num_len)
                k += 1

        # FIXME: move to tests, it might take a long time
        # then create an array of random length between 500 and 1000,
        # and use a random factor between 0 and 1
        # setup for assertion
        test_total = random_list[:]
        test_total.extend(remainder_list)
        test_total.sort()
        msg = 'The two random lists made from the original list when added ' \
              'together DO NOT equal the original list'
        assert total_list == test_total, msg

        # Get new samples
        if verbose: print "get values of indices for random list"
        G1 = self.get_sample(random_list)
        if verbose: print "get values of indices for opposite of random list"
        G2 = self.get_sample(remainder_list)

        return G1, G2
765
766    ##
767    # @brief Allow iteration over this object.
    def __iter__(self):
        """Initialise blocked reading of the underlying point file.

        Reads the file header and the total number of points, and saves
        the file pointer position so that next() can deliver the data in
        blocks of (at most) self.max_read_lines points.  Returns self,
        as required by the iterator protocol.
        """
        from Scientific.IO.NetCDF import NetCDFFile
        # FIXME - what to do if the file isn't there

        # FIXME (Ole): Shouldn't this go into the constructor?
        # This method acts like the constructor when blocking.
        # ... and shouldn't it be called block_size?
        #
        # Use the configured default block size if none was supplied.
        if self.max_read_lines is None:
            self.max_read_lines = MAX_READ_LINES

        if self.file_name[-4:] == ".pts":
            # See if the file is there.  Throw a QUIET IO error if it isn't
            fd = open(self.file_name,'r')
            fd.close()

            # Throws prints to screen if file not present
            self.fid = NetCDFFile(self.file_name, netcdf_mode_r)

            # Georeference, attribute names and point count from the header.
            (self.blocking_georef,
             self.blocking_keys,
             self.number_of_points) = _read_pts_file_header(self.fid,
                                                            self.verbose)
            self.start_row = 0
            self.last_row = self.number_of_points
            # show_verbose/verbose_block_size throttle progress reporting
            # to roughly one message per tenth of the file.
            self.show_verbose = 0
            self.verbose_block_size = (self.last_row + 10)/10
            self.block_number = 0
            self.number_of_blocks = self.number_of_points/self.max_read_lines
            # This computes the number of full blocks. The last block may be
            # smaller and won't be included in this estimate.

            if self.verbose is True:
                print 'Reading %d points (in ~%d blocks) from file %s. ' \
                      % (self.number_of_points,
                         self.number_of_blocks,
                         self.file_name),
                print 'Each block consists of %d data points' \
                      % self.max_read_lines

        else:
            # Assume the file is a csv file
            file_pointer = open(self.file_name)
            self.header, self.file_pointer = _read_csv_file_header(file_pointer)
            self.blocking_georef = None # Used for reconciling zones

        return self
818
819    ##
820    # @brief Read another block into the instance.
    def next(self):
        """Read the next block of points and return a new Geospatial_data.

        Instantiates a Geospatial_data object from the next block of (at
        most) self.max_read_lines points.  When the file is exhausted the
        blocking state attributes are removed and StopIteration is raised.
        """

        if self.file_name[-4:] == ".pts":
            if self.start_row == self.last_row:
                # Read the end of the file last iteration
                # Remove blocking attributes
                self.fid.close()
                del self.max_read_lines
                del self.blocking_georef
                del self.last_row
                del self.start_row
                del self.blocking_keys
                del self.fid
                raise StopIteration
            # Clamp the block end to the last row of the file.
            fin_row = self.start_row + self.max_read_lines
            if fin_row > self.last_row:
                fin_row = self.last_row

            if self.verbose is True:
                # Report progress roughly once per verbose_block_size points.
                if self.show_verbose >= self.start_row \
                   and self.show_verbose < fin_row:
                    print 'Reading block %d (points %d to %d) out of %d'\
                          %(self.block_number,
                            self.start_row,
                            fin_row,
                            self.number_of_blocks)

                    self.show_verbose += max(self.max_read_lines,
                                             self.verbose_block_size)


            # Read next block
            pointlist, att_dict, = _read_pts_file_blocking(self.fid,
                                                           self.start_row,
                                                           fin_row,
                                                           self.blocking_keys)

            geo = Geospatial_data(pointlist, att_dict, self.blocking_georef)
            self.start_row = fin_row

            self.block_number += 1

        else:
            # Assume the file is a csv file
            try:
                (pointlist,
                 att_dict,
                 geo_ref,
                 self.file_pointer) = \
                        _read_csv_file_blocking(self.file_pointer,
                                                self.header[:],
                                                max_read_lines=\
                                                    self.max_read_lines,
                                                verbose=self.verbose)

                # Check that the zones haven't changed.
                if geo_ref is not None:
                    geo_ref.reconcile_zones(self.blocking_georef)
                    self.blocking_georef = geo_ref
                elif self.blocking_georef is not None:
                    msg = 'Geo reference given, then not given.'
                    msg += ' This should not happen.'
                    raise ValueError, msg
                geo = Geospatial_data(pointlist, att_dict, geo_ref)
            except StopIteration:
                # End of data: close the file and drop the blocking state.
                self.file_pointer.close()
                del self.header
                del self.file_pointer
                raise StopIteration
            except ANUGAError:
                # Propagate ANUGA errors after cleaning up the file state.
                self.file_pointer.close()
                del self.header
                del self.file_pointer
                raise
            except SyntaxError:
                self.file_pointer.close()
                del self.header
                del self.file_pointer
                # This should only be if there is a format error
                msg = 'Could not open file %s. \n' % self.file_name
                msg += Error_message['IOError']
                raise SyntaxError, msg
        return geo
905
906
907##################### Error messages ###########
# Map of error names to user-facing explanatory messages.
Error_message = {}
Em = Error_message      # short alias, kept for external users of this module
Em['IOError'] = (
    "NOTE: The format for a comma separated .txt/.csv file is:\n"
    "        1st line:     [column names]\n"
    "        other lines:  [x value], [y value], [attributes]\n"
    "\n"
    "           for example:\n"
    "           x, y, elevation, friction\n"
    "           0.6, 0.7, 4.9, 0.3\n"
    "           1.9, 2.8, 5, 0.3\n"
    "           2.7, 2.4, 5.2, 0.3\n"
    "\n"
    "The first two columns are assumed to be x, y coordinates.\n"
    "The attribute values must be numeric.\n"
)
922
923##
924# @brief ??
925# @param latitudes ??
926# @param longitudes ??
927# @param geo_reference ??
928# @param data_points ??
929# @param points_are_lats_longs ??
def _set_using_lat_long(latitudes,
                        longitudes,
                        geo_reference,
                        data_points,
                        points_are_lats_longs):
    """Convert latitude/longitude information to UTM data points.

    Exactly one of two input styles is accepted: separate 'latitudes'
    and 'longitudes' sequences, or 'data_points' in (lat, long) order
    with points_are_lats_longs True.  A pre-existing geo_reference is
    not allowed in either case.

    Returns (data_points, geo_reference) where data_points are UTM
    coordinates and geo_reference carries the computed UTM zone.
    Raises ValueError on inconsistent or missing inputs.
    """

    if geo_reference is not None:
        msg = "A georeference is specified yet latitude and longitude " \
              "are also specified!"
        raise ValueError(msg)

    if data_points is not None and not points_are_lats_longs:
        msg = "Data points are specified yet latitude and longitude are " \
              "also specified."
        raise ValueError(msg)

    if points_are_lats_longs:
        if data_points is None:
            raise ValueError("Data points are not specified.")
        # Split the (lat, long) pairs into two separate vectors.
        lats_longs = ensure_numeric(data_points)
        latitudes = num.ravel(lats_longs[:,0:1])
        longitudes = num.ravel(lats_longs[:,1:])

    if latitudes is None and longitudes is None:
        raise ValueError("Latitudes and Longitudes are not specified.")

    if latitudes is None:
        raise ValueError("Longitudes are specified yet latitudes aren't.")

    if longitudes is None:
        raise ValueError("Latitudes are specified yet longitudes aren't.")

    utm_points, zone = convert_from_latlon_to_utm(latitudes=latitudes,
                                                  longitudes=longitudes)
    return utm_points, Geo_reference(zone=zone)
970
971
972##
973# @brief Read a .pts data file.
974# @param file_name Path to file to read.
975# @param verbose True if this function is to be verbose.
976# @return (pointlist, attributes, geo_reference)
977def _read_pts_file(file_name, verbose=False):
978    """Read .pts NetCDF file
979
980    Return a dic of array of points, and dic of array of attribute
981    eg
982    dic['points'] = [[1.0,2.0],[3.0,5.0]]
983    dic['attributelist']['elevation'] = [[7.0,5.0]]
984    """
985
986    from Scientific.IO.NetCDF import NetCDFFile
987
988    if verbose: print 'Reading ', file_name
989
990    # See if the file is there.  Throw a QUIET IO error if it isn't
991    fd = open(file_name,'r')
992    fd.close()
993
994    # Throws prints to screen if file not present
995    fid = NetCDFFile(file_name, netcdf_mode_r)
996
997    pointlist = num.array(fid.variables['points'])
998    keys = fid.variables.keys()
999
1000    if verbose: print 'Got %d variables: %s' % (len(keys), keys)
1001
1002    try:
1003        keys.remove('points')
1004    except IOError, e:
1005        fid.close()
1006        msg = "Expected keyword 'points' but could not find it"
1007        raise IOError, msg
1008
1009    attributes = {}
1010    for key in keys:
1011        if verbose: print "reading attribute '%s'" % key
1012
1013        attributes[key] = num.array(fid.variables[key])
1014
1015    try:
1016        geo_reference = Geo_reference(NetCDFObject=fid)
1017    except AttributeError, e:
1018        geo_reference = None
1019
1020    fid.close()
1021
1022    return pointlist, attributes, geo_reference
1023
1024
1025##
1026# @brief Read a .csv data file.
1027# @param file_name Path to the .csv file to read.
1028# @param verbose True if this function is to be verbose.
def _read_csv_file(file_name, verbose=False):
    """Read a .csv points file.

    file_name  path of the .csv file to read
    verbose    True if this function is to be verbose

    Return (pointlist, att_dict, geo_ref) where pointlist is an array
    of points, att_dict maps attribute names to value arrays and
    geo_ref is a Geo_reference instance or None.
    """

    fp = open(file_name)
    header, fp = _read_csv_file_header(fp)

    try:
        result = _read_csv_file_blocking(fp,
                                         header,
                                         max_read_lines=1e30)
                                    # If the file is bigger that this, block..
                                    # FIXME (Ole) What's up here?
    except ANUGAError:
        fp.close()
        raise

    pointlist, att_dict, geo_ref, fp = result
    fp.close()

    return pointlist, att_dict, geo_ref
1056
1057
1058##
1059# @brief Read a .csv file header.
1060# @param file_pointer Open descriptor of the file to read.
1061# @param delimiter Header line delimiter string, split on this string.
1062# @param verbose True if this function is to be verbose.
1063# @return A tuple of (<cleaned header string>, <input file_pointer>)
1064
1065CSV_DELIMITER = ','
1066
def _read_csv_file_header(file_pointer,
                          delimiter=CSV_DELIMITER,
                          verbose=False):
    """Read the header line of a .csv file.

    Return (header, file_pointer) where header is the list of cleaned
    column names and file_pointer is the (advanced) input file object.
    """

    first_line = file_pointer.readline()
    return clean_line(first_line, delimiter), file_pointer
1078
1079##
1080# @brief Read a .csv file, with blocking.
1081# @param file_pointer Open descriptor of the file to read.
1082# @param header List of already read .csv header fields.
1083# @param delimiter Delimiter string header was split on.
1084# @param max_read_lines The max number of lines to read before blocking.
1085# @param verbose True if this function is to be verbose.
1086# @note Will throw IndexError, SyntaxError exceptions.
def _read_csv_file_blocking(file_pointer,
                            header,
                            delimiter=CSV_DELIMITER,
                            max_read_lines=MAX_READ_LINES,
                            verbose=False):
    """Read at most max_read_lines data rows from an open .csv file.

    file_pointer    open file object positioned at the first data row
    header          list of column names, with the x and y labels first
    delimiter       field delimiter the header was split on
    max_read_lines  maximum number of data rows to read before returning
    verbose         True if this function is to be verbose

    Return (pointlist, att_dict, geo_ref, file_pointer).  If the two
    leading columns are recognised as latitude/longitude the points are
    converted to UTM and a Geo_reference is returned, otherwise geo_ref
    is None.

    Raises StopIteration when no points remain and SyntaxError when the
    file is malformed.
    """

    points = []
    att_dict = {}

    # Remove the x and y (or lat/long) headers, leaving attribute names.
    header = header[:]
    try:
        x_header = header.pop(0)
        y_header = header.pop(0)
    except IndexError:
        # if there are not two columns this will occur.
        # eg if it is a space seperated file
        raise SyntaxError

    read_lines = 0
    while read_lines < max_read_lines:
        line = file_pointer.readline()
        numbers = clean_line(line, delimiter)
        if len(numbers) <= 1:
            break                       # blank line or EOF ends the block
        if line[0] == '#':
            continue                    # skip comment lines

        read_lines += 1

        try:
            x = float(numbers[0])
            y = float(numbers[1])
            points.append([x,y])
            numbers.pop(0)
            numbers.pop(0)
            if len(header) != len(numbers):
                file_pointer.close()
                msg = "File load error. " \
                      "There might be a problem with the file header."
                raise SyntaxError(msg)
            for i, n in enumerate(numbers):
                # BUG FIX: str.strip() returns a new string; the result
                # was previously discarded, leaving n unstripped.
                n = n.strip()
                if n != '\n' and n != '':
                    att_dict.setdefault(header[i], []).append(float(n))
        except ValueError:
            raise SyntaxError

    if points == []:
        raise StopIteration

    pointlist = num.array(points, num.float)
    for key in att_dict.keys():
        att_dict[key] = num.array(att_dict[key], num.float)

    # If the two leading columns are lat/long, convert the points to UTM
    # and build a Geo_reference for the computed zone.
    geo_ref = None
    x_header = lower(x_header[:3])
    y_header = lower(y_header[:3])
    if (x_header == 'lon' or x_header == 'lat') \
       and (y_header == 'lon' or y_header == 'lat'):
        if x_header == 'lon':
            longitudes = num.ravel(pointlist[:,0:1])
            latitudes = num.ravel(pointlist[:,1:])
        else:
            latitudes = num.ravel(pointlist[:,0:1])
            longitudes = num.ravel(pointlist[:,1:])

        pointlist, geo_ref = _set_using_lat_long(latitudes,
                                                 longitudes,
                                                 geo_reference=None,
                                                 data_points=None,
                                                 points_are_lats_longs=False)

    return pointlist, att_dict, geo_ref, file_pointer
1167
1168
1169##
1170# @brief Read a .pts file header.
1171# @param fid Handle to the open .pts file.
1172# @param verbose True if the function is to be verbose.
1173# @return (geo_reference, keys, fid.dimensions['number_of_points'])
1174# @note Will throw IOError and AttributeError exceptions.
1175def _read_pts_file_header(fid, verbose=False):
1176    """Read the geo_reference and number_of_points from a .pts file"""
1177
1178    keys = fid.variables.keys()
1179    try:
1180        keys.remove('points')
1181    except IOError, e:
1182        fid.close()
1183        msg = "Expected keyword 'points' but could not find it."
1184        raise IOError, msg
1185
1186    if verbose: print 'Got %d variables: %s' % (len(keys), keys)
1187
1188    try:
1189        geo_reference = Geo_reference(NetCDFObject=fid)
1190    except AttributeError, e:
1191        geo_reference = None
1192
1193    return geo_reference, keys, fid.dimensions['number_of_points']
1194
1195
1196##
# @brief Read the body of a .pts file, with blocking.
1198# @param fid Handle to already open file.
1199# @param start_row Start row index of points to return.
1200# @param fin_row End row index of points to return.
1201# @param keys Iterable of keys to return.
1202# @return Tuple of (pointlist, attributes).
1203def _read_pts_file_blocking(fid, start_row, fin_row, keys):
1204    """Read the body of a .csv file."""
1205
1206    pointlist = num.array(fid.variables['points'][start_row:fin_row])
1207
1208    attributes = {}
1209    for key in keys:
1210        attributes[key] = num.array(fid.variables[key][start_row:fin_row])
1211
1212    return pointlist, attributes
1213
1214
1215##
1216# @brief Write a .pts data file.
1217# @param file_name Path to the file to write.
1218# @param write_data_points Data points to write.
1219# @param write_attributes Attributes to write.
1220# @param write_geo_reference Georef to write.
def _write_pts_file(file_name,
                    write_data_points,
                    write_attributes=None,
                    write_geo_reference=None):
    """Write a .pts NetCDF file.

    file_name            path of the file to write
    write_data_points    Nx2 array of point coordinates
    write_attributes     dict of attribute name -> values, or None
    write_geo_reference  Geo_reference to store in the file, or None

    NOTE: the format cannot store an attribute named 'points' since that
    variable name is reserved for the coordinates themselves.
    """

    from Scientific.IO.NetCDF import NetCDFFile

    # Create a new NetCDF file and describe it.
    outfile = NetCDFFile(file_name, netcdf_mode_w)
    outfile.institution = 'Geoscience Australia'
    outfile.description = ('NetCDF format for compact and portable storage '
                           'of spatial point data')

    # Dimensions: N points, each with 2 coordinates (x, y).
    number_of_points = write_data_points.shape[0]
    outfile.createDimension('number_of_points', number_of_points)
    outfile.createDimension('number_of_dimensions', 2) # This is 2d data

    # Store the point coordinates.
    outfile.createVariable('points', netcdf_float, ('number_of_points',
                                                    'number_of_dimensions'))
    outfile.variables['points'][:] = write_data_points

    # One NetCDF variable per attribute.
    if write_attributes is not None:
        for name in write_attributes.keys():
            outfile.createVariable(name, netcdf_float, ('number_of_points',))
            outfile.variables[name][:] = write_attributes[name]

    # Optional georeference.
    if write_geo_reference is not None:
        write_NetCDF_georeference(write_geo_reference, outfile)

    outfile.close()
1273
1274
1275##
1276# @brief Write a .csv data file.
1277# @param file_name Path to the file to write.
1278# @param write_data_points Data points to write.
1279# @param write_attributes Attributes to write.
1280# @param as_lat_long True if points are lat/lon, else x/y.
1281# @param delimiter The CSV delimiter to use.
1282def _write_csv_file(file_name,
1283                    write_data_points,
1284                    write_attributes=None,
1285                    as_lat_long=False,
1286                    delimiter=','):
1287    """Write a .csv file."""
1288
1289    points = write_data_points
1290    pointattributes = write_attributes
1291
1292    fd = open(file_name, 'w')
1293
1294    if as_lat_long:
1295        titlelist = "latitude" + delimiter + "longitude"  + delimiter
1296    else:
1297        titlelist = "x" + delimiter + "y"  + delimiter
1298
1299    if pointattributes is not None:
1300        for title in pointattributes.keys():
1301            titlelist = titlelist + title + delimiter
1302        titlelist = titlelist[0:-len(delimiter)] # remove the last delimiter
1303
1304    fd.write(titlelist + "\n")
1305
1306    # <x/lat> <y/long> [attributes]
1307    for i, vert in enumerate( points):
1308        if pointattributes is not None:
1309            attlist = ","
1310            for att in pointattributes.keys():
1311                attlist = attlist + str(pointattributes[att][i]) + delimiter
1312            attlist = attlist[0:-len(delimiter)] # remove the last delimiter
1313            attlist.strip()
1314        else:
1315            attlist = ''
1316
1317        fd.write(str(vert[0]) + delimiter + str(vert[1]) + attlist + "\n")
1318
1319    fd.close()
1320
1321
1322##
1323# @brief Write a URS file.
1324# @param file_name The path of the file to write.
1325# @param points
1326# @param delimiter
1327def _write_urs_file(file_name, points, delimiter=' '):
1328    """Write a URS format file.
1329    export a file, file_name, with the urs format
1330    the data points are in lats and longs
1331    """
1332
1333    fd = open(file_name, 'w')
1334
1335    # first line is # points
1336    fd.write(str(len(points)) + "\n")
1337
1338    # <lat> <long> <id#>
1339    for i, vert in enumerate( points):
1340        fd.write(str(round(vert[0],7)) + delimiter +
1341                 str(round(vert[1],7)) + delimiter + str(i) + "\n")
1342
1343    fd.close()
1344
1345
1346##
1347# @brief ??
1348# @param point_atts ??
1349# @return ??
1350def _point_atts2array(point_atts):
1351    point_atts['pointlist'] = num.array(point_atts['pointlist'], num.float)
1352
1353    for key in point_atts['attributelist'].keys():
1354        point_atts['attributelist'][key] = \
1355                num.array(point_atts['attributelist'][key], num.float)
1356
1357    return point_atts
1358
1359
1360##
1361# @brief Convert geospatial object to a points dictionary.
1362# @param geospatial_data The geospatial object to convert.
1363# @return A points dictionary.
def geospatial_data2points_dictionary(geospatial_data):
    """Convert a Geospatial_data object into a points dictionary.

    Return a dict with keys 'pointlist', 'attributelist' and
    'geo_reference' mirroring the corresponding object attributes.
    """

    # Copy the attribute mapping into a fresh dictionary.
    attributes = {}
    for name in geospatial_data.attributes.keys():
        attributes[name] = geospatial_data.attributes[name]

    return {'pointlist': geospatial_data.data_points,
            'attributelist': attributes,
            'geo_reference': geospatial_data.geo_reference}
1379
1380
1381##
1382# @brief Convert a points dictionary to a geospatial object.
1383# @param points_dictionary A points dictionary to convert.
def points_dictionary2geospatial_data(points_dictionary):
    """Convert a points dictionary into a Geospatial_data object.

    The dictionary must contain the keys 'pointlist' and
    'attributelist'; 'geo_reference' is optional.
    """

    for required in ('pointlist', 'attributelist'):
        msg = 'Points dictionary must have key %s' % required
        assert required in points_dictionary, msg

    # A missing georeference simply means 'none'.
    geo = points_dictionary.get('geo_reference', None)

    return Geospatial_data(points_dictionary['pointlist'],
                           points_dictionary['attributelist'],
                           geo_reference=geo)
1401
1402
1403##
1404# @brief Ensure that points are in absolute coordinates.
1405# @param points A list or array of points to check, or geospatial object.
1406# @param geo_reference If supplied,
1407# @return ??
def ensure_absolute(points, geo_reference=None):
    """Ensure that points are in absolute coordinates.

    Accepts several input formats and always returns a numeric array of
    absolute points:

    points: list or numeric array of coordinate pairs [xi, eta],
            a Geospatial_data object, or a points file name.

    geo_reference: a Geo_reference object or a 3-tuple of
                   (UTM zone, easting, northing).  If given, the input
                   coordinates are assumed relative to this origin.
    """

    # A string names a points file; load it as a geospatial object.
    if isinstance(points, basestring):
        points = Geospatial_data(file_name=points)

    if isinstance(points, Geospatial_data):
        points = points.get_data_points(absolute=True)
        msg = 'Use a Geospatial_data object or a mesh origin, not both.'
        assert geo_reference == None, msg
    else:
        points = ensure_numeric(points, num.float)

    # Apply the origin shift, if one was supplied.
    if geo_reference is not None:
        if isinstance(geo_reference, Geo_reference):
            geo = geo_reference
        else:
            geo = Geo_reference(geo_reference[0],
                                geo_reference[1],
                                geo_reference[2])
        points = geo.get_absolute(points)

    return points
1449
1450
1451##
1452# @brief
1453# @param points
1454# @param geo_reference
1455# @return A geospatial object.
def ensure_geospatial(points, geo_reference=None):
    """Convert various point formats to a Geospatial_data instance.

    points: list or numeric array of coordinate pairs [xi, eta], or an
            existing Geospatial_data object (returned unchanged).

    geo_reference: a Geo_reference object or a 3-tuple of
                   (UTM zone, easting, northing).  If given, the input
                   coordinates are assumed relative to this origin.
    """

    if isinstance(points, Geospatial_data):
        # Already geospatial; a separate georeference makes no sense here.
        msg = "Use a Geospatial_data object or a mesh origin, not both."
        assert geo_reference is None, msg
        return points

    # List or numeric array of absolute points
    points = ensure_numeric(points, num.float)

    # Normalise the georeference to a Geo_reference instance (or None).
    if geo_reference is None:
        geo = None
    elif isinstance(geo_reference, Geo_reference):
        geo = geo_reference
    else:
        geo = Geo_reference(geo_reference[0],
                            geo_reference[1],
                            geo_reference[2])

    return Geospatial_data(data_points=points, geo_reference=geo)
1493
1494
1495##
1496# @brief
1497# @param data_file
1498# @param alpha_list
1499# @param mesh_file
1500# @param boundary_poly
1501# @param mesh_resolution
1502# @param north_boundary
1503# @param south_boundary
1504# @param east_boundary
1505# @param west_boundary
1506# @param plot_name
1507# @param split_factor
1508# @param seed_num
1509# @param cache
1510# @param verbose
1511def find_optimal_smoothing_parameter(data_file,
1512                                     alpha_list=None,
1513                                     mesh_file=None,
1514                                     boundary_poly=None,
1515                                     mesh_resolution=100000,
1516                                     north_boundary=None,
1517                                     south_boundary=None,
1518                                     east_boundary=None,
1519                                     west_boundary=None,
1520                                     plot_name='all_alphas',
1521                                     split_factor=0.1,
1522                                     seed_num=None,
1523                                     cache=False,
1524                                     verbose=False):
1525    """Removes a small random sample of points from 'data_file'.
1526    Then creates models with different alpha values from 'alpha_list' and
1527    cross validates the predicted value to the previously removed point data.
1528    Returns the alpha value which has the smallest covariance.
1529
1530    data_file: must not contain points outside the boundaries defined
1531               and it either a pts, txt or csv file.
1532
1533    alpha_list: the alpha values to test in a single list
1534
1535    mesh_file: name of the created mesh file or if passed in will read it.
1536               NOTE, if there is a mesh file mesh_resolution,
1537               north_boundary, south... etc will be ignored.
1538
1539    mesh_resolution: the maximum area size for a triangle
1540
1541    north_boundary... west_boundary: the value of the boundary
1542
1543    plot_name: the name for the plot contain the results
1544
1545    seed_num: the seed to the random number generator
1546
1547    USAGE:
1548        value, alpha = find_optimal_smoothing_parameter(data_file=fileName,
1549                                             alpha_list=[0.0001, 0.01, 1],
1550                                             mesh_file=None,
1551                                             mesh_resolution=3,
1552                                             north_boundary=5,
1553                                             south_boundary=-5,
1554                                             east_boundary=5,
1555                                             west_boundary=-5,
1556                                             plot_name='all_alphas',
1557                                             seed_num=100000,
1558                                             verbose=False)
1559
1560    OUTPUT: returns the minumum normalised covalance calculate AND the
1561           alpha that created it. PLUS writes a plot of the results
1562
1563    NOTE: code will not work if the data_file extent is greater than the
1564    boundary_polygon or any of the boundaries, eg north_boundary...west_boundary
1565    """
1566
1567    from anuga.shallow_water import Domain
1568    from anuga.geospatial_data.geospatial_data import Geospatial_data
1569    from anuga.pmesh.mesh_interface import create_mesh_from_regions
1570    from anuga.utilities.numerical_tools import cov
1571    from anuga.utilities.polygon import is_inside_polygon
1572    from anuga.fit_interpolate.benchmark_least_squares import mem_usage
1573
1574
1575    attribute_smoothed = 'elevation'
1576
1577    if mesh_file is None:
1578        if verbose: print "building mesh"
1579        mesh_file = 'temp.msh'
1580
1581        if north_boundary is None or south_boundary is None \
1582           or east_boundary is None or west_boundary is None:
1583            no_boundary = True
1584        else:
1585            no_boundary = False
1586
1587        if no_boundary is True:
1588            msg = 'All boundaries must be defined'
1589            raise Expection, msg
1590
1591        poly_topo = [[east_boundary,south_boundary],
1592                     [east_boundary,north_boundary],
1593                     [west_boundary,north_boundary],
1594                     [west_boundary,south_boundary]]
1595
1596        create_mesh_from_regions(poly_topo,
1597                                 boundary_tags={'back': [2],
1598                                                'side': [1,3],
1599                                                'ocean': [0]},
1600                                 maximum_triangle_area=mesh_resolution,
1601                                 filename=mesh_file,
1602                                 use_cache=cache,
1603                                 verbose=verbose)
1604
1605    else: # if mesh file provided
1606        # test mesh file exists?
1607        if verbose: "reading from file: %s" % mesh_file
1608        if access(mesh_file,F_OK) == 0:
1609            msg = "file %s doesn't exist!" % mesh_file
1610            raise IOError, msg
1611
1612    # split topo data
1613    if verbose: print 'Reading elevation file: %s' % data_file
1614    G = Geospatial_data(file_name = data_file)
1615    if verbose: print 'Start split'
1616    G_small, G_other = G.split(split_factor, seed_num, verbose=verbose)
1617    if verbose: print 'Finish split'
1618    points = G_small.get_data_points()
1619
1620    if verbose: print "Number of points in sample to compare: ", len(points)
1621
1622    if alpha_list == None:
1623        alphas = [0.001,0.01,100]
1624        #alphas = [0.000001, 0.00001, 0.0001, 0.001, 0.01,\
1625         #         0.1, 1.0, 10.0, 100.0,1000.0,10000.0]
1626    else:
1627        alphas = alpha_list
1628
1629    # creates array with columns 1 and 2 are x, y. column 3 is elevation
1630    # 4 onwards is the elevation_predicted using the alpha, which will
1631    # be compared later against the real removed data
1632    data = num.array([], dtype=num.float)
1633
1634    data = num.resize(data, (len(points), 3+len(alphas)))
1635
1636    # gets relative point from sample
1637    data[:,0] = points[:,0]
1638    data[:,1] = points[:,1]
1639    elevation_sample = G_small.get_attributes(attribute_name=attribute_smoothed)
1640    data[:,2] = elevation_sample
1641
1642    normal_cov = num.array(num.zeros([len(alphas), 2]), dtype=num.float)
1643
1644    if verbose: print 'Setup computational domains with different alphas'
1645
1646    for i, alpha in enumerate(alphas):
1647        # add G_other data to domains with different alphas
1648        if verbose:
1649            print '\n Calculating domain and mesh for Alpha = ', alpha, '\n'
1650        domain = Domain(mesh_file, use_cache=cache, verbose=verbose)
1651        if verbose: print domain.statistics()
1652        domain.set_quantity(attribute_smoothed,
1653                            geospatial_data=G_other,
1654                            use_cache=cache,
1655                            verbose=verbose,
1656                            alpha=alpha)
1657
1658        # Convert points to geospatial data for use with get_values below
1659        points_geo = Geospatial_data(points, domain.geo_reference)
1660
1661        # returns the predicted elevation of the points that were "split" out
1662        # of the original data set for one particular alpha
1663        if verbose: print 'Get predicted elevation for location to be compared'
1664        elevation_predicted = \
1665                domain.quantities[attribute_smoothed].\
1666                    get_values(interpolation_points=points_geo)
1667
1668        # add predicted elevation to array that starts with x, y, z...
1669        data[:,i+3] = elevation_predicted
1670
1671        sample_cov = cov(elevation_sample)
1672        ele_cov = cov(elevation_sample - elevation_predicted)
1673        normal_cov[i,:] = [alpha,ele_cov / sample_cov]
1674
1675        if verbose:
1676            print 'Covariance for alpha ', normal_cov[i][0], '= ', \
1677                      normal_cov[i][1]
1678            print '-------------------------------------------- \n'
1679
1680    normal_cov0 = normal_cov[:,0]
1681    normal_cov_new = num.take(normal_cov, num.argsort(normal_cov0), axis=0)
1682
1683    if plot_name is not None:
1684        from pylab import savefig, semilogx, loglog
1685
1686        semilogx(normal_cov_new[:,0], normal_cov_new[:,1])
1687        loglog(normal_cov_new[:,0], normal_cov_new[:,1])
1688        savefig(plot_name, dpi=300)
1689
1690    if mesh_file == 'temp.msh':
1691        remove(mesh_file)
1692
1693    if verbose:
1694        print 'Final results:'
1695        for i, alpha in enumerate(alphas):
1696            print 'covariance for alpha %s = %s ' \
1697                  % (normal_cov[i][0], normal_cov[i][1])
1698        print '\n Optimal alpha is: %s ' \
1699              % normal_cov_new[(num.argmin(normal_cov_new, axis=0))[1], 0]
1700
1701    # covariance and optimal alpha
1702    return (min(normal_cov_new[:,1]),
1703            normal_cov_new[(num.argmin(normal_cov_new,axis=0))[1],0])
1704
1705
##
# @brief Old cross-validation search for the best smoothing parameter.
#        Superseded by find_optimal_smoothing_parameter() above; kept for
#        reference. Fits the data with each candidate alpha and returns the
#        alpha giving the smallest normalised covariance on withheld points.
# @param data_file Name of the point data file (.pts, .txt or .csv).
# @param alpha_list List of candidate alpha values to test.
# @param mesh_file Mesh file to create ('temp.msh') or an existing one to read.
# @param boundary_poly Boundary polygon (currently unused - see FIXME in body).
# @param mesh_resolution Maximum triangle area used when generating the mesh.
# @param north_boundary Northern extent of the generated mesh.
# @param south_boundary Southern extent of the generated mesh.
# @param east_boundary Eastern extent of the generated mesh.
# @param west_boundary Western extent of the generated mesh.
# @param plot_name Filename for the results plot (None to skip plotting).
# @param split_factor Fraction of points withheld for comparison.
# @param seed_num Seed for the random number generator used by split().
# @param cache True if intermediate results are to be cached.
# @param verbose True if this function is to be verbose.
1722def old_find_optimal_smoothing_parameter(data_file,
1723                                         alpha_list=None,
1724                                         mesh_file=None,
1725                                         boundary_poly=None,
1726                                         mesh_resolution=100000,
1727                                         north_boundary=None,
1728                                         south_boundary=None,
1729                                         east_boundary=None,
1730                                         west_boundary=None,
1731                                         plot_name='all_alphas',
1732                                         split_factor=0.1,
1733                                         seed_num=None,
1734                                         cache=False,
1735                                         verbose=False):
1736    """
1737    data_file: must not contain points outside the boundaries defined
1738               and it either a pts, txt or csv file.
1739
1740    alpha_list: the alpha values to test in a single list
1741
1742    mesh_file: name of the created mesh file or if passed in will read it.
1743               NOTE, if there is a mesh file mesh_resolution,
1744               north_boundary, south... etc will be ignored.
1745
1746    mesh_resolution: the maximum area size for a triangle
1747
1748    north_boundary... west_boundary: the value of the boundary
1749
1750    plot_name: the name for the plot contain the results
1751
1752    seed_num: the seed to the random number generator
1753
1754    USAGE:
1755        value, alpha = find_optimal_smoothing_parameter(data_file=fileName,
1756                                             alpha_list=[0.0001, 0.01, 1],
1757                                             mesh_file=None,
1758                                             mesh_resolution=3,
1759                                             north_boundary=5,
1760                                             south_boundary=-5,
1761                                             east_boundary=5,
1762                                             west_boundary=-5,
1763                                             plot_name='all_alphas',
1764                                             seed_num=100000,
1765                                             verbose=False)
1766
1767    OUTPUT: returns the minumum normalised covalance calculate AND the
1768            alpha that created it. PLUS writes a plot of the results
1769
1770    NOTE: code will not work if the data_file extend is greater than the
1771          boundary_polygon or the north_boundary...west_boundary
1772    """
1773
1774    from anuga.shallow_water import Domain
1775    from anuga.geospatial_data.geospatial_data import Geospatial_data
1776    from anuga.pmesh.mesh_interface import create_mesh_from_regions
1777    from anuga.utilities.numerical_tools import cov
1778    from anuga.utilities.polygon import is_inside_polygon
1779    from anuga.fit_interpolate.benchmark_least_squares import mem_usage
1780
1781
1782    attribute_smoothed = 'elevation'
1783
1784    if mesh_file is None:
1785        mesh_file = 'temp.msh'
1786
1787        if north_boundary is None or south_boundary is None \
1788           or east_boundary is None or west_boundary is None:
1789            no_boundary = True
1790        else:
1791            no_boundary = False
1792
1793        if no_boundary is True:
1794            msg = 'All boundaries must be defined'
1795            raise Expection, msg
1796
1797        poly_topo = [[east_boundary,south_boundary],
1798                     [east_boundary,north_boundary],
1799                     [west_boundary,north_boundary],
1800                     [west_boundary,south_boundary]]
1801
1802        create_mesh_from_regions(poly_topo,
1803                                 boundary_tags={'back': [2],
1804                                                'side': [1,3],
1805                                                'ocean': [0]},
1806                                 maximum_triangle_area=mesh_resolution,
1807                                 filename=mesh_file,
1808                                 use_cache=cache,
1809                                 verbose=verbose)
1810
1811    else: # if mesh file provided
1812        # test mesh file exists?
1813        if access(mesh_file,F_OK) == 0:
1814            msg = "file %s doesn't exist!" % mesh_file
1815            raise IOError, msg
1816
1817    # split topo data
1818    G = Geospatial_data(file_name=data_file)
1819    if verbose: print 'start split'
1820    G_small, G_other = G.split(split_factor, seed_num, verbose=verbose)
1821    if verbose: print 'finish split'
1822    points = G_small.get_data_points()
1823
1824    # FIXME: Remove points outside boundary polygon
1825#    print 'new point',len(points)
1826#
1827#    new_points=[]
1828#    new_points=array([],dtype=float)
1829#    new_points=resize(new_points,(len(points),2))
1830#    print "BOUNDARY", boundary_poly
1831#    for i,point in enumerate(points):
1832#        if is_inside_polygon(point,boundary_poly, verbose=True):
1833#            new_points[i] = point
1834#            print"WOW",i,new_points[i]
1835#    points = new_points
1836
1837    if verbose: print "Number of points in sample to compare: ", len(points)
1838
1839    if alpha_list == None:
1840        alphas = [0.001,0.01,100]
1841        #alphas = [0.000001, 0.00001, 0.0001, 0.001, 0.01,\
1842         #         0.1, 1.0, 10.0, 100.0,1000.0,10000.0]
1843    else:
1844        alphas = alpha_list
1845
1846    domains = {}
1847
1848    if verbose: print 'Setup computational domains with different alphas'
1849
1850    for alpha in alphas:
1851        # add G_other data to domains with different alphas
1852        if verbose:
1853            print '\n Calculating domain and mesh for Alpha = ', alpha, '\n'
1854        domain = Domain(mesh_file, use_cache=cache, verbose=verbose)
1855        if verbose: print domain.statistics()
1856        domain.set_quantity(attribute_smoothed,
1857                            geospatial_data=G_other,
1858                            use_cache=cache,
1859                            verbose=verbose,
1860                            alpha=alpha)
1861        domains[alpha] = domain
1862
1863    # creates array with columns 1 and 2 are x, y. column 3 is elevation
1864    # 4 onwards is the elevation_predicted using the alpha, which will
1865    # be compared later against the real removed data
1866    data = num.array([], dtype=num.float)
1867
1868    data = num.resize(data, (len(points), 3+len(alphas)))
1869
1870    # gets relative point from sample
1871    data[:,0] = points[:,0]
1872    data[:,1] = points[:,1]
1873    elevation_sample = G_small.get_attributes(attribute_name=attribute_smoothed)
1874    data[:,2] = elevation_sample
1875
1876    normal_cov = num.array(num.zeros([len(alphas), 2]), dtype=num.float)
1877
1878    if verbose:
1879        print 'Determine difference between predicted results and actual data'
1880    for i, alpha in enumerate(domains):
1881        if verbose: print'Alpha =', alpha
1882
1883        points_geo = domains[alpha].geo_reference.change_points_geo_ref(points)
1884        # returns the predicted elevation of the points that were "split" out
1885        # of the original data set for one particular alpha
1886        elevation_predicted = \
1887                domains[alpha].quantities[attribute_smoothed].\
1888                        get_values(interpolation_points=points_geo)
1889
1890        # add predicted elevation to array that starts with x, y, z...
1891        data[:,i+3] = elevation_predicted
1892
1893        sample_cov = cov(elevation_sample)
1894        ele_cov = cov(elevation_sample - elevation_predicted)
1895        normal_cov[i,:] = [alpha,ele_cov / sample_cov]
1896        print 'memory usage during compare', mem_usage()
1897        if verbose: print 'cov', normal_cov[i][0], '= ', normal_cov[i][1]
1898
1899    normal_cov0 = normal_cov[:,0]
1900    normal_cov_new = num.take(normal_cov, num.argsort(normal_cov0), axis=0)
1901
1902    if plot_name is not None:
1903        from pylab import savefig,semilogx,loglog
1904
1905        semilogx(normal_cov_new[:,0], normal_cov_new[:,1])
1906        loglog(normal_cov_new[:,0], normal_cov_new[:,1])
1907        savefig(plot_name, dpi=300)
1908    if mesh_file == 'temp.msh':
1909        remove(mesh_file)
1910
1911    return (min(normal_cov_new[:,1]),
1912            normal_cov_new[(num.argmin(normal_cov_new, axis=0))[1],0])
1913
1914
if __name__ == "__main__":
    # This module is a library; it has no standalone command-line behaviour.
    pass
1917
# Note: See TracBrowser for help on using the repository browser.