source: branches/numpy/anuga/geospatial_data/geospatial_data.py @ 6306

Last change on this file since 6306 was 6304, checked in by rwilson, 16 years ago

Initial commit of numpy changes. Still a long way to go.

File size: 70.3 KB
Line 
1"""Class Geospatial_data - Manipulation of locations on the planet and
2associated attributes.
3
4"""
5
6from sys import maxint
7from os import access, F_OK, R_OK,remove
8from types import DictType
9from warnings import warn
10from string import lower
11from RandomArray import randint, seed, get_seed
12from copy import deepcopy
13from Scientific.IO.NetCDF import NetCDFFile
14
15import numpy as num
16
17from anuga.coordinate_transforms.lat_long_UTM_conversion import UTMtoLL
18from anuga.utilities.numerical_tools import ensure_numeric
19from anuga.coordinate_transforms.geo_reference import Geo_reference, \
20     TitleError, DEFAULT_ZONE, ensure_geo_reference, write_NetCDF_georeference
21from anuga.coordinate_transforms.redfearn import convert_from_latlon_to_utm
22from anuga.utilities.anuga_exceptions import ANUGAError
23from anuga.config import points_file_block_line_size as MAX_READ_LINES
24from anuga.config import netcdf_mode_r, netcdf_mode_w, netcdf_mode_a
25from anuga.config import netcdf_float
26
27DEFAULT_ATTRIBUTE = 'elevation'
28
29
30##
31# @brief ??
32class Geospatial_data:
33
34    ##
35    # @brief
36    # @param data_points Mx2 iterable of tuples or array of x,y coordinates.
37    # @param attributes Associated values for each data point.
38    # @param geo_reference ??
39    # @param default_attribute_name ??
40    # @param file_name
41    # @param latitudes ??
42    # @param longitudes ??
43    # @param points_are_lats_longs True if points are lat/long, not UTM.
44    # @param max_read_lines Size of block to read, if blocking.
    # @param load_file_now True if blocking but we want to read file now.
46    # @param verbose True if this class instance is verbose.
    def __init__(self,
                 data_points=None, # this can also be a points file name
                 attributes=None,
                 geo_reference=None,
                 default_attribute_name=None,
                 file_name=None,
                 latitudes=None,
                 longitudes=None,
                 points_are_lats_longs=False,
                 max_read_lines=None,
                 load_file_now=True,
                 verbose=False):
        """Create instance from data points and associated attributes

        data_points: x,y coordinates in meters. Type must be either a
        sequence of 2-tuples or an Mx2 numeric array of floats.  A file name
        with extension .txt, .csv or .pts can also be passed in here.

        attributes: Associated values for each data point. The type
        must be either a list or an array of length M or a dictionary
        of lists (or arrays) of length M. In the latter case the keys
        in the dictionary represent the attribute names, in the former
        the attribute will get the default name "elevation".

        geo_reference: Object representing the origin of the data
        points. It contains UTM zone, easting and northing and data
        points are assumed to be relative to this origin.
        If geo_reference is None, the default geo ref object is used.

        default_attribute_name: Name of default attribute to be used with
        get_attribute_values. The idea is that the dataset can be
        equipped with information about which attribute to return.
        If None, the default is the "first"

        latitudes, longitudes: Vectors of latitudes and longitudes,
        used to specify location instead of points.

        points_are_lats_longs: Set this as true if the points are actually
        lats and longs, not UTM

        max_read_lines: The number of rows read into memory when using
        blocking to read a file.

        load_file_now:  If true the file is automatically loaded
        into the geospatial instance. Used when blocking.

        file_name: Name of input netCDF file or .txt file. netCDF file must
        have dimensions "points" etc.
        .txt file is a comma separated file with x, y and attribute
        data.

        The first line has the titles of the columns.  The first two
        column titles are checked to see if they start with lat or
        long (not case sensitive).  If so the data is assumed to be
        latitude and longitude, in decimal format and converted to
        UTM.  Otherwise the first two columns are assumed to be the x
        and y, and the title names actually used are ignored.


        The format for a .txt file is:
            1st line:     [column names]
            other lines:  x y [attributes]

            for example:
            x, y, elevation, friction
            0.6, 0.7, 4.9, 0.3
            1.9, 2.8, 5, 0.3
            2.7, 2.4, 5.2, 0.3

        The first two columns have to be x, y or lat, long
        coordinates.


        The format for a Points dictionary is:
          ['pointlist'] a 2 column array describing points. 1st column x,
          2nd column y.
          ['attributelist'], a dictionary of 1D arrays, representing
          attribute values at the point.  The dictionary key is the attribute
          header.
          ['geo_reference'] a Geo_reference object. Use if the point
          information is relative. This is optional.
            eg
            dic['pointlist'] = [[1.0,2.0],[3.0,5.0]]
            dic['attributelist']['elevation'] = [7.0,5.0]

        verbose: True if this class instance should print progress messages.
        """

        if isinstance(data_points, basestring):
            # assume data_points is really a file name
            file_name = data_points

        self.set_verbose(verbose)
        self.geo_reference = None # create the attribute
        self.file_name = file_name

        # Fall back on the module-level default block size if none given.
        if max_read_lines is None:
            self.max_read_lines = MAX_READ_LINES
        else:
            self.max_read_lines = max_read_lines

        if file_name is None:
            if latitudes is not None \
               or longitudes is not None \
               or points_are_lats_longs:
                # Convert lat/long input into UTM data points + georeference.
                data_points, geo_reference = \
                    _set_using_lat_long(latitudes=latitudes,
                                        longitudes=longitudes,
                                        geo_reference=geo_reference,
                                        data_points=data_points,
                                        points_are_lats_longs=\
                                        points_are_lats_longs)
            self.check_data_points(data_points)
            self.set_attributes(attributes)
            self.set_geo_reference(geo_reference)
            self.set_default_attribute_name(default_attribute_name)
        elif load_file_now is True:
            # watch for case where file name and points,
            # attributes etc are provided!!
            # if file name then all provided info will be removed!

            if verbose is True:
                if file_name is not None:
                    print 'Loading Geospatial data from file: %s' % file_name

            self.import_points_file(file_name, verbose=verbose)

            self.check_data_points(self.data_points)
            self.set_attributes(self.attributes)
            self.set_geo_reference(self.geo_reference)
            self.set_default_attribute_name(default_attribute_name)

        if verbose is True:
            if file_name is not None:
                print 'Geospatial data created from file: %s' % file_name
                if load_file_now is False:
                    print 'Data will be loaded blockwise on demand'

                    if file_name.endswith('csv') or file_name.endswith('txt'):
                        pass
                        # This message was misleading.
                        # FIXME (Ole): Are we blocking here or not?
                        # print 'ASCII formats are not that great for '
                        # print 'blockwise reading. Consider storing this'
                        # print 'data as a pts NetCDF format'
192
193    ##
194    # @brief Return length of the points set.
195    def __len__(self):
196        return len(self.data_points)
197
198    ##
199    # @brief Return a string representation of the points set.
200    def __repr__(self):
201        return str(self.get_data_points(absolute=True))
202
203    ##
204    # @brief Check data points.
205    # @param data_points Points data to check and store in instance.
206    # @note Throws ValueError exception if no data.
207    def check_data_points(self, data_points):
208        """Checks data points"""
209
210        if data_points is None:
211            self.data_points = None
212            msg = 'There is no data or file provided!'
213            raise ValueError, msg
214
215        else:
216            self.data_points = ensure_numeric(data_points)
217            return
218
219            print 'self.data_points=%s' % str(self.data_points)
220            print 'self.data_points.shape=%s' % str(self.data_points.shape)
221            if not (0,) == self.data_points.shape:
222                assert len(self.data_points.shape) == 2
223                assert self.data_points.shape[1] == 2
224
225    ##
226    # @brief Check and assign attributes data.
227    # @param attributes Dictionary or scalar to save as .attributes.
228    # @note Throws exception if unable to convert dict keys to numeric.
229    def set_attributes(self, attributes):
230        """Check and assign attributes dictionary"""
231
232        if attributes is None:
233            self.attributes = None
234            return
235
236        if not isinstance(attributes, DictType):
237            # Convert single attribute into dictionary
238            attributes = {DEFAULT_ATTRIBUTE: attributes}
239
240        # Check input attributes
241        for key in attributes.keys():
242            try:
243                attributes[key] = ensure_numeric(attributes[key])
244            except:
245                msg = ("Attribute '%s' (%s) could not be converted to a"
246                       "numeric vector" % (str(key), str(attributes[key])))
247                raise Exception, msg
248
249        self.attributes = attributes
250
251    ##
252    # @brief Set the georeference of geospatial data.
253    # @param geo_reference The georeference data to set.
254    # @note Will raise exception if param not instance of Geo_reference.
255    def set_geo_reference(self, geo_reference):
256        """Set the georeference of geospatial data.
257
258        It can also be used to change the georeference and will ensure that
259        the absolute coordinate values are unchanged.
260        """
261
262        from anuga.coordinate_transforms.geo_reference import Geo_reference
263
264        if geo_reference is None:
265            # Use default - points are in absolute coordinates
266            geo_reference = Geo_reference()
267
268        # Allow for tuple (zone, xllcorner, yllcorner)
269        geo_reference = ensure_geo_reference(geo_reference)
270
271        if not isinstance(geo_reference, Geo_reference):
272            # FIXME (Ole): This exception will be raised even
273            # if geo_reference is None. Is that the intent Duncan?
274            msg = 'Argument geo_reference must be a valid Geo_reference '
275            msg += 'object or None.'
276            raise Expection, msg
277
278        # If a geo_reference already exists, change the point data according to
279        # the new geo reference
280        if  self.geo_reference is not None:
281            self.data_points = self.get_data_points(geo_reference=geo_reference)
282
283        self.geo_reference = geo_reference
284
285    ##
286    # @brief Set default attribute name.
287    # @param default_attribute_name The default to save.
    def set_default_attribute_name(self, default_attribute_name):
        """Set the attribute name used by get_attributes() when none is given."""
        self.default_attribute_name = default_attribute_name
290
291    ##
292    # @brief Set the instance verbose flag.
293    # @param verbose The value to save.
294    # @note Will raise exception if param is not True or False.
295    def set_verbose(self, verbose=False):
296        if verbose in [False, True]:
297            self.verbose = verbose
298        else:
299            msg = 'Illegal value: %s' % str(verbose)
300            raise Exception, msg
301
302    ##
303    # @brief Clip geospatial data by a given polygon.
304    # @param polygon The polygon to clip with.
305    # @param closed True if points on clip boundary are not included in result.
306    # @param verbose True if this function is verbose.
307    def clip(self, polygon, closed=True, verbose=False):
308        """Clip geospatial data by a polygon
309
310        Input
311          polygon - Either a list of points, an Nx2 array or
312                    a Geospatial data object.
313          closed - (optional) determine whether points on boundary should be
314          regarded as belonging to the polygon (closed = True)
315          or not (closed = False). Default is True.
316
317        Output
318          New geospatial data object representing points inside
319          specified polygon.
320
321
322        Note - this method is non-destructive and leaves the data in 'self'
323        unchanged
324        """
325
326        from anuga.utilities.polygon import inside_polygon
327
328        if isinstance(polygon, Geospatial_data):
329            # Polygon is an object - extract points
330            polygon = polygon.get_data_points()
331
332        points = self.get_data_points()
333        inside_indices = inside_polygon(points, polygon, closed, verbose)
334
335        clipped_G = self.get_sample(inside_indices)
336
337        return clipped_G
338
339    ##
340    # @brief Clip points data by polygon, return points outside polygon.
341    # @param polygon The polygon to clip with.
342    # @param closed True if points on clip boundary are not included in result.
343    # @param verbose True if this function is verbose.
344    def clip_outside(self, polygon, closed=True, verbose=False):
345        """Clip geospatial date by a polygon, keeping data OUTSIDE of polygon
346
347        Input
348          polygon - Either a list of points, an Nx2 array or
349                    a Geospatial data object.
350          closed - (optional) determine whether points on boundary should be
351          regarded as belonging to the polygon (closed = True)
352          or not (closed = False). Default is True.
353
354        Output
355          Geospatial data object representing point OUTSIDE specified polygon
356        """
357
358        from anuga.utilities.polygon import outside_polygon
359
360        if isinstance(polygon, Geospatial_data):
361            # Polygon is an object - extract points
362            polygon = polygon.get_data_points()
363
364        points = self.get_data_points()
365        outside_indices = outside_polygon(points, polygon, closed,verbose)
366
367        clipped_G = self.get_sample(outside_indices)
368
369        return clipped_G
370
371    ##
372    # @brief Get instance geo_reference data.
    def get_geo_reference(self):
        """Return the Geo_reference instance attached to this object."""
        return self.geo_reference
375
376    ##
377    # @brief Get coordinates for all data points as an Nx2 array.
378    # @param absolute If True, return UTM, else relative to xll/yll corners.
379    # @param geo_reference If supplied, points are relative to it.
380    # @param as_lat_long If True, return points as lat/lon.
381    # @param isSouthHemisphere If True, return lat/lon points in S.Hemi.
382    # @return A set of data points, in appropriate form.
383    def get_data_points(self, absolute=True, geo_reference=None,
384                        as_lat_long=False, isSouthHemisphere=True):
385        """Get coordinates for all data points as an Nx2 array
386
387        If absolute is False returned coordinates are relative to the
388        internal georeference's xll and yll corners, otherwise
389        absolute UTM coordinates are returned.
390
391        If a geo_reference is passed the points are returned relative
392        to that geo_reference.
393
394        isSH (isSouthHemisphere) is only used when getting data
395        points "as_lat_long" is True and if FALSE will return lats and
396        longs valid for the Northern Hemisphere.
397
398        Default: absolute is True.
399        """
400
401        if as_lat_long is True:
402            msg = "Points need a zone to be converted into lats and longs"
403            assert self.geo_reference is not None, msg
404            zone = self.geo_reference.get_zone()
405            assert self.geo_reference.get_zone() is not DEFAULT_ZONE, msg
406            lats_longs = []
407            for point in self.get_data_points(True):
408                # UTMtoLL(northing, easting, zone,
409                lat_calced, long_calced = UTMtoLL(point[1], point[0],
410                                                  zone, isSouthHemisphere)
411                lats_longs.append((lat_calced, long_calced)) # to hash
412            return lats_longs
413
414        if absolute is True and geo_reference is None:
415            return self.geo_reference.get_absolute(self.data_points)
416        elif geo_reference is not None:
417            return geo_reference.change_points_geo_ref(self.data_points,
418                                                       self.geo_reference)
419        else:
420            # If absolute is False
421            return self.data_points
422
423    ##
424    # @brief Get value for attribute name.
425    # @param attribute_name Name to get value for.
426    # @note If name passed is None, return default attribute value.
427    def get_attributes(self, attribute_name=None):
428        """Return values for one named attribute.
429
430        If attribute_name is None, default_attribute_name is used
431        """
432
433        if attribute_name is None:
434            if self.default_attribute_name is not None:
435                attribute_name = self.default_attribute_name
436            else:
437                attribute_name = self.attributes.keys()[0]
438                # above line takes the first one from keys
439
440        if self.verbose is True:
441            print 'Using attribute %s' %attribute_name
442            print 'Available attributes: %s' %(self.attributes.keys())
443
444        msg = 'Attribute name %s does not exist in data set' % attribute_name
445        assert self.attributes.has_key(attribute_name), msg
446
447        return self.attributes[attribute_name]
448
449    ##
450    # @brief Get all instance attributes.
451    # @return The instance attribute dictionary, or None if no attributes.
    def get_all_attributes(self):
        """Return values for all attributes.

        The return value is either None or a dictionary (possibly empty)
        mapping attribute name to its vector of values.
        """

        return self.attributes
458
459    ##
460    # @brief Override __add__() to allow addition of geospatial objects.
461    # @param self This object.
462    # @param other The second object.
463    # @return The new geospatial object.
    def __add__(self, other):
        """Return the addition of 2 geospatial objects;
        objects are concatenated to the end of each other.

        NOTE: doesn't add if objects contain different
        attributes

        Always return absolute points!
        This also means, that if you add None to the object,
        it will be turned into absolute coordinates

        other can be None in which case nothing is added to self.
        """

        # find objects zone and checks if the same
        geo_ref1 = self.get_geo_reference()
        zone1 = geo_ref1.get_zone()

        if other is not None:
            geo_ref2 = other.get_geo_reference()
            zone2 = geo_ref2.get_zone()
            # NOTE(review): reconcile_zones presumably mutates geo_ref1 so the
            # two zones agree (or raises) - confirm against Geo_reference.
            geo_ref1.reconcile_zones(geo_ref2)
            new_points = num.concatenate((self.get_data_points(absolute=True),
                                          other.get_data_points(absolute=True)),
                                         axis = 0)

            # Concatenate attributes if any
            if self.attributes is None:
                if other.attributes is not None:
                    msg = 'Geospatial data must have the same '
                    msg += 'attributes to allow addition.'
                    raise Exception, msg

                new_attributes = None
            else:
                # Both objects must carry exactly the same attribute keys.
                new_attributes = {}
                for x in self.attributes.keys():
                    if other.attributes.has_key(x):
                        attrib1 = self.attributes[x]
                        attrib2 = other.attributes[x]
                        new_attributes[x] = num.concatenate((attrib1, attrib2))
                    else:
                        msg = 'Geospatial data must have the same \n'
                        msg += 'attributes to allow addition.'
                        raise Exception, msg
        else:
            # other is None:
            new_points = self.get_data_points(absolute=True)
            new_attributes = self.attributes

        # Instantiate new data object and return absolute coordinates
        new_geo_ref = Geo_reference(geo_ref1.get_zone(), 0.0, 0.0)
        return Geospatial_data(new_points, new_attributes, new_geo_ref)
517
518    ##
519    # @brief Override the addition case where LHS isn't geospatial object.
520    # @param self This object.
521    # @param other The second object.
522    # @return The new geospatial object.
523    def __radd__(self, other):
524        """Handle cases like None + Geospatial_data(...)"""
525
526        return self + other
527
528    ###
529    #  IMPORT/EXPORT POINTS FILES
530    ###
531
532    ##
533    # @brief Import a .txt, .csv or .pts points data file.
534    # @param file_name
535    # @param delimiter
536    # @param verbose True if this function is to be verbose.
537    # @note Will throw IOError or SyntaxError if there is a problem.
    def import_points_file(self, file_name, delimiter=None, verbose=False):
        """Load a .txt, .csv or .pts file into this instance.

        file_name: Path of the points file to read.
        delimiter: Unused here; presumably kept for backward compatibility
                   - TODO confirm no caller relies on it.
        verbose:   True if the reader functions should report progress.

        Note: will throw an IOError/SyntaxError if it can't load the file.
        Catch these!

        Post condition: self.data_points, self.attributes and
        self.geo_reference have been set from the file contents.
        """

        if access(file_name, F_OK) == 0 :
            msg = 'File %s does not exist or is not accessible' % file_name
            raise IOError, msg

        attributes = {}
        if file_name[-4:]== ".pts":
            try:
                data_points, attributes, geo_reference = \
                             _read_pts_file(file_name, verbose)
            except IOError, e:
                msg = 'Could not open file %s ' % file_name
                raise IOError, msg
        elif file_name[-4:]== ".txt" or file_name[-4:]== ".csv":
            try:
                data_points, attributes, geo_reference = \
                             _read_csv_file(file_name, verbose)
            except IOError, e:
                # This should only be if a file is not found
                msg = 'Could not open file %s. ' % file_name
                msg += 'Check the file location.'
                raise IOError, msg
            except SyntaxError, e:
                # This should only be if there is a format error
                # NOTE(review): Error_message is not defined in this part of
                # the module - verify it exists elsewhere in the file.
                msg = 'Problem with format of file %s. \n' %file_name
                msg += Error_message['IOError']
                raise SyntaxError, msg
        else:
            msg = 'Extension %s is unknown' %file_name[-4:]
            raise IOError, msg

        self.data_points = data_points
        self.attributes = attributes
        self.geo_reference = geo_reference
580
581    ##
582    # @brief Write points data to a file (.csv or .pts).
583    # @param file_name Path to file to write.
584    # @param absolute ??
585    # @param as_lat_long ??
586    # @param isSouthHemisphere ??
587    def export_points_file(self, file_name, absolute=True,
588                           as_lat_long=False, isSouthHemisphere=True):
589
590        """write a points file as a text (.csv) or binary (.pts) file
591
592        file_name is the file name, including the extension
593        The point_dict is defined at the top of this file.
594
595        If absolute is True data the xll and yll are added to the points value
596        and the xll and yll of the geo_reference are set to 0.
597
598        If absolute is False data points at returned as relative to the xll
599        and yll and geo_reference remains uneffected
600
601        isSouthHemisphere: is only used when getting data
602        points "as_lat_long" is True and if FALSE will return lats and
603        longs valid for the Northern Hemisphere.
604        """
605
606        if (file_name[-4:] == ".pts"):
607            if absolute is True:
608                geo_ref = deepcopy(self.geo_reference)
609                geo_ref.xllcorner = 0
610                geo_ref.yllcorner = 0
611                _write_pts_file(file_name,
612                                self.get_data_points(absolute),
613                                self.get_all_attributes(),
614                                geo_ref)
615            else:
616                _write_pts_file(file_name,
617                                self.get_data_points(absolute),
618                                self.get_all_attributes(),
619                                self.get_geo_reference())
620
621        elif file_name[-4:] == ".txt" or file_name[-4:] == ".csv":
622            msg = "ERROR: trying to write a .txt file with relative data."
623            assert absolute, msg
624            _write_csv_file(file_name,
625                            self.get_data_points(absolute=True,
626                                                 as_lat_long=as_lat_long,
627                                  isSouthHemisphere=isSouthHemisphere),
628                            self.get_all_attributes(),
629                            as_lat_long=as_lat_long)
630
631        elif file_name[-4:] == ".urs" :
632            msg = "ERROR: Can not write a .urs file as a relative file."
633            assert absolute, msg
634            _write_urs_file(file_name,
635                            self.get_data_points(as_lat_long=True,
636                                  isSouthHemisphere=isSouthHemisphere))
637
638        else:
639            msg = 'Unknown file type %s ' %file_name
640            raise IOError, msg
641
642    ##
643    # @brief Get a subset of data that is referred to by 'indices'.
644    # @param indices A list of indices to select data subset with.
645    # @return A geospatial object containing data subset.
646    def get_sample(self, indices):
647        """ Returns a object which is a subset of the original
648        and the data points and attributes in this new object refer to
649        the indices provided
650
651        Input
652            indices- a list of integers that represent the new object
653        Output
654            New geospatial data object representing points specified by
655            the indices
656        """
657
658        # FIXME: add the geo_reference to this
659        points = self.get_data_points()
660        sampled_points = num.take(points, indices)
661
662        attributes = self.get_all_attributes()
663
664        sampled_attributes = {}
665        if attributes is not None:
666            for key, att in attributes.items():
667                sampled_attributes[key] = num.take(att, indices)
668
669        return Geospatial_data(sampled_points, sampled_attributes)
670
671    ##
672    # @brief Split one geospatial object into two.
673    # @param factor Relative size to make first result object.
674    # @param seed_num Random 'seed' - used only for unit test.
675    # @param verbose True if this function is to be verbose.
676    # @note Points in each result object are selected randomly.
    def split(self, factor=0.5, seed_num=None, verbose=False):
        """Return two geospatial_data objects; the first is 'factor' times
        the size of the original and the second is the remainder.  The two
        new objects are disjoint sets of each other.

        factor:   Fraction of the points to place in the first object;
                  if 0.1 then 10% of the original points are returned first.
        seed_num: Random seed - used mainly to make unit tests repeatable.
        verbose:  True if this method should report progress.

        Points of the two new objects are selected RANDOMLY.

        This method creates two lists of indices which are passed into
        get_sample.  The lists are created using random numbers, and
        they are unique sets eg.  total_list(1,2,3,4,5,6,7,8,9)
        random_list(1,3,6,7,9) and remainder_list(0,2,4,5,8)

        Output - two geospatial_data objects that are disjoint sets of the
                 original
        """

        i=0
        self_size = len(self)
        random_list = []
        remainder_list = []
        new_size = round(factor*self_size)

        # Find unique random numbers
        if verbose: print "make unique random number list and get indices"

        total=num.array(range(self_size))
        total_list = total.tolist()

        if verbose: print "total list len", len(total_list)

        # There will be repeated random numbers however will not be a
        # problem as they are being 'pop'ed out of array so if there
        # are two numbers the same they will pop different indices,
        # still basically random
        ## create list of non-unique random numbers
        if verbose: print "create random numbers list %s long" %new_size

        # Set seed if provided, mainly important for unit test!
        # plus recalculate seed when no seed provided.
        if seed_num != None:
            seed(seed_num, seed_num)
        else:
            seed()

        if verbose: print "seed:", get_seed()

        random_num = randint(0, self_size-1, (int(new_size),))
        random_num = random_num.tolist()

        # need to sort and reverse so the pop() works correctly
        # (popping from high indices first keeps lower indices valid)
        random_num.sort()
        random_num.reverse()

        if verbose: print "make random number list and get indices"

        j=0
        k=1
        remainder_list = total_list[:]

        # pops array index (random_num) from remainder_list
        # (which starts as the
        # total_list and appends to random_list
        random_num_len = len(random_num)
        for i in random_num:
            random_list.append(remainder_list.pop(i))
            j += 1
            # prints progress
            if verbose and round(random_num_len/10*k) == j:
                print '(%s/%s)' % (j, random_num_len)
                k += 1

        # FIXME: move to tests, it might take a long time
        # then create an array of random length between 500 and 1000,
        # and use a random factor between 0 and 1
        # setup for assertion
        test_total = random_list[:]
        test_total.extend(remainder_list)
        test_total.sort()
        msg = 'The two random lists made from the original list when added ' \
              'together DO NOT equal the original list'
        assert total_list == test_total, msg

        # Get new samples
        if verbose: print "get values of indices for random list"
        G1 = self.get_sample(random_list)
        if verbose: print "get values of indices for opposite of random list"
        G2 = self.get_sample(remainder_list)

        return G1, G2
769
770    ##
771    # @brief Allow iteration over this object.
772    def __iter__(self):
773        """Read in the header, number_of_points and save the
774        file pointer position
775        """
776        from Scientific.IO.NetCDF import NetCDFFile
777        # FIXME - what to do if the file isn't there
778
779        # FIXME (Ole): Shouldn't this go into the constructor?
780        # This method acts like the constructor when blocking.
781        # ... and shouldn't it be called block_size?
782        #
783        if self.max_read_lines is None:
784            self.max_read_lines = MAX_READ_LINES
785
786        if self.file_name[-4:] == ".pts":
787            # See if the file is there.  Throw a QUIET IO error if it isn't
788            fd = open(self.file_name,'r')
789            fd.close()
790
791            # Throws prints to screen if file not present
792            self.fid = NetCDFFile(self.file_name, netcdf_mode_r)
793
794            (self.blocking_georef,
795             self.blocking_keys,
796             self.number_of_points) = _read_pts_file_header(self.fid,
797                                                            self.verbose)
798            self.start_row = 0
799            self.last_row = self.number_of_points
800            self.show_verbose = 0
801            self.verbose_block_size = (self.last_row + 10)/10
802            self.block_number = 0
803            self.number_of_blocks = self.number_of_points/self.max_read_lines
804            # This computes the number of full blocks. The last block may be
805            # smaller and won't be ircluded in this estimate.
806
807            if self.verbose is True:
808                print 'Reading %d points (in ~%d blocks) from file %s. ' \
809                      % (self.number_of_points,
810                         self.number_of_blocks,
811                         self.file_name),
812                print 'Each block consists of %d data points' \
813                      % self.max_read_lines
814
815        else:
816            # Assume the file is a csv file
817            file_pointer = open(self.file_name)
818            self.header, self.file_pointer = _read_csv_file_header(file_pointer)
819            self.blocking_georef = None # Used for reconciling zones
820
821        return self
822
823    ##
824    # @brief Read another block into the instance.
825    def next(self):
826        """read a block, instanciate a new geospatial and return it"""
827
828        if self.file_name[-4:] == ".pts":
829            if self.start_row == self.last_row:
830                # Read the end of the file last iteration
831                # Remove blocking attributes
832                self.fid.close()
833                del self.max_read_lines
834                del self.blocking_georef
835                del self.last_row
836                del self.start_row
837                del self.blocking_keys
838                del self.fid
839                raise StopIteration
840            fin_row = self.start_row + self.max_read_lines
841            if fin_row > self.last_row:
842                fin_row = self.last_row
843
844            if self.verbose is True:
845                if self.show_verbose >= self.start_row \
846                   and self.show_verbose < fin_row:
847                    print 'Reading block %d (points %d to %d) out of %d'\
848                          %(self.block_number,
849                            self.start_row,
850                            fin_row,
851                            self.number_of_blocks)
852
853                    self.show_verbose += max(self.max_read_lines,
854                                             self.verbose_block_size)
855
856
857            # Read next block
858            pointlist, att_dict, = _read_pts_file_blocking(self.fid,
859                                                           self.start_row,
860                                                           fin_row,
861                                                           self.blocking_keys)
862
863            geo = Geospatial_data(pointlist, att_dict, self.blocking_georef)
864            self.start_row = fin_row
865
866            self.block_number += 1
867
868        else:
869            # Assume the file is a csv file
870            try:
871                (pointlist,
872                 att_dict,
873                 geo_ref,
874                 self.file_pointer) = \
875                        _read_csv_file_blocking(self.file_pointer,
876                                                self.header[:],
877                                                max_read_lines=\
878                                                    self.max_read_lines,
879                                                verbose=self.verbose)
880
881                # Check that the zones haven't changed.
882                if geo_ref is not None:
883                    geo_ref.reconcile_zones(self.blocking_georef)
884                    self.blocking_georef = geo_ref
885                elif self.blocking_georef is not None:
886                    msg = 'Geo reference given, then not given.'
887                    msg += ' This should not happen.'
888                    raise ValueError, msg
889                geo = Geospatial_data(pointlist, att_dict, geo_ref)
890            except StopIteration:
891                self.file_pointer.close()
892                del self.header
893                del self.file_pointer
894                raise StopIteration
895            except ANUGAError:
896                self.file_pointer.close()
897                del self.header
898                del self.file_pointer
899                raise
900            except SyntaxError:
901                self.file_pointer.close()
902                del self.header
903                del self.file_pointer
904                # This should only be if there is a format error
905                msg = 'Could not open file %s. \n' % self.file_name
906                msg += Error_message['IOError']
907                raise SyntaxError, msg
908        return geo
909
910
911##################### Error messages ###########
912Error_message = {}
913Em = Error_message
914Em['IOError'] = "NOTE: The format for a comma separated .txt/.csv file is:\n"
915Em['IOError'] += "        1st line:     [column names]\n"
916Em['IOError'] += "        other lines:  [x value], [y value], [attributes]\n"
917Em['IOError'] += "\n"
918Em['IOError'] += "           for example:\n"
919Em['IOError'] += "           x, y, elevation, friction\n"
920Em['IOError'] += "           0.6, 0.7, 4.9, 0.3\n"
921Em['IOError'] += "           1.9, 2.8, 5, 0.3\n"
922Em['IOError'] += "           2.7, 2.4, 5.2, 0.3\n"
923Em['IOError'] += "\n"
924Em['IOError'] += "The first two columns are assumed to be x, y coordinates.\n"
925Em['IOError'] += "The attribute values must be numeric.\n"
926
927##
928# @brief ??
929# @param latitudes ??
930# @param longitudes ??
931# @param geo_reference ??
932# @param data_points ??
933# @param points_are_lats_longs ??
934def _set_using_lat_long(latitudes,
935                        longitudes,
936                        geo_reference,
937                        data_points,
938                        points_are_lats_longs):
939    """If the points has lat long info, assume it is in (lat, long) order."""
940
941    if geo_reference is not None:
942        msg = "A georeference is specified yet latitude and longitude " \
943              "are also specified!"
944        raise ValueError, msg
945
946    if data_points is not None and not points_are_lats_longs:
947        msg = "Data points are specified yet latitude and longitude are " \
948              "also specified."
949        raise ValueError, msg
950
951    if points_are_lats_longs:
952        if data_points is None:
953            msg = "Data points are not specified."
954            raise ValueError, msg
955        lats_longs = ensure_numeric(data_points)
956        latitudes = num.ravel(lats_longs[:,0:1])
957        longitudes = num.ravel(lats_longs[:,1:])
958
959    if latitudes is None and longitudes is None:
960        msg = "Latitudes and Longitudes are not specified."
961        raise ValueError, msg
962
963    if latitudes is None:
964        msg = "Longitudes are specified yet latitudes aren't."
965        raise ValueError, msg
966
967    if longitudes is None:
968        msg = "Latitudes are specified yet longitudes aren't."
969        raise ValueError, msg
970
971    data_points, zone  = convert_from_latlon_to_utm(latitudes=latitudes,
972                                                    longitudes=longitudes)
973    return data_points, Geo_reference(zone=zone)
974
975
976##
977# @brief Read a .pts data file.
978# @param file_name Path to file to read.
979# @param verbose True if this function is to be verbose.
980# @return (pointlist, attributes, geo_reference)
981def _read_pts_file(file_name, verbose=False):
982    """Read .pts NetCDF file
983
984    Return a dic of array of points, and dic of array of attribute
985    eg
986    dic['points'] = [[1.0,2.0],[3.0,5.0]]
987    dic['attributelist']['elevation'] = [[7.0,5.0]]
988    """
989
990    from Scientific.IO.NetCDF import NetCDFFile
991
992    if verbose: print 'Reading ', file_name
993
994    # See if the file is there.  Throw a QUIET IO error if it isn't
995    fd = open(file_name,'r')
996    fd.close()
997
998    # Throws prints to screen if file not present
999    fid = NetCDFFile(file_name, netcdf_mode_r)
1000
1001    pointlist = num.array(fid.variables['points'])
1002    keys = fid.variables.keys()
1003
1004    if verbose: print 'Got %d variables: %s' % (len(keys), keys)
1005
1006    try:
1007        keys.remove('points')
1008    except IOError, e:
1009        fid.close()
1010        msg = "Expected keyword 'points' but could not find it"
1011        raise IOError, msg
1012
1013    attributes = {}
1014    for key in keys:
1015        if verbose: print "reading attribute '%s'" % key
1016
1017        attributes[key] = num.array(fid.variables[key])
1018
1019    try:
1020        geo_reference = Geo_reference(NetCDFObject=fid)
1021    except AttributeError, e:
1022        geo_reference = None
1023
1024    fid.close()
1025
1026    return pointlist, attributes, geo_reference
1027
1028
1029##
1030# @brief Read a .csv data file.
1031# @param file_name Path to the .csv file to read.
1032# @param verbose True if this function is to be verbose.
1033def _read_csv_file(file_name, verbose=False):
1034    """Read .csv file
1035
1036    Return a dic of array of points, and dic of array of attribute
1037    eg
1038    dic['points'] = [[1.0,2.0],[3.0,5.0]]
1039    dic['attributelist']['elevation'] = [[7.0,5.0]]
1040    """
1041
1042    file_pointer = open(file_name)
1043    header, file_pointer = _read_csv_file_header(file_pointer)
1044    try:
1045        (pointlist,
1046         att_dict,
1047         geo_ref,
1048         file_pointer) = _read_csv_file_blocking(file_pointer,
1049                                                 header,
1050                                                 max_read_lines=1e30)
1051                                    # If the file is bigger that this, block..
1052                                    # FIXME (Ole) What's up here?
1053    except ANUGAError:
1054        file_pointer.close()
1055        raise
1056
1057    file_pointer.close()
1058
1059    return pointlist, att_dict, geo_ref
1060
1061
1062##
1063# @brief Read a .csv file header.
1064# @param file_pointer Open descriptor of the file to read.
1065# @param delimiter Header line delimiter string, split on this string.
1066# @param verbose True if this function is to be verbose.
1067# @return A tuple of (<cleaned header string>, <input file_pointer>)
1068
1069CSV_DELIMITER = ','
1070
1071def _read_csv_file_header(file_pointer,
1072                          delimiter=CSV_DELIMITER,
1073                          verbose=False):
1074    """Read the header of a .csv file
1075    Return a list of the header names
1076    """
1077
1078    line = file_pointer.readline().strip()
1079    header = clean_line(line, delimiter)
1080
1081    return header, file_pointer
1082
1083##
1084# @brief Read a .csv file, with blocking.
1085# @param file_pointer Open descriptor of the file to read.
1086# @param header List of already read .csv header fields.
1087# @param delimiter Delimiter string header was split on.
1088# @param max_read_lines The max number of lines to read before blocking.
1089# @param verbose True if this function is to be verbose.
1090# @note Will throw IndexError, SyntaxError exceptions.
1091def _read_csv_file_blocking(file_pointer,
1092                            header,
1093                            delimiter=CSV_DELIMITER,
1094                            max_read_lines=MAX_READ_LINES,
1095                            verbose=False):
1096    """Read the body of a .csv file.
1097    header: The list header of the csv file, with the x and y labels.
1098    """
1099
1100    points = []
1101    pointattributes = []
1102    att_dict = {}
1103
1104    # This is to remove the x and y headers.
1105    header = header[:]
1106    try:
1107        x_header = header.pop(0)
1108        y_header = header.pop(0)
1109    except IndexError:
1110        # if there are not two columns this will occur.
1111        # eg if it is a space seperated file
1112        raise SyntaxError
1113
1114    read_lines = 0
1115    while read_lines < max_read_lines:
1116        line = file_pointer.readline()
1117        numbers = clean_line(line, delimiter)
1118        if len(numbers) <= 1:
1119            break
1120        if line[0] == '#':
1121            continue
1122
1123        read_lines += 1
1124
1125        try:
1126            x = float(numbers[0])
1127            y = float(numbers[1])
1128            points.append([x,y])
1129            numbers.pop(0)
1130            numbers.pop(0)
1131            if len(header) != len(numbers):
1132                file_pointer.close()
1133                msg = "File load error. " \
1134                      "There might be a problem with the file header."
1135                raise SyntaxError, msg
1136            for i,n in enumerate(numbers):
1137                n.strip()
1138                if n != '\n' and n != '':
1139                    #attributes.append(float(n))
1140                    att_dict.setdefault(header[i],[]).append(float(n))
1141        except ValueError:
1142            raise SyntaxError
1143
1144    if points == []:
1145        raise StopIteration
1146
1147    pointlist = num.array(points, num.float)
1148    for key in att_dict.keys():
1149        att_dict[key] = num.array(att_dict[key], num.float)
1150
1151    # Do stuff here so the info is in lat's and longs
1152    geo_ref = None
1153    x_header = lower(x_header[:3])
1154    y_header = lower(y_header[:3])
1155    if (x_header == 'lon' or  x_header == 'lat') \
1156       and (y_header == 'lon' or  y_header == 'lat'):
1157        if x_header == 'lon':
1158            longitudes = num.ravel(pointlist[:,0:1])
1159            latitudes = num.ravel(pointlist[:,1:])
1160        else:
1161            latitudes = num.ravel(pointlist[:,0:1])
1162            longitudes = num.ravel(pointlist[:,1:])
1163
1164        pointlist, geo_ref = _set_using_lat_long(latitudes,
1165                                                 longitudes,
1166                                                 geo_reference=None,
1167                                                 data_points=None,
1168                                                 points_are_lats_longs=False)
1169
1170    return pointlist, att_dict, geo_ref, file_pointer
1171
1172
1173##
1174# @brief Read a .pts file header.
1175# @param fid Handle to the open .pts file.
1176# @param verbose True if the function is to be verbose.
1177# @return (geo_reference, keys, fid.dimensions['number_of_points'])
1178# @note Will throw IOError and AttributeError exceptions.
1179def _read_pts_file_header(fid, verbose=False):
1180    """Read the geo_reference and number_of_points from a .pts file"""
1181
1182    keys = fid.variables.keys()
1183    try:
1184        keys.remove('points')
1185    except IOError, e:
1186        fid.close()
1187        msg = "Expected keyword 'points' but could not find it."
1188        raise IOError, msg
1189
1190    if verbose: print 'Got %d variables: %s' % (len(keys), keys)
1191
1192    try:
1193        geo_reference = Geo_reference(NetCDFObject=fid)
1194    except AttributeError, e:
1195        geo_reference = None
1196
1197    return geo_reference, keys, fid.dimensions['number_of_points']
1198
1199
1200##
1201# @brief Read the body of a .csf file, with blocking.
1202# @param fid Handle to already open file.
1203# @param start_row Start row index of points to return.
1204# @param fin_row End row index of points to return.
1205# @param keys Iterable of keys to return.
1206# @return Tuple of (pointlist, attributes).
1207def _read_pts_file_blocking(fid, start_row, fin_row, keys):
1208    """Read the body of a .csv file."""
1209
1210    pointlist = num.array(fid.variables['points'][start_row:fin_row])
1211
1212    attributes = {}
1213    for key in keys:
1214        attributes[key] = num.array(fid.variables[key][start_row:fin_row])
1215
1216    return pointlist, attributes
1217
1218
1219##
1220# @brief Write a .pts data file.
1221# @param file_name Path to the file to write.
1222# @param write_data_points Data points to write.
1223# @param write_attributes Attributes to write.
1224# @param write_geo_reference Georef to write.
1225def _write_pts_file(file_name,
1226                    write_data_points,
1227                    write_attributes=None,
1228                    write_geo_reference=None):
1229    """Write .pts NetCDF file
1230
1231    NOTE: Below might not be valid ask Duncan : NB 5/2006
1232
1233    WARNING: This function mangles the point_atts data structure
1234    # F??ME: (DSG)This format has issues.
1235    # There can't be an attribute called points
1236    # consider format change
1237    # method changed by NB not sure if above statement is correct
1238
1239    should create new test for this
1240    legal_keys = ['pointlist', 'attributelist', 'geo_reference']
1241    for key in point_atts.keys():
1242        msg = 'Key %s is illegal. Valid keys are %s' %(key, legal_keys)
1243        assert key in legal_keys, msg
1244    """
1245
1246    from Scientific.IO.NetCDF import NetCDFFile
1247
1248    # NetCDF file definition
1249    outfile = NetCDFFile(file_name, netcdf_mode_w)
1250
1251    # Create new file
1252    outfile.institution = 'Geoscience Australia'
1253    outfile.description = 'NetCDF format for compact and portable storage ' \
1254                          'of spatial point data'
1255
1256    # Dimension definitions
1257    shape = write_data_points.shape[0]
1258    outfile.createDimension('number_of_points', shape)
1259    outfile.createDimension('number_of_dimensions', 2) # This is 2d data
1260
1261    # Variable definition
1262    outfile.createVariable('points', netcdf_float, ('number_of_points',
1263                                                    'number_of_dimensions'))
1264
1265    # create variables
1266    outfile.variables['points'][:] = write_data_points
1267
1268    if write_attributes is not None:
1269        for key in write_attributes.keys():
1270            outfile.createVariable(key, netcdf_float, ('number_of_points',))
1271            outfile.variables[key][:] = write_attributes[key]
1272
1273    if write_geo_reference is not None:
1274        write_NetCDF_georeference(write_geo_reference, outfile)
1275
1276    outfile.close()
1277
1278
1279##
1280# @brief Write a .csv data file.
1281# @param file_name Path to the file to write.
1282# @param write_data_points Data points to write.
1283# @param write_attributes Attributes to write.
1284# @param as_lat_long True if points are lat/lon, else x/y.
1285# @param delimiter The CSV delimiter to use.
1286def _write_csv_file(file_name,
1287                    write_data_points,
1288                    write_attributes=None,
1289                    as_lat_long=False,
1290                    delimiter=','):
1291    """Write a .csv file."""
1292
1293    points = write_data_points
1294    pointattributes = write_attributes
1295
1296    fd = open(file_name, 'w')
1297
1298    if as_lat_long:
1299        titlelist = "latitude" + delimiter + "longitude"  + delimiter
1300    else:
1301        titlelist = "x" + delimiter + "y"  + delimiter
1302
1303    if pointattributes is not None:
1304        for title in pointattributes.keys():
1305            titlelist = titlelist + title + delimiter
1306        titlelist = titlelist[0:-len(delimiter)] # remove the last delimiter
1307
1308    fd.write(titlelist + "\n")
1309
1310    # <x/lat> <y/long> [attributes]
1311    for i, vert in enumerate( points):
1312        if pointattributes is not None:
1313            attlist = ","
1314            for att in pointattributes.keys():
1315                attlist = attlist + str(pointattributes[att][i]) + delimiter
1316            attlist = attlist[0:-len(delimiter)] # remove the last delimiter
1317            attlist.strip()
1318        else:
1319            attlist = ''
1320
1321        fd.write(str(vert[0]) + delimiter + str(vert[1]) + attlist + "\n")
1322
1323    fd.close()
1324
1325
1326##
1327# @brief Write a URS file.
1328# @param file_name The path of the file to write.
1329# @param points
1330# @param delimiter
1331def _write_urs_file(file_name, points, delimiter=' '):
1332    """Write a URS format file.
1333    export a file, file_name, with the urs format
1334    the data points are in lats and longs
1335    """
1336
1337    fd = open(file_name, 'w')
1338
1339    # first line is # points
1340    fd.write(str(len(points)) + "\n")
1341
1342    # <lat> <long> <id#>
1343    for i, vert in enumerate( points):
1344        fd.write(str(round(vert[0],7)) + delimiter +
1345                 str(round(vert[1],7)) + delimiter + str(i) + "\n")
1346
1347    fd.close()
1348
1349
1350##
1351# @brief ??
1352# @param point_atts ??
1353# @return ??
1354def _point_atts2array(point_atts):
1355    point_atts['pointlist'] = num.array(point_atts['pointlist'], num.float)
1356
1357    for key in point_atts['attributelist'].keys():
1358        point_atts['attributelist'][key] = \
1359                num.array(point_atts['attributelist'][key], num.float)
1360
1361    return point_atts
1362
1363
1364##
1365# @brief Convert geospatial object to a points dictionary.
1366# @param geospatial_data The geospatial object to convert.
1367# @return A points dictionary.
1368def geospatial_data2points_dictionary(geospatial_data):
1369    """Convert geospatial data to points_dictionary"""
1370
1371    points_dictionary = {}
1372    points_dictionary['pointlist'] = geospatial_data.data_points
1373
1374    points_dictionary['attributelist'] = {}
1375
1376    for attribute_name in geospatial_data.attributes.keys():
1377        val = geospatial_data.attributes[attribute_name]
1378        points_dictionary['attributelist'][attribute_name] = val
1379
1380    points_dictionary['geo_reference'] = geospatial_data.geo_reference
1381
1382    return points_dictionary
1383
1384
1385##
1386# @brief Convert a points dictionary to a geospatial object.
1387# @param points_dictionary A points dictionary to convert.
1388def points_dictionary2geospatial_data(points_dictionary):
1389    """Convert points_dictionary to geospatial data object"""
1390
1391    msg = 'Points dictionary must have key pointlist'
1392    assert points_dictionary.has_key('pointlist'), msg
1393
1394    msg = 'Points dictionary must have key attributelist'
1395    assert points_dictionary.has_key('attributelist'), msg
1396
1397    if points_dictionary.has_key('geo_reference'):
1398        geo = points_dictionary['geo_reference']
1399    else:
1400        geo = None
1401
1402    return Geospatial_data(points_dictionary['pointlist'],
1403                           points_dictionary['attributelist'],
1404                           geo_reference = geo)
1405
1406
1407##
1408# @brief Split a string into 'clean' fields.
1409# @param str The string to process.
1410# @param delimiter The delimiter string to split 'line' with.
1411# @return A list of 'cleaned' field strings.
1412# @note Any fields that were initially zero length will be removed.
1413# @note If a field contains '\n' it isn't zero length.
1414def clean_line(str, delimiter):
1415    """Split string on given delimiter, remove whitespace from each field."""
1416
1417    return [x.strip() for x in str.split(delimiter) if x != '']
1418
1419
1420##
1421# @brief Ensure that points are in absolute coordinates.
1422# @param points A list or array of points to check, or geospatial object.
1423# @param geo_reference If supplied,
1424# @return ??
1425def ensure_absolute(points, geo_reference=None):
1426    """Ensure that points are in absolute coordinates.
1427
1428    This function inputs several formats and
1429    outputs one format. - a numeric array of absolute points.
1430
1431    Input formats are;
1432      points: List or numeric array of coordinate pairs [xi, eta] of
1433              points or geospatial object or points file name
1434
1435    mesh_origin: A geo_reference object or 3-tuples consisting of
1436                 UTM zone, easting and northing.
1437                 If specified vertex coordinates are assumed to be
1438                 relative to their respective origins.
1439    """
1440
1441    # Input check
1442    if isinstance(points, basestring):
1443        # It's a string - assume it is a point file
1444        points = Geospatial_data(file_name=points)
1445
1446    if isinstance(points, Geospatial_data):
1447        points = points.get_data_points(absolute=True)
1448        msg = 'Use a Geospatial_data object or a mesh origin, not both.'
1449        assert geo_reference == None, msg
1450    else:
1451        points = ensure_numeric(points, num.float)
1452
1453    # Sort of geo_reference and convert points
1454    if geo_reference is None:
1455        geo = None # Geo_reference()
1456    else:
1457        if isinstance(geo_reference, Geo_reference):
1458            geo = geo_reference
1459        else:
1460            geo = Geo_reference(geo_reference[0],
1461                                geo_reference[1],
1462                                geo_reference[2])
1463        points = geo.get_absolute(points)
1464
1465    return points
1466
1467
1468##
1469# @brief
1470# @param points
1471# @param geo_reference
1472# @return A geospatial object.
1473def ensure_geospatial(points, geo_reference=None):
1474    """Convert various data formats to a geospatial_data instance.
1475
1476    Inputed formats are;
1477    points:      List or numeric array of coordinate pairs [xi, eta] of
1478                 points or geospatial object
1479
1480    mesh_origin: A geo_reference object or 3-tuples consisting of
1481                 UTM zone, easting and northing.
1482                 If specified vertex coordinates are assumed to be
1483                 relative to their respective origins.
1484    """
1485
1486    # Input check
1487    if isinstance(points, Geospatial_data):
1488        msg = "Use a Geospatial_data object or a mesh origin, not both."
1489        assert geo_reference is None, msg
1490        return points
1491    else:
1492        # List or numeric array of absolute points
1493        points = ensure_numeric(points, num.float)
1494
1495    # Sort out geo reference
1496    if geo_reference is None:
1497        geo = None
1498    else:
1499        if isinstance(geo_reference, Geo_reference):
1500            geo = geo_reference
1501        else:
1502            geo = Geo_reference(geo_reference[0],
1503                                geo_reference[1],
1504                                geo_reference[2])
1505
1506    # Create Geospatial_data object with appropriate geo reference and return
1507    points = Geospatial_data(data_points=points, geo_reference=geo)
1508
1509    return points
1510
1511
1512##
1513# @brief
1514# @param data_file
1515# @param alpha_list
1516# @param mesh_file
1517# @param boundary_poly
1518# @param mesh_resolution
1519# @param north_boundary
1520# @param south_boundary
1521# @param east_boundary
1522# @param west_boundary
1523# @param plot_name
1524# @param split_factor
1525# @param seed_num
1526# @param cache
1527# @param verbose
1528def find_optimal_smoothing_parameter(data_file,
1529                                     alpha_list=None,
1530                                     mesh_file=None,
1531                                     boundary_poly=None,
1532                                     mesh_resolution=100000,
1533                                     north_boundary=None,
1534                                     south_boundary=None,
1535                                     east_boundary=None,
1536                                     west_boundary=None,
1537                                     plot_name='all_alphas',
1538                                     split_factor=0.1,
1539                                     seed_num=None,
1540                                     cache=False,
1541                                     verbose=False):
1542    """Removes a small random sample of points from 'data_file'.
1543    Then creates models with different alpha values from 'alpha_list' and
1544    cross validates the predicted value to the previously removed point data.
1545    Returns the alpha value which has the smallest covariance.
1546
1547    data_file: must not contain points outside the boundaries defined
1548               and it either a pts, txt or csv file.
1549
1550    alpha_list: the alpha values to test in a single list
1551
1552    mesh_file: name of the created mesh file or if passed in will read it.
1553               NOTE, if there is a mesh file mesh_resolution,
1554               north_boundary, south... etc will be ignored.
1555
1556    mesh_resolution: the maximum area size for a triangle
1557
1558    north_boundary... west_boundary: the value of the boundary
1559
1560    plot_name: the name for the plot contain the results
1561
1562    seed_num: the seed to the random number generator
1563
1564    USAGE:
1565        value, alpha = find_optimal_smoothing_parameter(data_file=fileName,
1566                                             alpha_list=[0.0001, 0.01, 1],
1567                                             mesh_file=None,
1568                                             mesh_resolution=3,
1569                                             north_boundary=5,
1570                                             south_boundary=-5,
1571                                             east_boundary=5,
1572                                             west_boundary=-5,
1573                                             plot_name='all_alphas',
1574                                             seed_num=100000,
1575                                             verbose=False)
1576
1577    OUTPUT: returns the minumum normalised covalance calculate AND the
1578           alpha that created it. PLUS writes a plot of the results
1579
1580    NOTE: code will not work if the data_file extent is greater than the
1581    boundary_polygon or any of the boundaries, eg north_boundary...west_boundary
1582    """
1583
1584    from anuga.shallow_water import Domain
1585    from anuga.geospatial_data.geospatial_data import Geospatial_data
1586    from anuga.pmesh.mesh_interface import create_mesh_from_regions
1587    from anuga.utilities.numerical_tools import cov
1588    from anuga.utilities.polygon import is_inside_polygon
1589    from anuga.fit_interpolate.benchmark_least_squares import mem_usage
1590
1591
1592    attribute_smoothed='elevation'
1593
1594    if mesh_file is None:
1595        if verbose: print "building mesh"
1596        mesh_file = 'temp.msh'
1597
1598        if north_boundary is None or south_boundary is None \
1599           or east_boundary is None or west_boundary is None:
1600            no_boundary = True
1601        else:
1602            no_boundary = False
1603
1604        if no_boundary is True:
1605            msg = 'All boundaries must be defined'
1606            raise Expection, msg
1607
1608        poly_topo = [[east_boundary,south_boundary],
1609                     [east_boundary,north_boundary],
1610                     [west_boundary,north_boundary],
1611                     [west_boundary,south_boundary]]
1612
1613        create_mesh_from_regions(poly_topo,
1614                                 boundary_tags={'back': [2],
1615                                                'side': [1,3],
1616                                                'ocean': [0]},
1617                                 maximum_triangle_area=mesh_resolution,
1618                                 filename=mesh_file,
1619                                 use_cache=cache,
1620                                 verbose=verbose)
1621
1622    else: # if mesh file provided
1623        # test mesh file exists?
1624        if verbose: "reading from file: %s" % mesh_file
1625        if access(mesh_file,F_OK) == 0:
1626            msg = "file %s doesn't exist!" % mesh_file
1627            raise IOError, msg
1628
1629    # split topo data
1630    if verbose: print 'Reading elevation file: %s' % data_file
1631    G = Geospatial_data(file_name = data_file)
1632    if verbose: print 'Start split'
1633    G_small, G_other = G.split(split_factor, seed_num, verbose=verbose)
1634    if verbose: print 'Finish split'
1635    points = G_small.get_data_points()
1636
1637    if verbose: print "Number of points in sample to compare: ", len(points)
1638
1639    if alpha_list == None:
1640        alphas = [0.001,0.01,100]
1641        #alphas = [0.000001, 0.00001, 0.0001, 0.001, 0.01,\
1642         #         0.1, 1.0, 10.0, 100.0,1000.0,10000.0]
1643    else:
1644        alphas = alpha_list
1645
1646    # creates array with columns 1 and 2 are x, y. column 3 is elevation
1647    # 4 onwards is the elevation_predicted using the alpha, which will
1648    # be compared later against the real removed data
1649    data = num.array([], dtype=num.float)
1650
1651    data=num.resize(data, (len(points), 3+len(alphas)))
1652
1653    # gets relative point from sample
1654    data[:,0] = points[:,0]
1655    data[:,1] = points[:,1]
1656    elevation_sample = G_small.get_attributes(attribute_name=attribute_smoothed)
1657    data[:,2] = elevation_sample
1658
1659    normal_cov=num.array(num.zeros([len(alphas), 2]), dtype=num.float)
1660
1661    if verbose: print 'Setup computational domains with different alphas'
1662
1663    for i, alpha in enumerate(alphas):
1664        # add G_other data to domains with different alphas
1665        if verbose:
1666            print '\n Calculating domain and mesh for Alpha = ', alpha, '\n'
1667        domain = Domain(mesh_file, use_cache=cache, verbose=verbose)
1668        if verbose: print domain.statistics()
1669        domain.set_quantity(attribute_smoothed,
1670                            geospatial_data=G_other,
1671                            use_cache=cache,
1672                            verbose=verbose,
1673                            alpha=alpha)
1674
1675        # Convert points to geospatial data for use with get_values below
1676        points_geo = Geospatial_data(points, domain.geo_reference)
1677
1678        # returns the predicted elevation of the points that were "split" out
1679        # of the original data set for one particular alpha
1680        if verbose: print 'Get predicted elevation for location to be compared'
1681        elevation_predicted = \
1682                domain.quantities[attribute_smoothed].\
1683                    get_values(interpolation_points=points_geo)
1684
1685        # add predicted elevation to array that starts with x, y, z...
1686        data[:,i+3] = elevation_predicted
1687
1688        sample_cov = cov(elevation_sample)
1689        ele_cov = cov(elevation_sample - elevation_predicted)
1690        normal_cov[i,:] = [alpha,ele_cov / sample_cov]
1691
1692        if verbose:
1693            print 'Covariance for alpha ', normal_cov[i][0], '= ', \
1694                      normal_cov[i][1]
1695            print '-------------------------------------------- \n'
1696
1697    normal_cov0 = normal_cov[:,0]
1698    normal_cov_new = num.take(normal_cov, num.argsort(normal_cov0))
1699
1700    if plot_name is not None:
1701        from pylab import savefig, semilogx, loglog
1702
1703        semilogx(normal_cov_new[:,0], normal_cov_new[:,1])
1704        loglog(normal_cov_new[:,0], normal_cov_new[:,1])
1705        savefig(plot_name, dpi=300)
1706
1707    if mesh_file == 'temp.msh':
1708        remove(mesh_file)
1709
1710    if verbose:
1711        print 'Final results:'
1712        for i, alpha in enumerate(alphas):
1713            print 'covariance for alpha %s = %s ' \
1714                  % (normal_cov[i][0], normal_cov[i][1])
1715        print '\n Optimal alpha is: %s ' \
1716              % normal_cov_new[(num.argmin(normal_cov_new, axis=0))[1], 0]
1717
1718    # covariance and optimal alpha
1719    return (min(normal_cov_new[:,1]),
1720            normal_cov_new[(num.argmin(normal_cov_new,axis=0))[1],0])
1721
1722
1723##
1724# @brief
1725# @param data_file
1726# @param alpha_list
1727# @param mesh_file
1728# @param boundary_poly
1729# @param mesh_resolution
1730# @param north_boundary
1731# @param south_boundary
1732# @param east_boundary
1733# @param west_boundary
1734# @param plot_name
1735# @param split_factor
1736# @param seed_num
1737# @param cache
1738# @param verbose
1739def old_find_optimal_smoothing_parameter(data_file,
1740                                         alpha_list=None,
1741                                         mesh_file=None,
1742                                         boundary_poly=None,
1743                                         mesh_resolution=100000,
1744                                         north_boundary=None,
1745                                         south_boundary=None,
1746                                         east_boundary=None,
1747                                         west_boundary=None,
1748                                         plot_name='all_alphas',
1749                                         split_factor=0.1,
1750                                         seed_num=None,
1751                                         cache=False,
1752                                         verbose=False):
1753    """
1754    data_file: must not contain points outside the boundaries defined
1755               and it either a pts, txt or csv file.
1756
1757    alpha_list: the alpha values to test in a single list
1758
1759    mesh_file: name of the created mesh file or if passed in will read it.
1760               NOTE, if there is a mesh file mesh_resolution,
1761               north_boundary, south... etc will be ignored.
1762
1763    mesh_resolution: the maximum area size for a triangle
1764
1765    north_boundary... west_boundary: the value of the boundary
1766
1767    plot_name: the name for the plot contain the results
1768
1769    seed_num: the seed to the random number generator
1770
1771    USAGE:
1772        value, alpha = find_optimal_smoothing_parameter(data_file=fileName,
1773                                             alpha_list=[0.0001, 0.01, 1],
1774                                             mesh_file=None,
1775                                             mesh_resolution=3,
1776                                             north_boundary=5,
1777                                             south_boundary=-5,
1778                                             east_boundary=5,
1779                                             west_boundary=-5,
1780                                             plot_name='all_alphas',
1781                                             seed_num=100000,
1782                                             verbose=False)
1783
1784    OUTPUT: returns the minumum normalised covalance calculate AND the
1785            alpha that created it. PLUS writes a plot of the results
1786
1787    NOTE: code will not work if the data_file extend is greater than the
1788          boundary_polygon or the north_boundary...west_boundary
1789    """
1790
1791    from anuga.shallow_water import Domain
1792    from anuga.geospatial_data.geospatial_data import Geospatial_data
1793    from anuga.pmesh.mesh_interface import create_mesh_from_regions
1794    from anuga.utilities.numerical_tools import cov
1795    from anuga.utilities.polygon import is_inside_polygon
1796    from anuga.fit_interpolate.benchmark_least_squares import mem_usage
1797
1798
1799    attribute_smoothed = 'elevation'
1800
1801    if mesh_file is None:
1802        mesh_file = 'temp.msh'
1803
1804        if north_boundary is None or south_boundary is None \
1805           or east_boundary is None or west_boundary is None:
1806            no_boundary = True
1807        else:
1808            no_boundary = False
1809
1810        if no_boundary is True:
1811            msg = 'All boundaries must be defined'
1812            raise Expection, msg
1813
1814        poly_topo = [[east_boundary,south_boundary],
1815                     [east_boundary,north_boundary],
1816                     [west_boundary,north_boundary],
1817                     [west_boundary,south_boundary]]
1818
1819        create_mesh_from_regions(poly_topo,
1820                                 boundary_tags={'back': [2],
1821                                                'side': [1,3],
1822                                                'ocean': [0]},
1823                                 maximum_triangle_area=mesh_resolution,
1824                                 filename=mesh_file,
1825                                 use_cache=cache,
1826                                 verbose=verbose)
1827
1828    else: # if mesh file provided
1829        # test mesh file exists?
1830        if access(mesh_file,F_OK) == 0:
1831            msg = "file %s doesn't exist!" % mesh_file
1832            raise IOError, msg
1833
1834    # split topo data
1835    G = Geospatial_data(file_name=data_file)
1836    if verbose: print 'start split'
1837    G_small, G_other = G.split(split_factor, seed_num, verbose=verbose)
1838    if verbose: print 'finish split'
1839    points = G_small.get_data_points()
1840
1841    # FIXME: Remove points outside boundary polygon
1842#    print 'new point',len(points)
1843#
1844#    new_points=[]
1845#    new_points=array([],dtype=float)
1846#    new_points=resize(new_points,(len(points),2))
1847#    print "BOUNDARY", boundary_poly
1848#    for i,point in enumerate(points):
1849#        if is_inside_polygon(point,boundary_poly, verbose=True):
1850#            new_points[i] = point
1851#            print"WOW",i,new_points[i]
1852#    points = new_points
1853
1854    if verbose: print "Number of points in sample to compare: ", len(points)
1855
1856    if alpha_list == None:
1857        alphas = [0.001,0.01,100]
1858        #alphas = [0.000001, 0.00001, 0.0001, 0.001, 0.01,\
1859         #         0.1, 1.0, 10.0, 100.0,1000.0,10000.0]
1860    else:
1861        alphas = alpha_list
1862
1863    domains = {}
1864
1865    if verbose: print 'Setup computational domains with different alphas'
1866
1867    for alpha in alphas:
1868        # add G_other data to domains with different alphas
1869        if verbose:
1870            print '\n Calculating domain and mesh for Alpha = ', alpha, '\n'
1871        domain = Domain(mesh_file, use_cache=cache, verbose=verbose)
1872        if verbose: print domain.statistics()
1873        domain.set_quantity(attribute_smoothed,
1874                            geospatial_data=G_other,
1875                            use_cache=cache,
1876                            verbose=verbose,
1877                            alpha=alpha)
1878        domains[alpha] = domain
1879
1880    # creates array with columns 1 and 2 are x, y. column 3 is elevation
1881    # 4 onwards is the elevation_predicted using the alpha, which will
1882    # be compared later against the real removed data
1883    data = num.array([], dtype=num.float)
1884
1885    data = num.resize(data, (len(points), 3+len(alphas)))
1886
1887    # gets relative point from sample
1888    data[:,0] = points[:,0]
1889    data[:,1] = points[:,1]
1890    elevation_sample = G_small.get_attributes(attribute_name=attribute_smoothed)
1891    data[:,2] = elevation_sample
1892
1893    normal_cov = num.array(num.zeros([len(alphas), 2]), dtype=num.float)
1894
1895    if verbose:
1896        print 'Determine difference between predicted results and actual data'
1897    for i, alpha in enumerate(domains):
1898        if verbose: print'Alpha =', alpha
1899
1900        points_geo = domains[alpha].geo_reference.change_points_geo_ref(points)
1901        # returns the predicted elevation of the points that were "split" out
1902        # of the original data set for one particular alpha
1903        elevation_predicted = \
1904                domains[alpha].quantities[attribute_smoothed].\
1905                        get_values(interpolation_points=points_geo)
1906
1907        # add predicted elevation to array that starts with x, y, z...
1908        data[:,i+3] = elevation_predicted
1909
1910        sample_cov = cov(elevation_sample)
1911        ele_cov = cov(elevation_sample - elevation_predicted)
1912        normal_cov[i,:] = [alpha,ele_cov / sample_cov]
1913        print 'memory usage during compare', mem_usage()
1914        if verbose: print 'cov', normal_cov[i][0], '= ', normal_cov[i][1]
1915
1916    normal_cov0 = normal_cov[:,0]
1917    normal_cov_new = num.take(normal_cov, num.argsort(normal_cov0))
1918
1919    if plot_name is not None:
1920        from pylab import savefig,semilogx,loglog
1921
1922        semilogx(normal_cov_new[:,0], normal_cov_new[:,1])
1923        loglog(normal_cov_new[:,0], normal_cov_new[:,1])
1924        savefig(plot_name, dpi=300)
1925    if mesh_file == 'temp.msh':
1926        remove(mesh_file)
1927
1928    return (min(normal_cov_new[:,1]),
1929            normal_cov_new[(num.argmin(normal_cov_new, axis=0))[1],0])
1930
1931
# This module is a library; it has no standalone command-line behaviour.
if __name__ == "__main__":
    pass
1934
Note: See TracBrowser for help on using the repository browser.