source: branches/numpy/anuga/geospatial_data/geospatial_data.py @ 6360

Last change on this file since 6360 was 6360, checked in by rwilson, 15 years ago

Ongoing conversion changes.

File size: 69.9 KB
Line 
1"""Class Geospatial_data - Manipulation of locations on the planet and
2associated attributes.
3
4"""
5
6from sys import maxint
7from os import access, F_OK, R_OK,remove
8from types import DictType
9from warnings import warn
10from string import lower
11from RandomArray import randint, seed, get_seed
12from copy import deepcopy
13from Scientific.IO.NetCDF import NetCDFFile
14
15import numpy as num
16
17from anuga.coordinate_transforms.lat_long_UTM_conversion import UTMtoLL
18from anuga.utilities.numerical_tools import ensure_numeric
19from anuga.coordinate_transforms.geo_reference import Geo_reference, \
20     TitleError, DEFAULT_ZONE, ensure_geo_reference, write_NetCDF_georeference
21from anuga.coordinate_transforms.redfearn import convert_from_latlon_to_utm
22from anuga.utilities.system_tools import clean_line
23from anuga.utilities.anuga_exceptions import ANUGAError
24from anuga.config import points_file_block_line_size as MAX_READ_LINES
25from anuga.config import netcdf_mode_r, netcdf_mode_w, netcdf_mode_a
26from anuga.config import netcdf_float
27
28DEFAULT_ATTRIBUTE = 'elevation'
29
30
31##
32# @brief ??
33class Geospatial_data:
34
    ##
    # @brief Instantiate from data points, lat/longs, or a points file.
    # @param data_points Mx2 iterable of tuples or array of x,y coordinates
    #                    (may also be a points file name).
    # @param attributes Associated values for each data point.
    # @param geo_reference Origin of the data points (Geo_reference or None).
    # @param default_attribute_name Attribute returned by default from
    #                               get_attributes().
    # @param file_name Name of input .pts, .txt or .csv file.
    # @param latitudes Vector of latitudes (alternative to data_points).
    # @param longitudes Vector of longitudes (alternative to data_points).
    # @param points_are_lats_longs True if points are lat/long, not UTM.
    # @param max_read_lines Size of block to read, if blocking.
    # @param load_file_now True if blocking but we want to read file now.
    # @param verbose True if this class instance is verbose.
    def __init__(self,
                 data_points=None, # this can also be a points file name
                 attributes=None,
                 geo_reference=None,
                 default_attribute_name=None,
                 file_name=None,
                 latitudes=None,
                 longitudes=None,
                 points_are_lats_longs=False,
                 max_read_lines=None,
                 load_file_now=True,
                 verbose=False):
        """Create instance from data points and associated attributes

        data_points: x,y coordinates in meters. Type must be either a
        sequence of 2-tuples or an Mx2 numeric array of floats.  A file name
        with extension .txt, .csv or .pts can also be passed in here.

        attributes: Associated values for each data point. The type
        must be either a list or an array of length M or a dictionary
        of lists (or arrays) of length M. In the latter case the keys
        in the dictionary represent the attribute names, in the former
        the attribute will get the default name "elevation".

        geo_reference: Object representing the origin of the data
        points. It contains UTM zone, easting and northing and data
        points are assumed to be relative to this origin.
        If geo_reference is None, the default geo ref object is used.

        default_attribute_name: Name of default attribute to be used with
        get_attribute_values. The idea is that the dataset can be
        equipped with information about which attribute to return.
        If None, the default is the "first"

        latitudes, longitudes: Vectors of latitudes and longitudes,
        used to specify location instead of points.

        points_are_lats_longs: Set this as true if the points are actually
        lats and longs, not UTM

        max_read_lines: The number of rows read into memory when using
        blocking to read a file.

        load_file_now:  If true the file is automatically loaded
        into the geospatial instance. Used when blocking.

        file_name: Name of input netCDF file or .txt file. netCDF file must
        have dimensions "points" etc.
        .txt file is a comma separated file with x, y and attribute
        data.

        The first line has the titles of the columns.  The first two
        column titles are checked to see if they start with lat or
        long (not case sensitive).  If so the data is assumed to be
        latitude and longitude, in decimal format and converted to
        UTM.  Otherwise the first two columns are assumed to be the x
        and y, and the title names actually used are ignored.


        The format for a .txt file is:
            1st line:     [column names]
            other lines:  x y [attributes]

            for example:
            x, y, elevation, friction
            0.6, 0.7, 4.9, 0.3
            1.9, 2.8, 5, 0.3
            2.7, 2.4, 5.2, 0.3

        The first two columns have to be x, y or lat, long
        coordinates.


        The format for a Points dictionary is:
          ['pointlist'] a 2 column array describing points. 1st column x,
          2nd column y.
          ['attributelist'], a dictionary of 1D arrays, representing
          attribute values at the point.  The dictionary key is the attribute
          header.
          ['geo_reference'] a Geo_reference object. Use if the point
          information is relative. This is optional.
            eg
            dic['pointlist'] = [[1.0,2.0],[3.0,5.0]]
            dic['attributelist']['elevation'] = [7.0,5.0]

        verbose:
        """

        if isinstance(data_points, basestring):
            # assume data_points is really a file name
            file_name = data_points

        self.set_verbose(verbose)
        self.geo_reference = None # create the attribute
        self.file_name = file_name

        # Fall back to the configured block size when none was given
        if max_read_lines is None:
            self.max_read_lines = MAX_READ_LINES
        else:
            self.max_read_lines = max_read_lines

        if file_name is None:
            # Data supplied directly, possibly as lat/longs which must be
            # converted to UTM points first
            if latitudes is not None \
               or longitudes is not None \
               or points_are_lats_longs:
                data_points, geo_reference = \
                    _set_using_lat_long(latitudes=latitudes,
                                        longitudes=longitudes,
                                        geo_reference=geo_reference,
                                        data_points=data_points,
                                        points_are_lats_longs=\
                                        points_are_lats_longs)
            self.check_data_points(data_points)
            self.set_attributes(attributes)
            self.set_geo_reference(geo_reference)
            self.set_default_attribute_name(default_attribute_name)
        elif load_file_now is True:
            # watch for case where file name and points,
            # attributes etc are provided!!
            # if file name then all provided info will be removed!

            if verbose is True:
                if file_name is not None:
                    print 'Loading Geospatial data from file: %s' % file_name

            self.import_points_file(file_name, verbose=verbose)

            # Re-run the checks/setters on the freshly imported data
            self.check_data_points(self.data_points)
            self.set_attributes(self.attributes)
            self.set_geo_reference(self.geo_reference)
            self.set_default_attribute_name(default_attribute_name)

        if verbose is True:
            if file_name is not None:
                print 'Geospatial data created from file: %s' % file_name
                if load_file_now is False:
                    print 'Data will be loaded blockwise on demand'

                    if file_name.endswith('csv') or file_name.endswith('txt'):
                        pass
                        # This message was misleading.
                        # FIXME (Ole): Are we blocking here or not?
                        # print 'ASCII formats are not that great for '
                        # print 'blockwise reading. Consider storing this'
                        # print 'data as a pts NetCDF format'
193
194    ##
195    # @brief Return length of the points set.
196    def __len__(self):
197        return len(self.data_points)
198
199    ##
200    # @brief Return a string representation of the points set.
201    def __repr__(self):
202        return str(self.get_data_points(absolute=True))
203
204    ##
205    # @brief Check data points.
206    # @param data_points Points data to check and store in instance.
207    # @note Throws ValueError exception if no data.
208    def check_data_points(self, data_points):
209        """Checks data points"""
210
211        if data_points is None:
212            self.data_points = None
213            msg = 'There is no data or file provided!'
214            raise ValueError, msg
215
216        else:
217            self.data_points = ensure_numeric(data_points)
218            return
219
220            print 'self.data_points=%s' % str(self.data_points)
221            print 'self.data_points.shape=%s' % str(self.data_points.shape)
222            if not (0,) == self.data_points.shape:
223                assert len(self.data_points.shape) == 2
224                assert self.data_points.shape[1] == 2
225
226    ##
227    # @brief Check and assign attributes data.
228    # @param attributes Dictionary or scalar to save as .attributes.
229    # @note Throws exception if unable to convert dict keys to numeric.
230    def set_attributes(self, attributes):
231        """Check and assign attributes dictionary"""
232
233        if attributes is None:
234            self.attributes = None
235            return
236
237        if not isinstance(attributes, DictType):
238            # Convert single attribute into dictionary
239            attributes = {DEFAULT_ATTRIBUTE: attributes}
240
241        # Check input attributes
242        for key in attributes.keys():
243            try:
244                attributes[key] = ensure_numeric(attributes[key])
245            except:
246                msg = ("Attribute '%s' (%s) could not be converted to a"
247                       "numeric vector" % (str(key), str(attributes[key])))
248                raise Exception, msg
249
250        self.attributes = attributes
251
252    ##
253    # @brief Set the georeference of geospatial data.
254    # @param geo_reference The georeference data to set.
255    # @note Will raise exception if param not instance of Geo_reference.
256    def set_geo_reference(self, geo_reference):
257        """Set the georeference of geospatial data.
258
259        It can also be used to change the georeference and will ensure that
260        the absolute coordinate values are unchanged.
261        """
262
263        from anuga.coordinate_transforms.geo_reference import Geo_reference
264
265        if geo_reference is None:
266            # Use default - points are in absolute coordinates
267            geo_reference = Geo_reference()
268
269        # Allow for tuple (zone, xllcorner, yllcorner)
270        geo_reference = ensure_geo_reference(geo_reference)
271
272        if not isinstance(geo_reference, Geo_reference):
273            # FIXME (Ole): This exception will be raised even
274            # if geo_reference is None. Is that the intent Duncan?
275            msg = 'Argument geo_reference must be a valid Geo_reference '
276            msg += 'object or None.'
277            raise Expection, msg
278
279        # If a geo_reference already exists, change the point data according to
280        # the new geo reference
281        if  self.geo_reference is not None:
282            self.data_points = self.get_data_points(geo_reference=geo_reference)
283
284        self.geo_reference = geo_reference
285
    ##
    # @brief Set the default attribute name used by get_attributes().
    # @param default_attribute_name The name to use as default; may be None,
    #        in which case get_attributes() falls back to the first key in
    #        the attributes dictionary.
    def set_default_attribute_name(self, default_attribute_name):
        self.default_attribute_name = default_attribute_name
291
292    ##
293    # @brief Set the instance verbose flag.
294    # @param verbose The value to save.
295    # @note Will raise exception if param is not True or False.
296    def set_verbose(self, verbose=False):
297        if verbose in [False, True]:
298            self.verbose = verbose
299        else:
300            msg = 'Illegal value: %s' % str(verbose)
301            raise Exception, msg
302
303    ##
304    # @brief Clip geospatial data by a given polygon.
305    # @param polygon The polygon to clip with.
306    # @param closed True if points on clip boundary are not included in result.
307    # @param verbose True if this function is verbose.
308    def clip(self, polygon, closed=True, verbose=False):
309        """Clip geospatial data by a polygon
310
311        Input
312          polygon - Either a list of points, an Nx2 array or
313                    a Geospatial data object.
314          closed - (optional) determine whether points on boundary should be
315          regarded as belonging to the polygon (closed = True)
316          or not (closed = False). Default is True.
317
318        Output
319          New geospatial data object representing points inside
320          specified polygon.
321
322
323        Note - this method is non-destructive and leaves the data in 'self'
324        unchanged
325        """
326
327        from anuga.utilities.polygon import inside_polygon
328
329        if isinstance(polygon, Geospatial_data):
330            # Polygon is an object - extract points
331            polygon = polygon.get_data_points()
332
333        points = self.get_data_points()
334        inside_indices = inside_polygon(points, polygon, closed, verbose)
335
336        clipped_G = self.get_sample(inside_indices)
337
338        return clipped_G
339
340    ##
341    # @brief Clip points data by polygon, return points outside polygon.
342    # @param polygon The polygon to clip with.
343    # @param closed True if points on clip boundary are not included in result.
344    # @param verbose True if this function is verbose.
345    def clip_outside(self, polygon, closed=True, verbose=False):
346        """Clip geospatial date by a polygon, keeping data OUTSIDE of polygon
347
348        Input
349          polygon - Either a list of points, an Nx2 array or
350                    a Geospatial data object.
351          closed - (optional) determine whether points on boundary should be
352          regarded as belonging to the polygon (closed = True)
353          or not (closed = False). Default is True.
354
355        Output
356          Geospatial data object representing point OUTSIDE specified polygon
357        """
358
359        from anuga.utilities.polygon import outside_polygon
360
361        if isinstance(polygon, Geospatial_data):
362            # Polygon is an object - extract points
363            polygon = polygon.get_data_points()
364
365        points = self.get_data_points()
366        outside_indices = outside_polygon(points, polygon, closed,verbose)
367
368        clipped_G = self.get_sample(outside_indices)
369
370        return clipped_G
371
372    ##
373    # @brief Get instance geo_reference data.
374    def get_geo_reference(self):
375        return self.geo_reference
376
377    ##
378    # @brief Get coordinates for all data points as an Nx2 array.
379    # @param absolute If True, return UTM, else relative to xll/yll corners.
380    # @param geo_reference If supplied, points are relative to it.
381    # @param as_lat_long If True, return points as lat/lon.
382    # @param isSouthHemisphere If True, return lat/lon points in S.Hemi.
383    # @return A set of data points, in appropriate form.
384    def get_data_points(self, absolute=True, geo_reference=None,
385                        as_lat_long=False, isSouthHemisphere=True):
386        """Get coordinates for all data points as an Nx2 array
387
388        If absolute is False returned coordinates are relative to the
389        internal georeference's xll and yll corners, otherwise
390        absolute UTM coordinates are returned.
391
392        If a geo_reference is passed the points are returned relative
393        to that geo_reference.
394
395        isSH (isSouthHemisphere) is only used when getting data
396        points "as_lat_long" is True and if FALSE will return lats and
397        longs valid for the Northern Hemisphere.
398
399        Default: absolute is True.
400        """
401
402        if as_lat_long is True:
403            msg = "Points need a zone to be converted into lats and longs"
404            assert self.geo_reference is not None, msg
405            zone = self.geo_reference.get_zone()
406            assert self.geo_reference.get_zone() is not DEFAULT_ZONE, msg
407            lats_longs = []
408            for point in self.get_data_points(True):
409                # UTMtoLL(northing, easting, zone,
410                lat_calced, long_calced = UTMtoLL(point[1], point[0],
411                                                  zone, isSouthHemisphere)
412                lats_longs.append((lat_calced, long_calced)) # to hash
413            return lats_longs
414
415        if absolute is True and geo_reference is None:
416            return self.geo_reference.get_absolute(self.data_points)
417        elif geo_reference is not None:
418            return geo_reference.change_points_geo_ref(self.data_points,
419                                                       self.geo_reference)
420        else:
421            # If absolute is False
422            return self.data_points
423
    ##
    # @brief Get values of one named attribute.
    # @param attribute_name Name of the attribute to get values for.
    # @note If name passed is None, the default attribute name is used
    #       (or, failing that, the first key in the attributes dictionary).
    def get_attributes(self, attribute_name=None):
        """Return values for one named attribute.

        If attribute_name is None, default_attribute_name is used;
        if that is also None, the first attribute key found is used.

        Raises AssertionError if the name is not in the attribute dict.
        """

        if attribute_name is None:
            if self.default_attribute_name is not None:
                attribute_name = self.default_attribute_name
            else:
                # Fall back to the first key in the dictionary
                attribute_name = self.attributes.keys()[0]
                # above line takes the first one from keys

        if self.verbose is True:
            print 'Using attribute %s' %attribute_name
            print 'Available attributes: %s' %(self.attributes.keys())

        msg = 'Attribute name %s does not exist in data set' % attribute_name
        assert self.attributes.has_key(attribute_name), msg

        return self.attributes[attribute_name]
449
450    ##
451    # @brief Get all instance attributes.
452    # @return The instance attribute dictionary, or None if no attributes.
453    def get_all_attributes(self):
454        """Return values for all attributes.
455        The return value is either None or a dictionary (possibly empty).
456        """
457
458        return self.attributes
459
    ##
    # @brief Override __add__() to allow addition of geospatial objects.
    # @param self This object.
    # @param other The second object (may be None).
    # @return A new Geospatial_data object holding the points of both.
    # @note Raises Exception if the two objects' attribute keys differ.
    def __add__(self, other):
        """Returns the addition of 2 geospatial objects,
        objects are concatenated to the end of each other

        NOTE: doesn't add if objects contain different
        attributes

        Always return absolute points!
        This also means, that if you add None to the object,
        it will be turned into absolute coordinates

        other can be None in which case nothing is added to self.
        """

        # find objects zone and checks if the same
        geo_ref1 = self.get_geo_reference()
        zone1 = geo_ref1.get_zone()

        if other is not None:
            geo_ref2 = other.get_geo_reference()
            zone2 = geo_ref2.get_zone()
            # reconcile_zones is expected to reject incompatible zones
            geo_ref1.reconcile_zones(geo_ref2)
            # Concatenate in absolute coordinates so differing origins
            # cannot corrupt the result
            new_points = num.concatenate((self.get_data_points(absolute=True),
                                          other.get_data_points(absolute=True)),
                                         axis = 0)

            # Concatenate attributes if any
            if self.attributes is None:
                if other.attributes is not None:
                    msg = 'Geospatial data must have the same '
                    msg += 'attributes to allow addition.'
                    raise Exception, msg

                new_attributes = None
            else:
                new_attributes = {}
                for x in self.attributes.keys():
                    if other.attributes.has_key(x):
                        attrib1 = self.attributes[x]
                        attrib2 = other.attributes[x]
                        new_attributes[x] = num.concatenate((attrib1, attrib2))
                    else:
                        msg = 'Geospatial data must have the same \n'
                        msg += 'attributes to allow addition.'
                        raise Exception, msg
        else:
            # other is None: nothing to concatenate
            new_points = self.get_data_points(absolute=True)
            new_attributes = self.attributes

        # Instantiate new data object and return absolute coordinates
        # (origin zeroed; zone preserved)
        new_geo_ref = Geo_reference(geo_ref1.get_zone(), 0.0, 0.0)
        return Geospatial_data(new_points, new_attributes, new_geo_ref)
518
519    ##
520    # @brief Override the addition case where LHS isn't geospatial object.
521    # @param self This object.
522    # @param other The second object.
523    # @return The new geospatial object.
524    def __radd__(self, other):
525        """Handle cases like None + Geospatial_data(...)"""
526
527        return self + other
528
    ###
    #  IMPORT/EXPORT POINTS FILES
    ###

    ##
    # @brief Import a .txt, .csv or .pts points data file.
    # @param file_name Name of the file to import.
    # @param delimiter Ignored by this implementation (not referenced below);
    #                  presumably retained for backward compatibility.
    # @param verbose True if this function is to be verbose.
    # @note Will throw IOError or SyntaxError if there is a problem.
    def import_points_file(self, file_name, delimiter=None, verbose=False):
        """Load a .txt, .csv or .pts file.

        Note: will throw an IOError/SyntaxError if it can't load the file.
        Catch these!

        Post condition: self.data_points, self.attributes and
        self.geo_reference have been set from the file contents.
        """

        if access(file_name, F_OK) == 0 :
            msg = 'File %s does not exist or is not accessible' % file_name
            raise IOError, msg

        attributes = {}
        if file_name[-4:]== ".pts":
            # Binary NetCDF points file
            try:
                data_points, attributes, geo_reference = \
                             _read_pts_file(file_name, verbose)
            except IOError, e:
                msg = 'Could not open file %s ' % file_name
                raise IOError, msg
        elif file_name[-4:]== ".txt" or file_name[-4:]== ".csv":
            # Text file: first line titles, then x, y, attribute rows
            try:
                data_points, attributes, geo_reference = \
                             _read_csv_file(file_name, verbose)
            except IOError, e:
                # This should only be if a file is not found
                msg = 'Could not open file %s. ' % file_name
                msg += 'Check the file location.'
                raise IOError, msg
            except SyntaxError, e:
                # This should only be if there is a format error
                # NOTE(review): Error_message is assumed to be defined
                # elsewhere in this module - verify.
                msg = 'Problem with format of file %s. \n' %file_name
                msg += Error_message['IOError']
                raise SyntaxError, msg
        else:
            msg = 'Extension %s is unknown' %file_name[-4:]
            raise IOError, msg

        self.data_points = data_points
        self.attributes = attributes
        self.geo_reference = geo_reference
581
    ##
    # @brief Write points data to a file (.pts, .txt/.csv or .urs).
    # @param file_name Path of the file to write; extension selects format.
    # @param absolute If True, write absolute point values and zero the
    #                 georeference offsets; if False (.pts only), write
    #                 points relative to xll/yll.
    # @param as_lat_long If True, write points as lat/long (.txt/.csv only).
    # @param isSouthHemisphere If True, lat/longs are southern-hemisphere.
    # @note Raises IOError for an unknown file extension.
    def export_points_file(self, file_name, absolute=True,
                           as_lat_long=False, isSouthHemisphere=True):

        """Write a points file as a text (.csv) or binary (.pts) file

        file_name is the file name, including the extension
        The point_dict is defined at the top of this file.

        If absolute is True data the xll and yll are added to the points value
        and the xll and yll of the geo_reference are set to 0.

        If absolute is False data points are returned as relative to the xll
        and yll and geo_reference remains unaffected

        isSouthHemisphere: is only used when getting data
        points "as_lat_long" is True and if FALSE will return lats and
        longs valid for the Northern Hemisphere.
        """

        if (file_name[-4:] == ".pts"):
            if absolute is True:
                # Zero the offsets on a copy so stored points are absolute
                # without mutating this instance's georeference
                geo_ref = deepcopy(self.geo_reference)
                geo_ref.xllcorner = 0
                geo_ref.yllcorner = 0
                _write_pts_file(file_name,
                                self.get_data_points(absolute),
                                self.get_all_attributes(),
                                geo_ref)
            else:
                _write_pts_file(file_name,
                                self.get_data_points(absolute),
                                self.get_all_attributes(),
                                self.get_geo_reference())

        elif file_name[-4:] == ".txt" or file_name[-4:] == ".csv":
            # Text formats can only hold absolute coordinates
            msg = "ERROR: trying to write a .txt file with relative data."
            assert absolute, msg
            _write_csv_file(file_name,
                            self.get_data_points(absolute=True,
                                                 as_lat_long=as_lat_long,
                                  isSouthHemisphere=isSouthHemisphere),
                            self.get_all_attributes(),
                            as_lat_long=as_lat_long)

        elif file_name[-4:] == ".urs" :
            # .urs files are always written as lat/longs
            msg = "ERROR: Can not write a .urs file as a relative file."
            assert absolute, msg
            _write_urs_file(file_name,
                            self.get_data_points(as_lat_long=True,
                                  isSouthHemisphere=isSouthHemisphere))

        else:
            msg = 'Unknown file type %s ' %file_name
            raise IOError, msg
642
643    ##
644    # @brief Get a subset of data that is referred to by 'indices'.
645    # @param indices A list of indices to select data subset with.
646    # @return A geospatial object containing data subset.
647    def get_sample(self, indices):
648        """ Returns a object which is a subset of the original
649        and the data points and attributes in this new object refer to
650        the indices provided
651
652        Input
653            indices- a list of integers that represent the new object
654        Output
655            New geospatial data object representing points specified by
656            the indices
657        """
658
659        # FIXME: add the geo_reference to this
660        points = self.get_data_points()
661        sampled_points = num.take(points, indices)
662
663        attributes = self.get_all_attributes()
664
665        sampled_attributes = {}
666        if attributes is not None:
667            for key, att in attributes.items():
668                sampled_attributes[key] = num.take(att, indices)
669
670        return Geospatial_data(sampled_points, sampled_attributes)
671
    ##
    # @brief Randomly split one geospatial object into two disjoint subsets.
    # @param factor Fraction of the points to place in the first result.
    # @param seed_num Random seed - used mainly for unit tests.
    # @param verbose True if this function is to be verbose.
    # @note Points for each result object are selected randomly.
    def split(self, factor=0.5, seed_num=None, verbose=False):
        """Returns two geospatial_data object, first is the size of the 'factor'
        smaller the original and the second is the remainder. The two
        new objects are disjoint sets of each other.

        Points of the two new objects have been selected RANDOMLY.

        This method creates two lists of indices which are passed into
        get_sample.  The lists are created using random numbers, and
        they are unique sets eg.  total_list(1,2,3,4,5,6,7,8,9)
        random_list(1,3,6,7,9) and remainder_list(0,2,4,5,8)

        Input -  the factor which to split the object, if 0.1 then 10% of the
                 together object will be returned

        Output - two geospatial_data objects that are disjoint sets of the
                 original
        """

        i=0
        self_size = len(self)
        random_list = []
        remainder_list = []
        # Number of points that go into the first (random) subset
        new_size = round(factor*self_size)

        # Find unique random numbers
        if verbose: print "make unique random number list and get indices"

        total=num.array(range(self_size))
        total_list = total.tolist()

        if verbose: print "total list len", len(total_list)

        # There will be repeated random numbers, however this will not be a
        # problem as they are being 'pop'ed out of the array, so if there
        # are two numbers the same they will pop different indices,
        # still basically random
        ## create list of non-unique random numbers
        if verbose: print "create random numbers list %s long" %new_size

        # Set seed if provided, mainly important for unit test!
        # plus recalculate seed when no seed provided.
        if seed_num != None:
            seed(seed_num, seed_num)
        else:
            seed()

        if verbose: print "seed:", get_seed()

        random_num = randint(0, self_size-1, (int(new_size),))
        random_num = random_num.tolist()

        # need to sort and reverse so the pop() works correctly
        # (popping from the highest index downwards keeps lower
        # indices valid)
        random_num.sort()
        random_num.reverse()

        if verbose: print "make random number list and get indices"

        j=0
        k=1
        remainder_list = total_list[:]

        # pops array index (random_num) from remainder_list
        # (which starts as the
        # total_list) and appends to random_list
        random_num_len = len(random_num)
        for i in random_num:
            random_list.append(remainder_list.pop(i))
            j += 1
            # prints progress
            if verbose and round(random_num_len/10*k) == j:
                print '(%s/%s)' % (j, random_num_len)
                k += 1

        # FIXME: move to tests, it might take a long time
        # then create an array of random length between 500 and 1000,
        # and use a random factor between 0 and 1
        # setup for assertion
        test_total = random_list[:]
        test_total.extend(remainder_list)
        test_total.sort()
        msg = 'The two random lists made from the original list when added ' \
              'together DO NOT equal the original list'
        assert total_list == test_total, msg

        # Get new samples
        if verbose: print "get values of indices for random list"
        G1 = self.get_sample(random_list)
        if verbose: print "get values of indices for opposite of random list"
        G2 = self.get_sample(remainder_list)

        return G1, G2
770
771    ##
772    # @brief Allow iteration over this object.
    def __iter__(self):
        """Prepare this object for blockwise iteration over its file.

        Reads the file header (and, for .pts files, the number of points)
        and stores the blocking state on self so that next() can return
        one Geospatial_data block per call.

        Returns self (the iterator).
        """
        from Scientific.IO.NetCDF import NetCDFFile
        # FIXME - what to do if the file isn't there

        # FIXME (Ole): Shouldn't this go into the constructor?
        # This method acts like the constructor when blocking.
        # ... and shouldn't it be called block_size?
        #
        # Fall back to the configured default block size.
        if self.max_read_lines is None:
            self.max_read_lines = MAX_READ_LINES

        if self.file_name[-4:] == ".pts":
            # See if the file is there.  Throw a QUIET IO error if it isn't
            fd = open(self.file_name,'r')
            fd.close()

            # Throws prints to screen if file not present
            self.fid = NetCDFFile(self.file_name, netcdf_mode_r)

            # Cache georeference, attribute names and point count for next().
            (self.blocking_georef,
             self.blocking_keys,
             self.number_of_points) = _read_pts_file_header(self.fid,
                                                            self.verbose)
            self.start_row = 0
            self.last_row = self.number_of_points
            self.show_verbose = 0
            # Progress is reported roughly every tenth of the file.
            self.verbose_block_size = (self.last_row + 10)/10
            self.block_number = 0
            self.number_of_blocks = self.number_of_points/self.max_read_lines
            # This computes the number of full blocks. The last block may be
            # smaller and won't be included in this estimate.

            if self.verbose is True:
                print 'Reading %d points (in ~%d blocks) from file %s. ' \
                      % (self.number_of_points,
                         self.number_of_blocks,
                         self.file_name),
                print 'Each block consists of %d data points' \
                      % self.max_read_lines

        else:
            # Assume the file is a csv file
            file_pointer = open(self.file_name)
            # Only the header is read now; next() reads the body in blocks.
            self.header, self.file_pointer = _read_csv_file_header(file_pointer)
            self.blocking_georef = None # Used for reconciling zones

        return self
823
824    ##
825    # @brief Read another block into the instance.
    def next(self):
        """Read the next block of the file and return it as a new
        Geospatial_data instance.

        Raises StopIteration when the file is exhausted; the blocking
        state set up by __iter__ is removed at that point.
        """

        if self.file_name[-4:] == ".pts":
            if self.start_row == self.last_row:
                # Read the end of the file last iteration
                # Remove blocking attributes
                self.fid.close()
                del self.max_read_lines
                del self.blocking_georef
                del self.last_row
                del self.start_row
                del self.blocking_keys
                del self.fid
                raise StopIteration
            # Clamp the block to the end of the file.
            fin_row = self.start_row + self.max_read_lines
            if fin_row > self.last_row:
                fin_row = self.last_row

            if self.verbose is True:
                # Report progress only when this block crosses the next
                # reporting threshold set up in __iter__.
                if self.show_verbose >= self.start_row \
                   and self.show_verbose < fin_row:
                    print 'Reading block %d (points %d to %d) out of %d'\
                          %(self.block_number,
                            self.start_row,
                            fin_row,
                            self.number_of_blocks)

                    self.show_verbose += max(self.max_read_lines,
                                             self.verbose_block_size)


            # Read next block
            pointlist, att_dict, = _read_pts_file_blocking(self.fid,
                                                           self.start_row,
                                                           fin_row,
                                                           self.blocking_keys)

            geo = Geospatial_data(pointlist, att_dict, self.blocking_georef)
            self.start_row = fin_row

            self.block_number += 1

        else:
            # Assume the file is a csv file
            try:
                (pointlist,
                 att_dict,
                 geo_ref,
                 self.file_pointer) = \
                        _read_csv_file_blocking(self.file_pointer,
                                                self.header[:],
                                                max_read_lines=\
                                                    self.max_read_lines,
                                                verbose=self.verbose)

                # Check that the zones haven't changed.
                if geo_ref is not None:
                    geo_ref.reconcile_zones(self.blocking_georef)
                    self.blocking_georef = geo_ref
                elif self.blocking_georef is not None:
                    msg = 'Geo reference given, then not given.'
                    msg += ' This should not happen.'
                    raise ValueError, msg
                geo = Geospatial_data(pointlist, att_dict, geo_ref)
            except StopIteration:
                # Normal end of file: tidy up the blocking state.
                self.file_pointer.close()
                del self.header
                del self.file_pointer
                raise StopIteration
            except ANUGAError:
                self.file_pointer.close()
                del self.header
                del self.file_pointer
                raise
            except SyntaxError:
                self.file_pointer.close()
                del self.header
                del self.file_pointer
                # This should only be if there is a format error
                msg = 'Could not open file %s. \n' % self.file_name
                msg += Error_message['IOError']
                raise SyntaxError, msg
        return geo
910
911
##################### Error messages ###########
# Canned error text shown when a .txt/.csv file cannot be parsed.
Error_message = {}
Em = Error_message
Em['IOError'] = (
    "NOTE: The format for a comma separated .txt/.csv file is:\n"
    "        1st line:     [column names]\n"
    "        other lines:  [x value], [y value], [attributes]\n"
    "\n"
    "           for example:\n"
    "           x, y, elevation, friction\n"
    "           0.6, 0.7, 4.9, 0.3\n"
    "           1.9, 2.8, 5, 0.3\n"
    "           2.7, 2.4, 5.2, 0.3\n"
    "\n"
    "The first two columns are assumed to be x, y coordinates.\n"
    "The attribute values must be numeric.\n"
)
927
928##
929# @brief ??
930# @param latitudes ??
931# @param longitudes ??
932# @param geo_reference ??
933# @param data_points ??
934# @param points_are_lats_longs ??
def _set_using_lat_long(latitudes,
                        longitudes,
                        geo_reference,
                        data_points,
                        points_are_lats_longs):
    """Convert latitude/longitude information to UTM data points.

    When points_are_lats_longs is True the supplied data_points are
    assumed to be in (lat, long) order and are split into the two
    coordinate sequences.

    Returns (data_points, geo_reference) where the points are in UTM
    coordinates and the Geo_reference carries the computed zone.
    """

    # Lat/long input is mutually exclusive with an explicit georeference.
    if geo_reference is not None:
        raise ValueError("A georeference is specified yet latitude and "
                         "longitude are also specified!")

    # Likewise, plain data points conflict with separate lat/long values.
    if data_points is not None and not points_are_lats_longs:
        raise ValueError("Data points are specified yet latitude and "
                         "longitude are also specified.")

    if points_are_lats_longs:
        if data_points is None:
            raise ValueError("Data points are not specified.")
        # Split the (lat, long) pairs into separate 1-D sequences.
        lats_longs = ensure_numeric(data_points)
        latitudes = num.ravel(lats_longs[:,0:1])
        longitudes = num.ravel(lats_longs[:,1:])

    # Both coordinate sequences must be available by now.
    if latitudes is None and longitudes is None:
        raise ValueError("Latitudes and Longitudes are not specified.")
    if latitudes is None:
        raise ValueError("Longitudes are specified yet latitudes aren't.")
    if longitudes is None:
        raise ValueError("Latitudes are specified yet longitudes aren't.")

    data_points, zone = convert_from_latlon_to_utm(latitudes=latitudes,
                                                   longitudes=longitudes)
    return data_points, Geo_reference(zone=zone)
975
976
977##
978# @brief Read a .pts data file.
979# @param file_name Path to file to read.
980# @param verbose True if this function is to be verbose.
981# @return (pointlist, attributes, geo_reference)
982def _read_pts_file(file_name, verbose=False):
983    """Read .pts NetCDF file
984
985    Return a dic of array of points, and dic of array of attribute
986    eg
987    dic['points'] = [[1.0,2.0],[3.0,5.0]]
988    dic['attributelist']['elevation'] = [[7.0,5.0]]
989    """
990
991    from Scientific.IO.NetCDF import NetCDFFile
992
993    if verbose: print 'Reading ', file_name
994
995    # See if the file is there.  Throw a QUIET IO error if it isn't
996    fd = open(file_name,'r')
997    fd.close()
998
999    # Throws prints to screen if file not present
1000    fid = NetCDFFile(file_name, netcdf_mode_r)
1001
1002    pointlist = num.array(fid.variables['points'])
1003    keys = fid.variables.keys()
1004
1005    if verbose: print 'Got %d variables: %s' % (len(keys), keys)
1006
1007    try:
1008        keys.remove('points')
1009    except IOError, e:
1010        fid.close()
1011        msg = "Expected keyword 'points' but could not find it"
1012        raise IOError, msg
1013
1014    attributes = {}
1015    for key in keys:
1016        if verbose: print "reading attribute '%s'" % key
1017
1018        attributes[key] = num.array(fid.variables[key])
1019
1020    try:
1021        geo_reference = Geo_reference(NetCDFObject=fid)
1022    except AttributeError, e:
1023        geo_reference = None
1024
1025    fid.close()
1026
1027    return pointlist, attributes, geo_reference
1028
1029
1030##
1031# @brief Read a .csv data file.
1032# @param file_name Path to the .csv file to read.
1033# @param verbose True if this function is to be verbose.
def _read_csv_file(file_name, verbose=False):
    """Read a .csv points file in one go.

    Returns (pointlist, att_dict, geo_ref), eg
        pointlist = [[1.0,2.0],[3.0,5.0]]
        att_dict['elevation'] = [[7.0,5.0]]
    geo_ref is a Geo_reference or None.
    """

    file_pointer = open(file_name)
    header, file_pointer = _read_csv_file_header(file_pointer)

    try:
        # Read the whole body as a single huge "block".
        # FIXME (Ole) What's up here?
        (pointlist,
         att_dict,
         geo_ref,
         file_pointer) = _read_csv_file_blocking(file_pointer,
                                                 header,
                                                 max_read_lines=1e30)
    except ANUGAError:
        file_pointer.close()
        raise

    file_pointer.close()

    return pointlist, att_dict, geo_ref
1061
1062
1063##
1064# @brief Read a .csv file header.
1065# @param file_pointer Open descriptor of the file to read.
1066# @param delimiter Header line delimiter string, split on this string.
1067# @param verbose True if this function is to be verbose.
1068# @return A tuple of (<cleaned header string>, <input file_pointer>)
1069
CSV_DELIMITER = ','

def _read_csv_file_header(file_pointer,
                          delimiter=CSV_DELIMITER,
                          verbose=False):
    """Read the header line of a .csv file.

    Returns (header, file_pointer) where header is the list of column
    names obtained by splitting the first line on delimiter.
    """

    first_line = file_pointer.readline().strip()
    return clean_line(first_line, delimiter), file_pointer
1083
1084##
1085# @brief Read a .csv file, with blocking.
1086# @param file_pointer Open descriptor of the file to read.
1087# @param header List of already read .csv header fields.
1088# @param delimiter Delimiter string header was split on.
1089# @param max_read_lines The max number of lines to read before blocking.
1090# @param verbose True if this function is to be verbose.
1091# @note Will throw IndexError, SyntaxError exceptions.
1092def _read_csv_file_blocking(file_pointer,
1093                            header,
1094                            delimiter=CSV_DELIMITER,
1095                            max_read_lines=MAX_READ_LINES,
1096                            verbose=False):
1097    """Read the body of a .csv file.
1098    header: The list header of the csv file, with the x and y labels.
1099    """
1100
1101    points = []
1102    pointattributes = []
1103    att_dict = {}
1104
1105    # This is to remove the x and y headers.
1106    header = header[:]
1107    try:
1108        x_header = header.pop(0)
1109        y_header = header.pop(0)
1110    except IndexError:
1111        # if there are not two columns this will occur.
1112        # eg if it is a space seperated file
1113        raise SyntaxError
1114
1115    read_lines = 0
1116    while read_lines < max_read_lines:
1117        line = file_pointer.readline()
1118        numbers = clean_line(line, delimiter)
1119        if len(numbers) <= 1:
1120            break
1121        if line[0] == '#':
1122            continue
1123
1124        read_lines += 1
1125
1126        try:
1127            x = float(numbers[0])
1128            y = float(numbers[1])
1129            points.append([x,y])
1130            numbers.pop(0)
1131            numbers.pop(0)
1132            if len(header) != len(numbers):
1133                file_pointer.close()
1134                msg = "File load error. " \
1135                      "There might be a problem with the file header."
1136                raise SyntaxError, msg
1137            for i,n in enumerate(numbers):
1138                n.strip()
1139                if n != '\n' and n != '':
1140                    #attributes.append(float(n))
1141                    att_dict.setdefault(header[i],[]).append(float(n))
1142        except ValueError:
1143            raise SyntaxError
1144
1145    if points == []:
1146        raise StopIteration
1147
1148    pointlist = num.array(points, num.float)
1149    for key in att_dict.keys():
1150        att_dict[key] = num.array(att_dict[key], num.float)
1151
1152    # Do stuff here so the info is in lat's and longs
1153    geo_ref = None
1154    x_header = lower(x_header[:3])
1155    y_header = lower(y_header[:3])
1156    if (x_header == 'lon' or  x_header == 'lat') \
1157       and (y_header == 'lon' or  y_header == 'lat'):
1158        if x_header == 'lon':
1159            longitudes = num.ravel(pointlist[:,0:1])
1160            latitudes = num.ravel(pointlist[:,1:])
1161        else:
1162            latitudes = num.ravel(pointlist[:,0:1])
1163            longitudes = num.ravel(pointlist[:,1:])
1164
1165        pointlist, geo_ref = _set_using_lat_long(latitudes,
1166                                                 longitudes,
1167                                                 geo_reference=None,
1168                                                 data_points=None,
1169                                                 points_are_lats_longs=False)
1170
1171    return pointlist, att_dict, geo_ref, file_pointer
1172
1173
1174##
1175# @brief Read a .pts file header.
1176# @param fid Handle to the open .pts file.
1177# @param verbose True if the function is to be verbose.
1178# @return (geo_reference, keys, fid.dimensions['number_of_points'])
1179# @note Will throw IOError and AttributeError exceptions.
1180def _read_pts_file_header(fid, verbose=False):
1181    """Read the geo_reference and number_of_points from a .pts file"""
1182
1183    keys = fid.variables.keys()
1184    try:
1185        keys.remove('points')
1186    except IOError, e:
1187        fid.close()
1188        msg = "Expected keyword 'points' but could not find it."
1189        raise IOError, msg
1190
1191    if verbose: print 'Got %d variables: %s' % (len(keys), keys)
1192
1193    try:
1194        geo_reference = Geo_reference(NetCDFObject=fid)
1195    except AttributeError, e:
1196        geo_reference = None
1197
1198    return geo_reference, keys, fid.dimensions['number_of_points']
1199
1200
1201##
1202# @brief Read the body of a .csf file, with blocking.
1203# @param fid Handle to already open file.
1204# @param start_row Start row index of points to return.
1205# @param fin_row End row index of points to return.
1206# @param keys Iterable of keys to return.
1207# @return Tuple of (pointlist, attributes).
1208def _read_pts_file_blocking(fid, start_row, fin_row, keys):
1209    """Read the body of a .csv file."""
1210
1211    pointlist = num.array(fid.variables['points'][start_row:fin_row])
1212
1213    attributes = {}
1214    for key in keys:
1215        attributes[key] = num.array(fid.variables[key][start_row:fin_row])
1216
1217    return pointlist, attributes
1218
1219
1220##
1221# @brief Write a .pts data file.
1222# @param file_name Path to the file to write.
1223# @param write_data_points Data points to write.
1224# @param write_attributes Attributes to write.
1225# @param write_geo_reference Georef to write.
def _write_pts_file(file_name,
                    write_data_points,
                    write_attributes=None,
                    write_geo_reference=None):
    """Write a .pts NetCDF file.

    file_name:           path of the file to create
    write_data_points:   Nx2 array of point coordinates
    write_attributes:    optional {name: values} dict, one value per point
    write_geo_reference: optional Geo_reference to store in the file

    NOTE: an attribute may not be called 'points' since that name is used
    for the coordinate variable itself (format limitation).
    """

    from Scientific.IO.NetCDF import NetCDFFile

    outfile = NetCDFFile(file_name, netcdf_mode_w)

    # Global metadata
    outfile.institution = 'Geoscience Australia'
    outfile.description = 'NetCDF format for compact and portable storage ' \
                          'of spatial point data'

    # Dimensions: number of points, and 2 coordinates (x, y) per point
    number_of_points = write_data_points.shape[0]
    outfile.createDimension('number_of_points', number_of_points)
    outfile.createDimension('number_of_dimensions', 2)

    # The point coordinates themselves
    outfile.createVariable('points', netcdf_float, ('number_of_points',
                                                    'number_of_dimensions'))
    outfile.variables['points'][:] = write_data_points

    # One NetCDF variable per attribute
    if write_attributes is not None:
        for name in write_attributes.keys():
            outfile.createVariable(name, netcdf_float, ('number_of_points',))
            outfile.variables[name][:] = write_attributes[name]

    # Georeference, if supplied
    if write_geo_reference is not None:
        write_NetCDF_georeference(write_geo_reference, outfile)

    outfile.close()
1278
1279
1280##
1281# @brief Write a .csv data file.
1282# @param file_name Path to the file to write.
1283# @param write_data_points Data points to write.
1284# @param write_attributes Attributes to write.
1285# @param as_lat_long True if points are lat/lon, else x/y.
1286# @param delimiter The CSV delimiter to use.
1287def _write_csv_file(file_name,
1288                    write_data_points,
1289                    write_attributes=None,
1290                    as_lat_long=False,
1291                    delimiter=','):
1292    """Write a .csv file."""
1293
1294    points = write_data_points
1295    pointattributes = write_attributes
1296
1297    fd = open(file_name, 'w')
1298
1299    if as_lat_long:
1300        titlelist = "latitude" + delimiter + "longitude"  + delimiter
1301    else:
1302        titlelist = "x" + delimiter + "y"  + delimiter
1303
1304    if pointattributes is not None:
1305        for title in pointattributes.keys():
1306            titlelist = titlelist + title + delimiter
1307        titlelist = titlelist[0:-len(delimiter)] # remove the last delimiter
1308
1309    fd.write(titlelist + "\n")
1310
1311    # <x/lat> <y/long> [attributes]
1312    for i, vert in enumerate( points):
1313        if pointattributes is not None:
1314            attlist = ","
1315            for att in pointattributes.keys():
1316                attlist = attlist + str(pointattributes[att][i]) + delimiter
1317            attlist = attlist[0:-len(delimiter)] # remove the last delimiter
1318            attlist.strip()
1319        else:
1320            attlist = ''
1321
1322        fd.write(str(vert[0]) + delimiter + str(vert[1]) + attlist + "\n")
1323
1324    fd.close()
1325
1326
1327##
1328# @brief Write a URS file.
1329# @param file_name The path of the file to write.
1330# @param points
1331# @param delimiter
1332def _write_urs_file(file_name, points, delimiter=' '):
1333    """Write a URS format file.
1334    export a file, file_name, with the urs format
1335    the data points are in lats and longs
1336    """
1337
1338    fd = open(file_name, 'w')
1339
1340    # first line is # points
1341    fd.write(str(len(points)) + "\n")
1342
1343    # <lat> <long> <id#>
1344    for i, vert in enumerate( points):
1345        fd.write(str(round(vert[0],7)) + delimiter +
1346                 str(round(vert[1],7)) + delimiter + str(i) + "\n")
1347
1348    fd.close()
1349
1350
1351##
1352# @brief ??
1353# @param point_atts ??
1354# @return ??
1355def _point_atts2array(point_atts):
1356    point_atts['pointlist'] = num.array(point_atts['pointlist'], num.float)
1357
1358    for key in point_atts['attributelist'].keys():
1359        point_atts['attributelist'][key] = \
1360                num.array(point_atts['attributelist'][key], num.float)
1361
1362    return point_atts
1363
1364
1365##
1366# @brief Convert geospatial object to a points dictionary.
1367# @param geospatial_data The geospatial object to convert.
1368# @return A points dictionary.
def geospatial_data2points_dictionary(geospatial_data):
    """Convert a Geospatial_data object to a points dictionary.

    The result has keys 'pointlist', 'attributelist' (a mapping of
    attribute name to values) and 'geo_reference'.
    """

    attributes = {}
    for attribute_name in geospatial_data.attributes.keys():
        attributes[attribute_name] = geospatial_data.attributes[attribute_name]

    points_dictionary = {'pointlist': geospatial_data.data_points,
                         'attributelist': attributes,
                         'geo_reference': geospatial_data.geo_reference}

    return points_dictionary
1384
1385
1386##
1387# @brief Convert a points dictionary to a geospatial object.
1388# @param points_dictionary A points dictionary to convert.
def points_dictionary2geospatial_data(points_dictionary):
    """Convert a points dictionary to a Geospatial_data object.

    points_dictionary must contain keys 'pointlist' and 'attributelist';
    'geo_reference' is optional (None is used when it is absent).
    """

    # dict.has_key() is deprecated; the 'in' operator behaves identically
    # and also works on Python 3.
    msg = 'Points dictionary must have key pointlist'
    assert 'pointlist' in points_dictionary, msg

    msg = 'Points dictionary must have key attributelist'
    assert 'attributelist' in points_dictionary, msg

    geo = points_dictionary.get('geo_reference', None)

    return Geospatial_data(points_dictionary['pointlist'],
                           points_dictionary['attributelist'],
                           geo_reference = geo)
1406
1407
1408##
1409# @brief Ensure that points are in absolute coordinates.
1410# @param points A list or array of points to check, or geospatial object.
1411# @param geo_reference If supplied,
1412# @return ??
def ensure_absolute(points, geo_reference=None):
    """Ensure that points are in absolute coordinates.

    points:        List or numeric array of coordinate pairs [xi, eta] of
                   points, or a Geospatial_data object, or a points file name.
    geo_reference: A Geo_reference object or 3-tuple consisting of
                   UTM zone, easting and northing.
                   If specified, vertex coordinates are assumed to be
                   relative to their respective origins.

    Returns a numeric array of absolute points.
    """

    # Input check
    if isinstance(points, basestring):
        # It's a string - assume it is a point file
        points = Geospatial_data(file_name=points)

    if isinstance(points, Geospatial_data):
        points = points.get_data_points(absolute=True)
        msg = 'Use a Geospatial_data object or a mesh origin, not both.'
        # Identity test ('is None') rather than '== None'; consistent with
        # ensure_geospatial and not subject to custom __eq__ behaviour.
        assert geo_reference is None, msg
    else:
        points = ensure_numeric(points, num.float)

    # Sort out geo_reference and convert points
    if geo_reference is None:
        geo = None # Geo_reference()
    else:
        if isinstance(geo_reference, Geo_reference):
            geo = geo_reference
        else:
            geo = Geo_reference(geo_reference[0],
                                geo_reference[1],
                                geo_reference[2])
        points = geo.get_absolute(points)

    return points
1454
1455
1456##
1457# @brief
1458# @param points
1459# @param geo_reference
1460# @return A geospatial object.
def ensure_geospatial(points, geo_reference=None):
    """Convert various data formats to a Geospatial_data instance.

    points:        List or numeric array of coordinate pairs [xi, eta] of
                   points, or a Geospatial_data object.
    geo_reference: A Geo_reference object or 3-tuple consisting of
                   UTM zone, easting and northing.
                   If specified, vertex coordinates are assumed to be
                   relative to their respective origins.
    """

    # A Geospatial_data instance passes through untouched; combining it
    # with a separate georeference would be ambiguous.
    if isinstance(points, Geospatial_data):
        msg = "Use a Geospatial_data object or a mesh origin, not both."
        assert geo_reference is None, msg
        return points

    # Otherwise treat points as a list/array of absolute coordinates.
    points = ensure_numeric(points, num.float)

    # Normalise the geo reference to a Geo_reference object (or None).
    if geo_reference is None:
        geo = None
    elif isinstance(geo_reference, Geo_reference):
        geo = geo_reference
    else:
        geo = Geo_reference(geo_reference[0],
                            geo_reference[1],
                            geo_reference[2])

    # Wrap the points and georeference in a Geospatial_data object.
    return Geospatial_data(data_points=points, geo_reference=geo)
1498
1499
1500##
1501# @brief
1502# @param data_file
1503# @param alpha_list
1504# @param mesh_file
1505# @param boundary_poly
1506# @param mesh_resolution
1507# @param north_boundary
1508# @param south_boundary
1509# @param east_boundary
1510# @param west_boundary
1511# @param plot_name
1512# @param split_factor
1513# @param seed_num
1514# @param cache
1515# @param verbose
1516def find_optimal_smoothing_parameter(data_file,
1517                                     alpha_list=None,
1518                                     mesh_file=None,
1519                                     boundary_poly=None,
1520                                     mesh_resolution=100000,
1521                                     north_boundary=None,
1522                                     south_boundary=None,
1523                                     east_boundary=None,
1524                                     west_boundary=None,
1525                                     plot_name='all_alphas',
1526                                     split_factor=0.1,
1527                                     seed_num=None,
1528                                     cache=False,
1529                                     verbose=False):
1530    """Removes a small random sample of points from 'data_file'.
1531    Then creates models with different alpha values from 'alpha_list' and
1532    cross validates the predicted value to the previously removed point data.
1533    Returns the alpha value which has the smallest covariance.
1534
1535    data_file: must not contain points outside the boundaries defined
1536               and it either a pts, txt or csv file.
1537
1538    alpha_list: the alpha values to test in a single list
1539
1540    mesh_file: name of the created mesh file or if passed in will read it.
1541               NOTE, if there is a mesh file mesh_resolution,
1542               north_boundary, south... etc will be ignored.
1543
1544    mesh_resolution: the maximum area size for a triangle
1545
1546    north_boundary... west_boundary: the value of the boundary
1547
1548    plot_name: the name for the plot contain the results
1549
1550    seed_num: the seed to the random number generator
1551
1552    USAGE:
1553        value, alpha = find_optimal_smoothing_parameter(data_file=fileName,
1554                                             alpha_list=[0.0001, 0.01, 1],
1555                                             mesh_file=None,
1556                                             mesh_resolution=3,
1557                                             north_boundary=5,
1558                                             south_boundary=-5,
1559                                             east_boundary=5,
1560                                             west_boundary=-5,
1561                                             plot_name='all_alphas',
1562                                             seed_num=100000,
1563                                             verbose=False)
1564
1565    OUTPUT: returns the minumum normalised covalance calculate AND the
1566           alpha that created it. PLUS writes a plot of the results
1567
1568    NOTE: code will not work if the data_file extent is greater than the
1569    boundary_polygon or any of the boundaries, eg north_boundary...west_boundary
1570    """
1571
1572    from anuga.shallow_water import Domain
1573    from anuga.geospatial_data.geospatial_data import Geospatial_data
1574    from anuga.pmesh.mesh_interface import create_mesh_from_regions
1575    from anuga.utilities.numerical_tools import cov
1576    from anuga.utilities.polygon import is_inside_polygon
1577    from anuga.fit_interpolate.benchmark_least_squares import mem_usage
1578
1579
1580    attribute_smoothed='elevation'
1581
1582    if mesh_file is None:
1583        if verbose: print "building mesh"
1584        mesh_file = 'temp.msh'
1585
1586        if north_boundary is None or south_boundary is None \
1587           or east_boundary is None or west_boundary is None:
1588            no_boundary = True
1589        else:
1590            no_boundary = False
1591
1592        if no_boundary is True:
1593            msg = 'All boundaries must be defined'
1594            raise Expection, msg
1595
1596        poly_topo = [[east_boundary,south_boundary],
1597                     [east_boundary,north_boundary],
1598                     [west_boundary,north_boundary],
1599                     [west_boundary,south_boundary]]
1600
1601        create_mesh_from_regions(poly_topo,
1602                                 boundary_tags={'back': [2],
1603                                                'side': [1,3],
1604                                                'ocean': [0]},
1605                                 maximum_triangle_area=mesh_resolution,
1606                                 filename=mesh_file,
1607                                 use_cache=cache,
1608                                 verbose=verbose)
1609
1610    else: # if mesh file provided
1611        # test mesh file exists?
1612        if verbose: "reading from file: %s" % mesh_file
1613        if access(mesh_file,F_OK) == 0:
1614            msg = "file %s doesn't exist!" % mesh_file
1615            raise IOError, msg
1616
1617    # split topo data
1618    if verbose: print 'Reading elevation file: %s' % data_file
1619    G = Geospatial_data(file_name = data_file)
1620    if verbose: print 'Start split'
1621    G_small, G_other = G.split(split_factor, seed_num, verbose=verbose)
1622    if verbose: print 'Finish split'
1623    points = G_small.get_data_points()
1624
1625    if verbose: print "Number of points in sample to compare: ", len(points)
1626
1627    if alpha_list == None:
1628        alphas = [0.001,0.01,100]
1629        #alphas = [0.000001, 0.00001, 0.0001, 0.001, 0.01,\
1630         #         0.1, 1.0, 10.0, 100.0,1000.0,10000.0]
1631    else:
1632        alphas = alpha_list
1633
1634    # creates array with columns 1 and 2 are x, y. column 3 is elevation
1635    # 4 onwards is the elevation_predicted using the alpha, which will
1636    # be compared later against the real removed data
1637    data = num.array([], dtype=num.float)
1638
1639    data=num.resize(data, (len(points), 3+len(alphas)))
1640
1641    # gets relative point from sample
1642    data[:,0] = points[:,0]
1643    data[:,1] = points[:,1]
1644    elevation_sample = G_small.get_attributes(attribute_name=attribute_smoothed)
1645    data[:,2] = elevation_sample
1646
1647    normal_cov=num.array(num.zeros([len(alphas), 2]), dtype=num.float)
1648
1649    if verbose: print 'Setup computational domains with different alphas'
1650
1651    for i, alpha in enumerate(alphas):
1652        # add G_other data to domains with different alphas
1653        if verbose:
1654            print '\n Calculating domain and mesh for Alpha = ', alpha, '\n'
1655        domain = Domain(mesh_file, use_cache=cache, verbose=verbose)
1656        if verbose: print domain.statistics()
1657        domain.set_quantity(attribute_smoothed,
1658                            geospatial_data=G_other,
1659                            use_cache=cache,
1660                            verbose=verbose,
1661                            alpha=alpha)
1662
1663        # Convert points to geospatial data for use with get_values below
1664        points_geo = Geospatial_data(points, domain.geo_reference)
1665
1666        # returns the predicted elevation of the points that were "split" out
1667        # of the original data set for one particular alpha
1668        if verbose: print 'Get predicted elevation for location to be compared'
1669        elevation_predicted = \
1670                domain.quantities[attribute_smoothed].\
1671                    get_values(interpolation_points=points_geo)
1672
1673        # add predicted elevation to array that starts with x, y, z...
1674        data[:,i+3] = elevation_predicted
1675
1676        sample_cov = cov(elevation_sample)
1677        ele_cov = cov(elevation_sample - elevation_predicted)
1678        normal_cov[i,:] = [alpha,ele_cov / sample_cov]
1679
1680        if verbose:
1681            print 'Covariance for alpha ', normal_cov[i][0], '= ', \
1682                      normal_cov[i][1]
1683            print '-------------------------------------------- \n'
1684
1685    normal_cov0 = normal_cov[:,0]
1686    normal_cov_new = num.take(normal_cov, num.argsort(normal_cov0))
1687
1688    if plot_name is not None:
1689        from pylab import savefig, semilogx, loglog
1690
1691        semilogx(normal_cov_new[:,0], normal_cov_new[:,1])
1692        loglog(normal_cov_new[:,0], normal_cov_new[:,1])
1693        savefig(plot_name, dpi=300)
1694
1695    if mesh_file == 'temp.msh':
1696        remove(mesh_file)
1697
1698    if verbose:
1699        print 'Final results:'
1700        for i, alpha in enumerate(alphas):
1701            print 'covariance for alpha %s = %s ' \
1702                  % (normal_cov[i][0], normal_cov[i][1])
1703        print '\n Optimal alpha is: %s ' \
1704              % normal_cov_new[(num.argmin(normal_cov_new, axis=0))[1], 0]
1705
1706    # covariance and optimal alpha
1707    return (min(normal_cov_new[:,1]),
1708            normal_cov_new[(num.argmin(normal_cov_new,axis=0))[1],0])
1709
1710
1711##
1712# @brief
1713# @param data_file
1714# @param alpha_list
1715# @param mesh_file
1716# @param boundary_poly
1717# @param mesh_resolution
1718# @param north_boundary
1719# @param south_boundary
1720# @param east_boundary
1721# @param west_boundary
1722# @param plot_name
1723# @param split_factor
1724# @param seed_num
1725# @param cache
1726# @param verbose
1727def old_find_optimal_smoothing_parameter(data_file,
1728                                         alpha_list=None,
1729                                         mesh_file=None,
1730                                         boundary_poly=None,
1731                                         mesh_resolution=100000,
1732                                         north_boundary=None,
1733                                         south_boundary=None,
1734                                         east_boundary=None,
1735                                         west_boundary=None,
1736                                         plot_name='all_alphas',
1737                                         split_factor=0.1,
1738                                         seed_num=None,
1739                                         cache=False,
1740                                         verbose=False):
1741    """
1742    data_file: must not contain points outside the boundaries defined
1743               and it either a pts, txt or csv file.
1744
1745    alpha_list: the alpha values to test in a single list
1746
1747    mesh_file: name of the created mesh file or if passed in will read it.
1748               NOTE, if there is a mesh file mesh_resolution,
1749               north_boundary, south... etc will be ignored.
1750
1751    mesh_resolution: the maximum area size for a triangle
1752
1753    north_boundary... west_boundary: the value of the boundary
1754
1755    plot_name: the name for the plot contain the results
1756
1757    seed_num: the seed to the random number generator
1758
1759    USAGE:
1760        value, alpha = find_optimal_smoothing_parameter(data_file=fileName,
1761                                             alpha_list=[0.0001, 0.01, 1],
1762                                             mesh_file=None,
1763                                             mesh_resolution=3,
1764                                             north_boundary=5,
1765                                             south_boundary=-5,
1766                                             east_boundary=5,
1767                                             west_boundary=-5,
1768                                             plot_name='all_alphas',
1769                                             seed_num=100000,
1770                                             verbose=False)
1771
1772    OUTPUT: returns the minumum normalised covalance calculate AND the
1773            alpha that created it. PLUS writes a plot of the results
1774
1775    NOTE: code will not work if the data_file extend is greater than the
1776          boundary_polygon or the north_boundary...west_boundary
1777    """
1778
1779    from anuga.shallow_water import Domain
1780    from anuga.geospatial_data.geospatial_data import Geospatial_data
1781    from anuga.pmesh.mesh_interface import create_mesh_from_regions
1782    from anuga.utilities.numerical_tools import cov
1783    from anuga.utilities.polygon import is_inside_polygon
1784    from anuga.fit_interpolate.benchmark_least_squares import mem_usage
1785
1786
1787    attribute_smoothed = 'elevation'
1788
1789    if mesh_file is None:
1790        mesh_file = 'temp.msh'
1791
1792        if north_boundary is None or south_boundary is None \
1793           or east_boundary is None or west_boundary is None:
1794            no_boundary = True
1795        else:
1796            no_boundary = False
1797
1798        if no_boundary is True:
1799            msg = 'All boundaries must be defined'
1800            raise Expection, msg
1801
1802        poly_topo = [[east_boundary,south_boundary],
1803                     [east_boundary,north_boundary],
1804                     [west_boundary,north_boundary],
1805                     [west_boundary,south_boundary]]
1806
1807        create_mesh_from_regions(poly_topo,
1808                                 boundary_tags={'back': [2],
1809                                                'side': [1,3],
1810                                                'ocean': [0]},
1811                                 maximum_triangle_area=mesh_resolution,
1812                                 filename=mesh_file,
1813                                 use_cache=cache,
1814                                 verbose=verbose)
1815
1816    else: # if mesh file provided
1817        # test mesh file exists?
1818        if access(mesh_file,F_OK) == 0:
1819            msg = "file %s doesn't exist!" % mesh_file
1820            raise IOError, msg
1821
1822    # split topo data
1823    G = Geospatial_data(file_name=data_file)
1824    if verbose: print 'start split'
1825    G_small, G_other = G.split(split_factor, seed_num, verbose=verbose)
1826    if verbose: print 'finish split'
1827    points = G_small.get_data_points()
1828
1829    # FIXME: Remove points outside boundary polygon
1830#    print 'new point',len(points)
1831#
1832#    new_points=[]
1833#    new_points=array([],dtype=float)
1834#    new_points=resize(new_points,(len(points),2))
1835#    print "BOUNDARY", boundary_poly
1836#    for i,point in enumerate(points):
1837#        if is_inside_polygon(point,boundary_poly, verbose=True):
1838#            new_points[i] = point
1839#            print"WOW",i,new_points[i]
1840#    points = new_points
1841
1842    if verbose: print "Number of points in sample to compare: ", len(points)
1843
1844    if alpha_list == None:
1845        alphas = [0.001,0.01,100]
1846        #alphas = [0.000001, 0.00001, 0.0001, 0.001, 0.01,\
1847         #         0.1, 1.0, 10.0, 100.0,1000.0,10000.0]
1848    else:
1849        alphas = alpha_list
1850
1851    domains = {}
1852
1853    if verbose: print 'Setup computational domains with different alphas'
1854
1855    for alpha in alphas:
1856        # add G_other data to domains with different alphas
1857        if verbose:
1858            print '\n Calculating domain and mesh for Alpha = ', alpha, '\n'
1859        domain = Domain(mesh_file, use_cache=cache, verbose=verbose)
1860        if verbose: print domain.statistics()
1861        domain.set_quantity(attribute_smoothed,
1862                            geospatial_data=G_other,
1863                            use_cache=cache,
1864                            verbose=verbose,
1865                            alpha=alpha)
1866        domains[alpha] = domain
1867
1868    # creates array with columns 1 and 2 are x, y. column 3 is elevation
1869    # 4 onwards is the elevation_predicted using the alpha, which will
1870    # be compared later against the real removed data
1871    data = num.array([], dtype=num.float)
1872
1873    data = num.resize(data, (len(points), 3+len(alphas)))
1874
1875    # gets relative point from sample
1876    data[:,0] = points[:,0]
1877    data[:,1] = points[:,1]
1878    elevation_sample = G_small.get_attributes(attribute_name=attribute_smoothed)
1879    data[:,2] = elevation_sample
1880
1881    normal_cov = num.array(num.zeros([len(alphas), 2]), dtype=num.float)
1882
1883    if verbose:
1884        print 'Determine difference between predicted results and actual data'
1885    for i, alpha in enumerate(domains):
1886        if verbose: print'Alpha =', alpha
1887
1888        points_geo = domains[alpha].geo_reference.change_points_geo_ref(points)
1889        # returns the predicted elevation of the points that were "split" out
1890        # of the original data set for one particular alpha
1891        elevation_predicted = \
1892                domains[alpha].quantities[attribute_smoothed].\
1893                        get_values(interpolation_points=points_geo)
1894
1895        # add predicted elevation to array that starts with x, y, z...
1896        data[:,i+3] = elevation_predicted
1897
1898        sample_cov = cov(elevation_sample)
1899        ele_cov = cov(elevation_sample - elevation_predicted)
1900        normal_cov[i,:] = [alpha,ele_cov / sample_cov]
1901        print 'memory usage during compare', mem_usage()
1902        if verbose: print 'cov', normal_cov[i][0], '= ', normal_cov[i][1]
1903
1904    normal_cov0 = normal_cov[:,0]
1905    normal_cov_new = num.take(normal_cov, num.argsort(normal_cov0))
1906
1907    if plot_name is not None:
1908        from pylab import savefig,semilogx,loglog
1909
1910        semilogx(normal_cov_new[:,0], normal_cov_new[:,1])
1911        loglog(normal_cov_new[:,0], normal_cov_new[:,1])
1912        savefig(plot_name, dpi=300)
1913    if mesh_file == 'temp.msh':
1914        remove(mesh_file)
1915
1916    return (min(normal_cov_new[:,1]),
1917            normal_cov_new[(num.argmin(normal_cov_new, axis=0))[1],0])
1918
1919
# This module is a library; there is nothing to run when invoked directly.
if __name__ == "__main__":
    pass
1922
Note: See TracBrowser for help on using the repository browser.