source: anuga_core/source/anuga/geospatial_data/geospatial_data.py @ 4575

Last change on this file since 4575 was 4575, checked in by ole, 17 years ago

Diagnostics

File size: 52.7 KB
Line 
1"""Class Geospatial_data - Manipulation of locations on the planet and
2associated attributes.
3
4"""
5from sys import maxint
6from os import access, F_OK, R_OK
7from types import DictType
8from warnings import warn
9from string import lower
10from Numeric import concatenate, array, Float, shape, reshape, ravel, take, \
11                        size, shape
12#from Array import tolist
13from RandomArray import randint
14from copy import deepcopy
15
16#from MA import tolist
17
18from Scientific.IO.NetCDF import NetCDFFile   
19from anuga.coordinate_transforms.lat_long_UTM_conversion import UTMtoLL   
20from anuga.utilities.numerical_tools import ensure_numeric
21from anuga.coordinate_transforms.geo_reference import Geo_reference, \
22     TitleError, DEFAULT_ZONE, ensure_geo_reference, write_NetCDF_georeference
23from anuga.coordinate_transforms.redfearn import convert_from_latlon_to_utm
24from anuga.utilities.anuga_exceptions import ANUGAError
25from anuga.config import points_file_block_line_size as MAX_READ_LINES   
26
27DEFAULT_ATTRIBUTE = 'elevation'
28
29class Geospatial_data:
30
31    def __init__(self,
32                 data_points=None, # this can also be a points file name
33                 attributes=None,
34                 geo_reference=None,
35                 default_attribute_name=None,
36                 file_name=None,
37                 delimiter=None,
38                 latitudes=None,
39                 longitudes=None,
40                 points_are_lats_longs=False,
41                 max_read_lines=None,
42                 load_file_now=True,
43                 verbose=False):
44
45       
46        """
47        Create instance from data points and associated attributes
48
49        data_points: x,y coordinates in meters. Type must be either a
50        sequence of 2-tuples or an Mx2 Numeric array of floats.  A file name
51        with extension .txt, .cvs or .pts can also be passed in here.
52
53        attributes: Associated values for each data point. The type
54        must be either a list or an array of length M or a dictionary
55        of lists (or arrays) of length M. In the latter case the keys
56        in the dictionary represent the attribute names, in the former
57        the attribute will get the default name "elevation".
58       
59        geo_reference: Object representing the origin of the data
60        points. It contains UTM zone, easting and northing and data
61        points are assumed to be relative to this origin.
62        If geo_reference is None, the default geo ref object is used.
63
64        default_attribute_name: Name of default attribute to be used with
65        get_attribute_values. The idea is that the dataset can be
66        equipped with information about which attribute to return.
67        If None, the default is the "first"
68
69        latitudes, longitudes: Vectors of latitudes and longitudes,
70        used to specify location instead of points.
71       
72        points_are_lats_longs: Set this as true if the points are actually
73        lats and longs, not UTM
74
75        max_read_lines: The number of rows read into memory when using
76        blocking to read a file.
77
78        load_file_now:  If true the file is automatically loaded
79        into the geospatial instance. Used when blocking.
80       
81        file_name: Name of input netCDF file or .txt file. netCDF file must
82        have dimensions "points" etc.
83        .txt file is a comma seperated file with x, y and attribute
84        data.
85       
86        The first line has the titles of the columns.  The first two
87        column titles are checked to see if they start with lat or
88        long (not case sensitive).  If so the data is assumed to be
89        latitude and longitude, in decimal format and converted to
90        UTM.  Otherwise the first two columns are assumed to be the x
91        and y, and the title names acually used are ignored.
92 
93       
94        The format for a .txt file is:
95            1st line:     [column names]
96            other lines:  x y [attributes]
97
98            for example:
99            x, y, elevation, friction
100            0.6, 0.7, 4.9, 0.3
101            1.9, 2.8, 5, 0.3
102            2.7, 2.4, 5.2, 0.3
103
104        The first two columns are always  assumed to be x, y
105        coordinates.
106     
107        An issue with the xya format is that the attribute column order
108        is not be controlled.  The info is stored in a dictionary and it's
109        written in an order dependent on the hash order
110       
111        The format for a Points dictionary is:
112
113          ['pointlist'] a 2 column array describing points. 1st column x,
114          2nd column y.
115          ['attributelist'], a dictionary of 1D arrays, representing
116          attribute values at the point.  The dictionary key is the attribute
117          header.
118          ['geo_reference'] a Geo_refernece object. Use if the point
119          information is relative. This is optional.
120            eg
121            dic['pointlist'] = [[1.0,2.0],[3.0,5.0]]
122            dic['attributelist']['elevation'] = [[7.0,5.0]
123               
124        delimiter: is the file delimiter that will be used when
125            importing a .xya file, which is being phased out.
126           
127        verbose:
128
129         
130        """
131
132        if isinstance(data_points, basestring):
133            # assume data point is really a file name
134            file_name = data_points
135
136        self.set_verbose(verbose)
137        self.geo_reference=None #create the attribute
138        self.file_name = file_name
139        self.max_read_lines = max_read_lines
140
141        if delimiter is not None:
142            msg = 'Specifying delimiters will be removed.'
143            msg = 'Text file format is moving to comma seperated .txt files.'
144            warn(msg, DeprecationWarning) 
145        if file_name is None:
146            if delimiter is not None:
147                msg = 'No file specified yet a delimiter is provided!'
148                raise ValueError, msg
149           
150            if latitudes is not None or longitudes is not None or \
151                   points_are_lats_longs:
152                data_points, geo_reference =  \
153                             _set_using_lat_long(latitudes=latitudes,
154                                  longitudes=longitudes,
155                                  geo_reference=geo_reference,
156                                  data_points=data_points,
157                                  points_are_lats_longs=points_are_lats_longs)
158            self.check_data_points(data_points)
159            self.set_attributes(attributes)
160            self.set_geo_reference(geo_reference)
161            self.set_default_attribute_name(default_attribute_name)
162
163        elif load_file_now is True:
164            # watch for case where file name and points,
165            # attributes etc are provided!!
166            # if file name then all provided info will be removed!
167            self.import_points_file(file_name, delimiter, verbose)
168               
169            self.check_data_points(self.data_points)
170            self.set_attributes(self.attributes) 
171            self.set_geo_reference(self.geo_reference)
172            self.set_default_attribute_name(default_attribute_name)
173
174        #Why?   
175        #assert self.attributes is None or isinstance(self.attributes, DictType)
176        #This is a hassle when blocking, so I've removed it.
177       
178
179    def __len__(self):
180        return len(self.data_points)
181
182    def __repr__(self):
183        return str(self.get_data_points(absolute=True))
184   
185   
186    def check_data_points(self, data_points):
187        """Checks data points
188        """
189       
190        if data_points is None:
191            self.data_points = None
192            msg = 'There is no data or file provided!'
193            raise ValueError, msg
194           
195        else:
196            self.data_points = ensure_numeric(data_points)
197            #print "self.data_points.shape",self.data_points.shape
198            if not (0,) == self.data_points.shape:
199                assert len(self.data_points.shape) == 2
200                assert self.data_points.shape[1] == 2
201
202    def set_attributes(self, attributes):
203        """Check and assign attributes dictionary
204        """
205       
206        if attributes is None:
207            self.attributes = None
208            return
209       
210        if not isinstance(attributes, DictType):
211            #Convert single attribute into dictionary
212            attributes = {DEFAULT_ATTRIBUTE: attributes}
213
214        #Check input attributes   
215        for key in attributes.keys():
216            try:
217                attributes[key] = ensure_numeric(attributes[key])
218            except:
219                msg = 'Attribute %s could not be converted' %key
220                msg += 'to a numeric vector'
221                raise msg
222
223        self.attributes = attributes   
224
225
226    def set_geo_reference(self, geo_reference):
227        """
228        Set's the georeference of geospatial.
229        It can also be used to change the georeference
230        """
231        from anuga.coordinate_transforms.geo_reference import Geo_reference
232
233        if geo_reference is None:
234            geo_reference = Geo_reference() # Use default
235        geo_reference = ensure_geo_reference(geo_reference)
236        if not isinstance(geo_reference, Geo_reference):
237            msg = 'Argument geo_reference must be a valid Geo_reference \n'
238            msg += 'object or None.'
239            raise msg
240
241        # if a geo ref already exists, change the point data to
242        # represent the new geo-ref
243        if  self.geo_reference is not None:
244            #FIXME: Maybe put out a warning here...
245            self.data_points = self.get_data_points \
246                               (geo_reference=geo_reference)
247           
248        self.geo_reference = geo_reference
249
250
251    def set_default_attribute_name(self, default_attribute_name):
252        self.default_attribute_name = default_attribute_name
253
254    def set_verbose(self, verbose=False):
255        if verbose in [False, True]:
256            self.verbose = verbose
257           
258            if verbose is True:
259                print 'Geospatial_data.verbose = True'
260        else:
261            msg = 'Illegal value: %s' %str(verbose)
262            raise Exception, msg
263
264    def clip(self, polygon, closed=True):
265        """Clip geospatial data by a polygon
266
267        Input
268          polygon - Either a list of points, an Nx2 array or
269                    a Geospatial data object.
270          closed - (optional) determine whether points on boundary should be
271          regarded as belonging to the polygon (closed = True)
272          or not (closed = False). Default is True.
273           
274        Output
275          New geospatial data object representing points inside
276          specified polygon.
277       
278        """
279
280        from anuga.utilities.polygon import inside_polygon
281
282        if isinstance(polygon, Geospatial_data):
283            # Polygon is an object - extract points
284            polygon = polygon.get_data_points()
285
286        points = self.get_data_points()   
287        inside_indices = inside_polygon(points, polygon, closed)
288
289        clipped_G = self.get_sample(inside_indices)
290#        clipped_points = take(points, inside_indices)
291
292        # Clip all attributes
293#        attributes = self.get_all_attributes()
294
295#        clipped_attributes = {}
296#        if attributes is not None:
297#            for key, att in attributes.items():
298#                clipped_attributes[key] = take(att, inside_indices)
299
300#        return Geospatial_data(clipped_points, clipped_attributes)
301        return clipped_G
302       
303       
304    def clip_outside(self, polygon, closed=True):
305        """Clip geospatial date by a polygon, keeping data OUTSIDE of polygon
306
307        Input
308          polygon - Either a list of points, an Nx2 array or
309                    a Geospatial data object.
310          closed - (optional) determine whether points on boundary should be
311          regarded as belonging to the polygon (closed = True)
312          or not (closed = False). Default is True.
313         
314        Output
315          Geospatial data object representing point OUTSIDE specified polygon
316       
317        """
318
319        from anuga.utilities.polygon import outside_polygon
320
321        if isinstance(polygon, Geospatial_data):
322            # Polygon is an object - extract points
323            polygon = polygon.get_data_points()
324
325        points = self.get_data_points()   
326        outside_indices = outside_polygon(points, polygon, closed)
327
328        clipped_G = self.get_sample(outside_indices)
329
330#        clipped_points = take(points, outside_indices)
331
332        # Clip all attributes
333#        attributes = self.get_all_attributes()
334
335#        clipped_attributes = {}
336#        if attributes is not None:
337#            for key, att in attributes.items():
338#                clipped_attributes[key] = take(att, outside_indices)
339
340#        return Geospatial_data(clipped_points, clipped_attributes)
341        return clipped_G
342
343   
344    def get_geo_reference(self):
345        return self.geo_reference
346       
347    def get_data_points(self, absolute=True, geo_reference=None,
348                        as_lat_long=False):
349        """Get coordinates for all data points as an Nx2 array
350
351        If absolute is False returned coordinates are relative to the
352        internal georeference's xll and yll corners, otherwise
353        absolute UTM coordinates are returned.
354
355        If a geo_reference is passed the points are returned relative
356        to that geo_reference.
357
358        Default: absolute is True.
359        """
360        if as_lat_long is True:
361            msg = "Points need a zone to be converted into lats and longs"
362            assert self.geo_reference is not None, msg
363            zone = self.geo_reference.get_zone()
364            assert self.geo_reference.get_zone() is not DEFAULT_ZONE, msg
365            lats_longs = []
366            for point in self.get_data_points(True):
367                ### UTMtoLL(northing, easting, zone,
368                lat_calced, long_calced = UTMtoLL(point[1],point[0], zone)
369                lats_longs.append((lat_calced, long_calced)) # to hash
370            return lats_longs
371           
372        if absolute is True and geo_reference is None:
373            return self.geo_reference.get_absolute(self.data_points)
374        elif geo_reference is not None:
375            return geo_reference.change_points_geo_ref \
376                               (self.data_points, 
377                                self.geo_reference)
378        else:
379            return self.data_points
380       
381   
382    def get_attributes(self, attribute_name=None):
383        """Return values for one named attribute.
384
385        If attribute_name is None, default_attribute_name is used
386        """
387
388        if attribute_name is None:
389            if self.default_attribute_name is not None:
390                attribute_name = self.default_attribute_name
391            else:
392                attribute_name = self.attributes.keys()[0] 
393                # above line takes the first one from keys
394       
395        if self.verbose is True:
396            print 'Using attribute %s' %attribute_name
397            print 'Available attributes: %s' %(self.attributes.keys())       
398
399        msg = 'Attribute name %s does not exist in data set' %attribute_name
400        assert self.attributes.has_key(attribute_name), msg
401
402        return self.attributes[attribute_name]
403
404    def get_all_attributes(self):
405        """
406        Return values for all attributes.
407        The return value is either None or a dictionary (possibly empty).
408        """
409
410        return self.attributes
411
412    def __add__(self, other):
413        """
414        Returns the addition of 2 geospatical objects,
415        objects are concatencated to the end of each other
416           
417        NOTE: doesn't add if objects contain different
418        attributes 
419       
420        Always return absolute points!
421        This alse means, that if you add None to the object,
422        it will be turned into absolute coordinates
423
424        other can be None in which case nothing is added to self.
425        """
426
427
428        # find objects zone and checks if the same
429        geo_ref1 = self.get_geo_reference()
430        zone1 = geo_ref1.get_zone()
431
432
433        if other is not None:
434
435            geo_ref2 = other.get_geo_reference()
436            zone2 = geo_ref2.get_zone()
437
438            geo_ref1.reconcile_zones(geo_ref2)
439
440       
441            new_points = concatenate((self.get_data_points(absolute=True),
442                                      other.get_data_points(absolute=True)),
443                                      axis = 0)       
444
445       
446     
447            # Concatenate attributes if any
448            if self.attributes is None:
449                if other.attributes is not None:
450                    msg = 'Geospatial data must have the same \n'
451                    msg += 'attributes to allow addition.'
452                    raise Exception, msg
453               
454                new_attributes = None
455            else:   
456                new_attributes = {}
457                for x in self.attributes.keys():
458                    if other.attributes.has_key(x):
459
460                        attrib1 = self.attributes[x]
461                        attrib2 = other.attributes[x]
462                        new_attributes[x] = concatenate((attrib1, attrib2))
463
464                    else:
465                        msg = 'Geospatial data must have the same \n'
466                        msg += 'attributes to allow addition.'
467                        raise Exception, msg
468
469
470        else:
471            #other is None:
472           
473            new_points = self.get_data_points(absolute=True)
474            new_attributes = self.attributes
475
476                   
477
478        # Instantiate new data object and return absolute coordinates
479        new_geo_ref = Geo_reference(geo_ref1.get_zone(), 0.0, 0.0)
480        return Geospatial_data(new_points,
481                               new_attributes,
482                               new_geo_ref)
483
484
485    def __radd__(self, other):
486        """Handle cases like None + Geospatial_data(...)
487        """
488
489        return self + other
490
491   
492   
493    ###
494    #  IMPORT/EXPORT POINTS FILES
495    ###
496
497    def import_points_file(self, file_name, delimiter=None, verbose=False):
498        """ load an .txt, .csv or .xya or .pts file
499        Note: will throw an IOError if it can't load the file.
500        Catch these!
501
502        Post condition: self.attributes dictionary has been set
503        """
504       
505        if access(file_name, F_OK) == 0 :
506            msg = 'File %s does not exist or is not accessible' %file_name
507            raise IOError, msg
508       
509        attributes = {}
510        if file_name[-4:]== ".xya":
511            # Maybe not phase-out, so we can load in geo-ref info
512            #msg = 'Text file format is moving to comma seperated .txt files.'
513            #warn(msg, DeprecationWarning)
514            try:
515                if delimiter == None:
516                    try:
517                        fd = open(file_name)
518                        data_points, attributes, geo_reference =\
519                                     _read_xya_file(fd, ',')
520                    except TitleError:
521                        fd.close()
522                        fd = open(file_name)
523                        data_points, attributes, geo_reference =\
524                                     _read_xya_file(fd, ' ')
525                else:
526                    fd = open(file_name)
527                    data_points, attributes, geo_reference =\
528                                 _read_xya_file(fd, delimiter)
529                fd.close()
530            except (IndexError,ValueError,SyntaxError):
531                fd.close()   
532                msg = 'Could not open file %s ' %file_name
533                msg += 'Check the file location.'
534                raise IOError, msg
535            except IOError, e:
536                fd.close() 
537                # Catch this to add an error message
538                msg = 'Could not open file or incorrect file format %s:%s'\
539                      %(file_name, e)
540                raise IOError, msg
541               
542        elif file_name[-4:]== ".pts":
543            try:
544                data_points, attributes, geo_reference =\
545                             _read_pts_file(file_name, verbose)
546            except IOError, e:   
547                msg = 'Could not open file %s ' %file_name
548                raise IOError, msg 
549       
550        elif file_name[-4:]== ".txt" or file_name[-4:]== ".csv":
551            try:
552                data_points, attributes, geo_reference =\
553                             _read_csv_file(file_name, verbose)
554            except IOError, e:
555                # This should only be if a file is not found
556                msg = 'Could not open file %s. ' %file_name
557                msg += 'Check the file location.'
558                raise IOError, msg
559            except SyntaxError, e:
560                # This should only be if there is a format error
561                msg = 'Could not open file %s. \n' %file_name
562                msg += Error_message['IOError']
563                #print "msg", msg
564                raise SyntaxError, msg
565        else:     
566            msg = 'Extension %s is unknown' %file_name[-4:]
567            raise IOError, msg
568#        print'in import data_points', data_points
569#        print'in import attributes', attributes
570#        print'in import data_points', geo_reference
571        self.data_points = data_points
572        self.attributes = attributes
573        self.geo_reference = geo_reference
574   
575#        return all_data
576   
577    def export_points_file(self, file_name, absolute=True, as_lat_long=False):
578       
579        """
580        write a points file, file_name, as a text (.xya) or binary (.pts) file
581        file_name is the file name, including the extension
582        The point_dict is defined at the top of this file.
583       
584        If absolute is True data the xll and yll are added to the points value
585        and the xll and yll of the geo_reference are set to 0.
586       
587        If absolute is False data points at returned as relative to the xll
588        and yll and geo_reference remains uneffected
589        """
590
591        if absolute is False and file_name[-4:] == ".xya":
592            msg = 'The text file values must be absolute.   '
593            msg += 'Text file format is moving to comma seperated .txt files.'
594            warn(msg, DeprecationWarning) 
595
596        if (file_name[-4:] == ".xya"):
597            msg = '.xya format is deprecated.  Please use .txt.'
598            warn(msg, DeprecationWarning)
599            if absolute is True:     
600                geo_ref = deepcopy(self.geo_reference)
601                geo_ref.xllcorner = 0
602                geo_ref.yllcorner = 0   
603                _write_xya_file(file_name,
604                                self.get_data_points(absolute=True), 
605                                self.get_all_attributes(),
606                                geo_ref)
607            else:
608                _write_xya_file(file_name,
609                                self.get_data_points(absolute=False), 
610                                self.get_all_attributes(),
611                                self.get_geo_reference())
612                                   
613        elif (file_name[-4:] == ".pts"):
614            if absolute is True:
615                geo_ref = deepcopy(self.geo_reference)
616                geo_ref.xllcorner = 0
617                geo_ref.yllcorner = 0
618                _write_pts_file(file_name,
619                                self.get_data_points(absolute), 
620                                self.get_all_attributes(),
621                                geo_ref)
622            else:
623                _write_pts_file(file_name,
624                                self.get_data_points(absolute), 
625                                self.get_all_attributes(),
626                                self.get_geo_reference())
627               
628        elif file_name[-4:] == ".txt" or file_name[-4:] == ".csv":
629            msg = "ERROR: trying to write a .txt file with relative data."
630            assert absolute, msg
631            _write_csv_file(file_name,
632                            self.get_data_points(absolute=True,
633                                                 as_lat_long=as_lat_long), 
634                            self.get_all_attributes(),
635                            as_lat_long=as_lat_long)
636             
637        elif file_name[-4:] == ".urs" :
638            msg = "ERROR: Can not write a .urs file as a relative file."
639            assert absolute, msg
640            _write_urs_file(file_name,
641                            self.get_data_points(as_lat_long=True))
642                                                         
643        else:
644            msg = 'Unknown file type %s ' %file_name
645            raise IOError, msg
646       
647    def get_sample(self, indices):
648        """ Returns a object which is a subset of the original
649        and the data points and attributes in this new object refer to
650        the indices provided
651       
652        Input
653            indices- a list of integers that represent the new object
654        Output
655            New geospatial data object representing points specified by
656            the indices
657            """
658        #FIXME: add the geo_reference to this
659       
660        points = self.get_data_points()
661        sampled_points = take(points, indices)
662
663        attributes = self.get_all_attributes()
664
665        sampled_attributes = {}
666        if attributes is not None:
667            for key, att in attributes.items():
668                sampled_attributes[key] = take(att, indices)
669
670        return Geospatial_data(sampled_points, sampled_attributes)
671   
672   
673    def split(self, factor=0.5, verbose=False):
674        """Returns two geospatial_data object, first is the size of the 'factor'
675        smaller the original and the second is the remainder. The two new
676        object are disjoin set of each other.
677       
678        Points of the two new object have selected RANDOMLY.
679        AND if factor is a decimal it will round (2.25 to 2 and 2.5 to 3)
680       
681        This method create two lists of indices which are passed into get_sample.
682        The lists are created using random numbers, and they are unique sets eg.
683        total_list(1,2,3,4,5,6,7,8,9)  random_list(1,3,6,7,9) and remainder_list(0,2,4,5,8)
684               
685        Input - the factor which to split the object, if 0.1 then 10% of the
686            object will be returned
687       
688        Output - two geospatial_data objects that are disjoint sets of the
689            original
690            """
691       
692        i=0
693        self_size = len(self)
694        random_list = []
695        remainder_list = []
696        new_size = round(factor*self_size)
697       
698        #find unique random numbers
699        if verbose: print "make unique random number list and get indices"
700
701        total=array(range(self_size))
702        total_list = total.tolist()
703        if verbose: print "total list len",len(total_list)
704               
705        #there will be repeated random numbers however will not be a
706        #problem as they are being 'pop'ed out of array so if there
707        #are two numbers the same they will pop different indicies,
708        #still basically random
709        ## create list of non-unquie random numbers
710        if verbose: print "create random numbers list %s long" %new_size
711        random_num = randint(0,self_size-1,(int(new_size),))
712        random_num = random_num.tolist()
713
714        #need to sort and reverse so the pop() works correctly
715        random_num.sort()
716        random_num.reverse()       
717       
718        if verbose: print "make random number list and get indices"
719        j=0
720        k=1
721        remainder_list = total_list[:]
722        #pops array index (random_num) from remainder_list (which starts as the
723        #total_list and appends to random_list 
724        random_num_len = len(random_num)
725        for i in random_num:
726            random_list.append(remainder_list.pop(i))
727            j+=1
728            #prints progress
729            if verbose and round(random_num_len/10*k)==j:
730                print '(%s/%s)' %(j, random_num_len)
731                k+=1
732       
733        #FIXME: move to tests, it might take a long time
734        #then create an array of random lenght between 500 and 1000,
735        #and use a random factor between 0 and 1
736        #setup for assertion
737        test_total = random_list[:]
738        test_total.extend(remainder_list)
739        test_total.sort() 
740        msg = 'The two random lists made from the original list when added together '\
741         'DO NOT equal the original list'
742        assert (total_list==test_total),msg
743
744        #get new samples
745        if verbose: print "get values of indices for random list"
746        G1 = self.get_sample(random_list)
747        if verbose: print "get values of indices for opposite of random list"
748        G2 = self.get_sample(remainder_list)
749
750        return G1, G2
751
752    def __iter__(self):
753        """Read in the header, number_of_points and save the
754        file pointer position
755        """
756
757        from Scientific.IO.NetCDF import NetCDFFile
758       
759        #FIXME - what to do if the file isn't there
760
761        if self.file_name[-4:] == ".xya":
762            # FIXME (Ole): shouldn't the xya format be replaced by txt/csv?
763            #  Currently both file formats are used.
764           
765            #let's just read it all
766            pass
767        elif self.file_name[-4:] == ".pts":
768           
769            # see if the file is there.  Throw a QUIET IO error if it isn't
770            fd = open(self.file_name,'r')
771            fd.close()
772   
773            #throws prints to screen if file not present
774            self.fid = NetCDFFile(self.file_name, 'r')
775           
776            self.blocking_georef, self.blocking_keys, self.last_row = \
777                     _read_pts_file_header(self.fid, self.verbose)
778            self.start_row = 0
779            self.show_verbose = 0
780            self.verbose_block_size = (self.last_row + 10)/10
781           
782            if self.verbose is True:
783                print 'Reading %d points (blocking) from file %s'\
784                      %(self.last_row,
785                        self.file_name)
786           
787        else:
788            # assume the file is a csv file
789            file_pointer = open(self.file_name)
790            self.header, self.file_pointer = \
791                         _read_csv_file_header(file_pointer)
792            self.blocking_georef = None # Used for reconciling zones
793           
794        if self.max_read_lines is None:
795            self.max_read_lines = MAX_READ_LINES
796        return self
797   
798    def next(self):
799        """ read a block, instanciate a new geospatial and return it"""
800       
801        if self.file_name[-4:]== ".xya" :
802            # FIXME (Ole): shouldn't the xya format be replaced by txt/csv?
803            #  Currently both file formats are used.
804           
805            if not hasattr(self,'finished_reading') or \
806                   self.finished_reading is False:
807                #let's just read it all
808                geo = Geospatial_data(self.file_name)
809                self.finished_reading = True
810            else:
811                raise StopIteration
812                self.finished_reading = False
813               
814        elif self.file_name[-4:]== ".pts":
815            if self.start_row == self.last_row:
816                # read the end of the file last iteration
817                # remove blocking attributes
818                self.fid.close()
819                del self.max_read_lines
820                del self.blocking_georef
821                del self.last_row
822                del self.start_row
823                del self.blocking_keys
824                del self.fid
825                raise StopIteration
826            fin_row = self.start_row + self.max_read_lines
827            if fin_row > self.last_row:
828                fin_row = self.last_row
829
830               
831           
832            if self.verbose is True:
833                if self.show_verbose >= self.start_row and \
834                       self.show_verbose < fin_row:
835                 print 'Doing %d of %d' %(self.start_row, self.last_row+10)
836                 self.show_verbose += max(self.max_read_lines,
837                                          self.verbose_block_size)
838            #call stuff
839            pointlist, att_dict, = \
840                   _read_pts_file_blocking(self.fid,
841                                           self.start_row,
842                                           fin_row,
843                                           self.blocking_keys)
844           
845            geo = Geospatial_data(pointlist, att_dict, self.blocking_georef)
846            self.start_row = fin_row
847           
848        else:
849            # assume the file is a csv file
850            try:
851                pointlist, att_dict, geo_ref, self.file_pointer = \
852                   _read_csv_file_blocking( self.file_pointer,
853                                            self.header[:],
854                                            max_read_lines=self.max_read_lines,
855                                            verbose=self.verbose)
856
857                # Check that the zones haven't changed.
858                if geo_ref is not None:
859                    geo_ref.reconcile_zones(self.blocking_georef)
860                    self.blocking_georef = geo_ref
861                elif self.blocking_georef is not None:
862                   
863                    msg = 'Geo reference given, then not given.'
864                    msg += ' This should not happen.' 
865                    raise ValueError, msg
866                geo = Geospatial_data(pointlist, att_dict, geo_ref)
867            except StopIteration:
868                self.file_pointer.close()
869                del self.header
870                del self.file_pointer
871                raise StopIteration
872            except ANUGAError:
873                self.file_pointer.close()
874                del self.header
875                del self.file_pointer
876                raise
877            except SyntaxError:
878                self.file_pointer.close()
879                del self.header
880                del self.file_pointer
881                # This should only be if there is a format error
882                msg = 'Could not open file %s. \n' %self.file_name
883                msg += Error_message['IOError']
884                raise SyntaxError, msg
885        return geo
886##################### Error messages ###########
887Error_message = {}
888Em = Error_message
889Em['IOError'] = "NOTE: The format for a comma seperated .txt/.csv file is:\n"
890Em['IOError'] += "        1st line:     [column names]\n"
891Em['IOError'] += "        other lines:  [x value], [y value], [attributes]\n"
892Em['IOError'] += "\n"
893Em['IOError'] += "           for example:\n"
894Em['IOError'] += "           x, y, elevation, friction\n"
895Em['IOError'] += "           0.6, 0.7, 4.9, 0.3\n"
896Em['IOError'] += "           1.9, 2.8, 5, 0.3\n"
897Em['IOError'] += "           2.7, 2.4, 5.2, 0.3\n"
898Em['IOError'] += "\n"
899Em['IOError'] += "The first two columns are assumed to be x, y coordinates.\n"
900Em['IOError'] += "The attribute values must be numeric.\n"
901
902def _set_using_lat_long(latitudes,
903                        longitudes,
904                        geo_reference,
905                        data_points,
906                        points_are_lats_longs):
907    """
908    if the points has lat long info, assume it is in (lat, long) order.
909    """
910   
911    if geo_reference is not None:
912        msg = """A georeference is specified yet latitude and longitude
913        are also specified!"""
914        raise ValueError, msg
915   
916    if data_points is not None and not points_are_lats_longs:
917        msg = """Data points are specified yet latitude and
918        longitude are also specified!"""
919        raise ValueError, msg
920   
921    if points_are_lats_longs:
922        if data_points is None:
923            msg = """Data points are not specified !"""
924            raise ValueError, msg
925        lats_longs = ensure_numeric(data_points)
926        latitudes = ravel(lats_longs[:,0:1])
927        longitudes = ravel(lats_longs[:,1:])
928       
929    if latitudes is None and longitudes is None:
930        msg = """Latitudes and Longitudes are not."""
931        raise ValueError, msg
932   
933    if latitudes is None:
934        msg = """Longitudes are specified yet latitudes aren't!"""
935        raise ValueError, msg
936   
937    if longitudes is None:
938        msg = """Latitudes are specified yet longitudes aren't!"""
939        raise ValueError, msg
940   
941    data_points, zone  = convert_from_latlon_to_utm(latitudes=latitudes,
942                                                    longitudes=longitudes)
943    return data_points, Geo_reference(zone=zone)
944
945   
946def _read_pts_file(file_name, verbose=False):
947    """Read .pts NetCDF file
948   
949    Return a dic of array of points, and dic of array of attribute
950    eg
951    dic['points'] = [[1.0,2.0],[3.0,5.0]]
952    dic['attributelist']['elevation'] = [[7.0,5.0]
953    """   
954
955    from Scientific.IO.NetCDF import NetCDFFile
956   
957    if verbose: print 'Reading ', file_name
958   
959       
960    # see if the file is there.  Throw a QUIET IO error if it isn't
961    fd = open(file_name,'r')
962    fd.close()
963   
964    #throws prints to screen if file not present
965    fid = NetCDFFile(file_name, 'r') 
966   
967    pointlist = array(fid.variables['points'])
968    keys = fid.variables.keys()
969    if verbose: print 'Got %d variables: %s' %(len(keys), keys)
970    try:
971        keys.remove('points')
972    except IOError, e:       
973        fid.close()   
974        msg = 'Expected keyword "points" but could not find it'
975        raise IOError, msg
976   
977    attributes = {}
978    for key in keys:
979        if verbose: print "reading attribute '%s'" %key
980           
981        attributes[key] = array(fid.variables[key])
982   
983   
984    try:
985        geo_reference = Geo_reference(NetCDFObject=fid)
986    except AttributeError, e:
987        geo_reference = None
988   
989    fid.close()
990   
991    return pointlist, attributes, geo_reference
992
993
994def _read_csv_file(file_name, verbose=False):
995    """Read .csv file
996   
997    Return a dic of array of points, and dic of array of attribute
998    eg
999    dic['points'] = [[1.0,2.0],[3.0,5.0]]
1000    dic['attributelist']['elevation'] = [[7.0,5.0]
1001    """
1002   
1003    file_pointer = open(file_name)
1004    header, file_pointer = _read_csv_file_header(file_pointer)
1005    try:
1006        pointlist, att_dict, geo_ref, file_pointer = \
1007                   _read_csv_file_blocking( \
1008                file_pointer,
1009                header,
1010                max_read_lines=1e30) #If the file is bigger that this, block..
1011    except ANUGAError:
1012        file_pointer.close()
1013        raise 
1014    file_pointer.close()
1015    return pointlist, att_dict, geo_ref   
1016
1017CSV_DELIMITER = ','
1018def _read_csv_file_header(file_pointer,
1019                          delimiter=CSV_DELIMITER,
1020                          verbose=False):
1021
1022    """Read the header of a .csv file
1023    Return a list of the header names
1024    """
1025    line = file_pointer.readline()
1026    header = clean_line(line, delimiter)
1027    return header, file_pointer
1028
1029def _read_csv_file_blocking(file_pointer, header,
1030                            delimiter=CSV_DELIMITER,
1031                            max_read_lines=MAX_READ_LINES,
1032                            verbose=False):
1033   
1034
1035    """
1036    Read the body of a .csv file.
1037    header: The list header of the csv file, with the x and y labels.
1038    """
1039    points = []
1040    pointattributes = []
1041    att_dict = {}
1042
1043    #This is to remove the x and y headers.
1044    header = header[:]
1045    try:
1046        x_header = header.pop(0)
1047        y_header = header.pop(0)
1048    except IndexError:
1049        # if there are not two columns this will occur.
1050        # eg if it is a space seperated file
1051        raise SyntaxError
1052   
1053    read_lines = 0
1054    while read_lines<max_read_lines:
1055        line = file_pointer.readline()
1056        #print "line",line
1057        numbers = clean_line(line,delimiter)
1058        if len(numbers) <= 1:
1059            break
1060        if line[0] == '#':
1061            continue
1062        read_lines += 1
1063        try:
1064            x = float(numbers[0])
1065            y = float(numbers[1])
1066            points.append([x,y])
1067            numbers.pop(0)
1068            numbers.pop(0)
1069            if len(header) != len(numbers):
1070                file_pointer.close() 
1071                msg = "File load error.  There might be a problem with the file header"
1072                raise SyntaxError, msg
1073            for i,num in enumerate(numbers):
1074                num.strip()
1075                if num != '\n' and num != '':
1076                    #attributes.append(float(num))
1077                    att_dict.setdefault(header[i],[]).append(float(num))
1078        #except IOError:           
1079        except ValueError:
1080            raise SyntaxError
1081    if points == []:
1082        raise StopIteration
1083       
1084   
1085    pointlist = array(points).astype(Float)
1086    for key in att_dict.keys():
1087        att_dict[key] = array(att_dict[key]).astype(Float)
1088
1089    #Do stuff here so the info is in lat's and longs
1090    geo_ref = None
1091    x_header = lower(x_header[:3])
1092    y_header = lower(y_header[:3])
1093    if (x_header == 'lon' or  x_header == 'lat') and \
1094       (y_header == 'lon' or  y_header == 'lat'):
1095        if x_header == 'lon':
1096            longitudes = ravel(pointlist[:,0:1])
1097            latitudes = ravel(pointlist[:,1:])
1098        else:
1099            latitudes = ravel(pointlist[:,0:1])
1100            longitudes = ravel(pointlist[:,1:])
1101       
1102        pointlist, geo_ref = _set_using_lat_long(latitudes,
1103                                                 longitudes,
1104                                                 geo_reference=None,
1105                                                 data_points=None,
1106                                                 points_are_lats_longs=False)
1107    return pointlist, att_dict, geo_ref, file_pointer
1108
1109def _read_pts_file_header(fid, verbose=False):
1110
1111    """Read the geo_reference and number_of_points from a .pts file
1112    """
1113   
1114    keys = fid.variables.keys()
1115    try:
1116        keys.remove('points')
1117    except IOError, e:       
1118        fid.close()   
1119        msg = 'Expected keyword "points" but could not find it'
1120        raise IOError, msg
1121    if verbose: print 'Got %d variables: %s' %(len(keys), keys)
1122   
1123    try:
1124        geo_reference = Geo_reference(NetCDFObject=fid)
1125    except AttributeError, e:
1126        geo_reference = None
1127
1128    return geo_reference, keys, fid.dimensions['number_of_points']
1129
1130def _read_pts_file_blocking(fid, start_row, fin_row, keys):
1131                            #verbose=False):
1132   
1133
1134    """
1135    Read the body of a .csv file.
1136    header: The list header of the csv file, with the x and y labels.
1137    """
1138   
1139    pointlist = array(fid.variables['points'][start_row:fin_row])
1140   
1141    attributes = {}
1142    for key in keys:
1143        attributes[key] = array(fid.variables[key][start_row:fin_row])
1144
1145    return pointlist, attributes
1146   
1147   
1148def _read_xya_file(fd, delimiter):
1149    points = []
1150    pointattributes = []
1151    title = fd.readline()
1152    att_names = clean_line(title,delimiter)
1153    att_dict = {}
1154    line = fd.readline()
1155    numbers = clean_line(line,delimiter)
1156   
1157    while len(numbers) > 1 and line[0] <> '#':
1158        if numbers != []:
1159            try:
1160                x = float(numbers[0])
1161                y = float(numbers[1])
1162                points.append([x,y])
1163                numbers.pop(0)
1164                numbers.pop(0)
1165                if len(att_names) != len(numbers):
1166                    fd.close()
1167                    # It might not be a problem with the title
1168                    #raise TitleAmountError
1169                    raise IOError
1170                for i,num in enumerate(numbers):
1171                    num.strip()
1172                    if num != '\n' and num != '':
1173                        #attributes.append(float(num))
1174                        att_dict.setdefault(att_names[i],[]).append(float(num))
1175            except ValueError:
1176                raise SyntaxError
1177        line = fd.readline()
1178        numbers = clean_line(line,delimiter)
1179   
1180    if line == '':
1181        geo_reference = None
1182    else:
1183        geo_reference = Geo_reference(ASCIIFile=fd,read_title=line)
1184       
1185   
1186    pointlist = array(points).astype(Float)
1187    for key in att_dict.keys():
1188        att_dict[key] = array(att_dict[key]).astype(Float)
1189   
1190    return pointlist, att_dict, geo_reference
1191
1192def _write_pts_file(file_name,
1193                    write_data_points,
1194                    write_attributes=None, 
1195                    write_geo_reference=None):
1196    """
1197    Write .pts NetCDF file   
1198
1199    NOTE: Below might not be valid ask Duncan : NB 5/2006
1200   
1201    WARNING: This function mangles the point_atts data structure
1202    #F??ME: (DSG)This format has issues.
1203    # There can't be an attribute called points
1204    # consider format change
1205    # method changed by NB not sure if above statement is correct
1206   
1207    should create new test for this
1208    legal_keys = ['pointlist', 'attributelist', 'geo_reference']
1209    for key in point_atts.keys():
1210        msg = 'Key %s is illegal. Valid keys are %s' %(key, legal_keys)
1211        assert key in legal_keys, msg
1212    """   
1213    from Scientific.IO.NetCDF import NetCDFFile
1214    # NetCDF file definition
1215    outfile = NetCDFFile(file_name, 'w')
1216   
1217    #Create new file
1218    outfile.institution = 'Geoscience Australia'
1219    outfile.description = 'NetCDF format for compact and portable storage ' +\
1220                          'of spatial point data'
1221   
1222    # dimension definitions
1223    shape = write_data_points.shape[0]
1224    outfile.createDimension('number_of_points', shape) 
1225    outfile.createDimension('number_of_dimensions', 2) #This is 2d data
1226   
1227    # variable definition
1228    outfile.createVariable('points', Float, ('number_of_points',
1229                                             'number_of_dimensions'))
1230
1231    #create variables 
1232    outfile.variables['points'][:] = write_data_points #.astype(Float32)
1233
1234    if write_attributes is not None:
1235        for key in write_attributes.keys():
1236            outfile.createVariable(key, Float, ('number_of_points',))
1237            outfile.variables[key][:] = write_attributes[key] #.astype(Float32)
1238       
1239    if write_geo_reference is not None:
1240        write_NetCDF_georeference(write_geo_reference, outfile)
1241       
1242    outfile.close() 
1243 
1244
1245
1246def _write_xya_file(file_name,
1247                    write_data_points,
1248                    write_attributes=None, 
1249                    write_geo_reference=None, 
1250                    delimiter=','):
1251    """
1252    export a file, file_name, with the xya format
1253   
1254    """
1255    points = write_data_points
1256    pointattributes = write_attributes
1257   
1258    fd = open(file_name,'w')
1259    titlelist = ""
1260    if pointattributes is not None:   
1261        for title in pointattributes.keys():
1262            titlelist = titlelist + title + delimiter
1263        titlelist = titlelist[0:-len(delimiter)] # remove the last delimiter
1264    fd.write(titlelist+"\n")
1265   
1266    #<vertex #> <x> <y> [attributes]
1267    for i, vert in enumerate( points):
1268
1269
1270        if pointattributes is not None:           
1271            attlist = ","
1272            for att in pointattributes.keys():
1273                attlist = attlist + str(pointattributes[att][i])+ delimiter
1274            attlist = attlist[0:-len(delimiter)] # remove the last delimiter
1275            attlist.strip()
1276        else:
1277            attlist = ''
1278
1279        fd.write(str(vert[0]) + delimiter +
1280                 str(vert[1]) + attlist + "\n")
1281
1282    if  write_geo_reference is not None:
1283        write_geo_reference = ensure_geo_reference(write_geo_reference)
1284        write_geo_reference.write_ASCII(fd)
1285    fd.close()
1286
1287
1288def _write_csv_file(file_name,
1289                    write_data_points,
1290                    write_attributes=None,
1291                    as_lat_long=False,
1292                    delimiter=','):
1293    """
1294    export a file, file_name, with the csv format
1295   
1296    """
1297    points = write_data_points
1298    pointattributes = write_attributes
1299   
1300    fd = open(file_name,'w')
1301    if as_lat_long:
1302        titlelist = "latitude" + delimiter + "longitude"  + delimiter
1303    else:
1304        titlelist = "x" + delimiter + "y"  + delimiter
1305    if pointattributes is not None:   
1306        for title in pointattributes.keys():
1307            titlelist = titlelist + title + delimiter
1308        titlelist = titlelist[0:-len(delimiter)] # remove the last delimiter
1309    fd.write(titlelist+"\n")
1310   
1311    # <x/lat> <y/long> [attributes]
1312    for i, vert in enumerate( points):
1313
1314
1315        if pointattributes is not None:           
1316            attlist = ","
1317            for att in pointattributes.keys():
1318                attlist = attlist + str(pointattributes[att][i])+ delimiter
1319            attlist = attlist[0:-len(delimiter)] # remove the last delimiter
1320            attlist.strip()
1321        else:
1322            attlist = ''
1323
1324        fd.write(str(vert[0]) + delimiter +
1325                 str(vert[1]) + attlist + "\n")
1326
1327    fd.close()
1328 
1329def _write_urs_file(file_name,
1330                    points,
1331                    delimiter=' '):
1332    """
1333    export a file, file_name, with the urs format
1334    the data points are in lats and longs
1335   
1336    """   
1337    fd = open(file_name,'w')   
1338    fd.write(str(len(points))+"\n")   
1339    # <lat> <long> <id#>
1340    for i, vert in enumerate( points):
1341        fd.write(str(round(vert[0],7)) + delimiter + \
1342                 str(round(vert[1],7)) + delimiter +str(i)+ "\n")
1343    fd.close()
1344   
1345def _point_atts2array(point_atts):
1346    point_atts['pointlist'] = array(point_atts['pointlist']).astype(Float)
1347   
1348    for key in point_atts['attributelist'].keys():
1349        point_atts['attributelist'][key]=\
1350                array(point_atts['attributelist'][key]).astype(Float)
1351    return point_atts
1352
1353   
1354
1355
1356def geospatial_data2points_dictionary(geospatial_data):
1357    """Convert geospatial data to points_dictionary
1358    """
1359
1360    points_dictionary = {}
1361    points_dictionary['pointlist'] = geospatial_data.data_points
1362
1363    points_dictionary['attributelist'] = {}
1364
1365    for attribute_name in geospatial_data.attributes.keys():
1366        val = geospatial_data.attributes[attribute_name]
1367        points_dictionary['attributelist'][attribute_name] = val
1368
1369    points_dictionary['geo_reference'] = geospatial_data.geo_reference
1370
1371    return points_dictionary
1372
1373   
1374def points_dictionary2geospatial_data(points_dictionary):
1375    """Convert points_dictionary to geospatial data object
1376    """
1377
1378    msg = 'Points dictionary must have key pointlist' 
1379    assert points_dictionary.has_key('pointlist'), msg
1380
1381    msg = 'Points dictionary must have key attributelist'     
1382    assert points_dictionary.has_key('attributelist'), msg       
1383
1384    if points_dictionary.has_key('geo_reference'):
1385        geo = points_dictionary['geo_reference']
1386    else:
1387        geo = None
1388   
1389    return Geospatial_data(points_dictionary['pointlist'],
1390                           points_dictionary['attributelist'],
1391                           geo_reference = geo)
1392
1393def clean_line(line,delimiter):     
1394    """Remove whitespace
1395    """
1396    #print ">%s" %line
1397    line = line.strip()
1398    #print "stripped>%s" %line
1399    numbers = line.split(delimiter)
1400    i = len(numbers) - 1
1401    while i >= 0:
1402        if numbers[i] == '':
1403            numbers.pop(i)
1404        else:
1405            numbers[i] = numbers[i].strip()
1406       
1407        i += -1
1408    #for num in numbers:
1409    #    print "num>%s<" %num
1410    return numbers
1411           
1412def ensure_absolute(points, geo_reference=None):
1413    """
1414    This function inputs several formats and
1415    outputs one format. - a numeric array of absolute points.
1416
1417    Inputed formats are;
1418    points: List or numeric array of coordinate pairs [xi, eta] of
1419              points or geospatial object or points file name
1420
1421    mesh_origin: A geo_reference object or 3-tuples consisting of
1422                 UTM zone, easting and northing.
1423                 If specified vertex coordinates are assumed to be
1424                 relative to their respective origins.
1425    """
1426    if isinstance(points,type('')):
1427        #It's a string
1428        #assume it is a point file
1429        points = Geospatial_data(file_name = points)
1430       
1431    if isinstance(points,Geospatial_data):
1432        points = points.get_data_points( \
1433                absolute = True)
1434        msg = "Use a Geospatial_data object or a mesh origin. Not both."
1435        assert geo_reference == None, msg
1436           
1437    else:
1438        points = ensure_numeric(points, Float)
1439    if geo_reference is None:
1440        geo = None #Geo_reference()
1441    else:
1442        if isinstance(geo_reference, Geo_reference):
1443            geo = geo_reference
1444        else:
1445            geo = Geo_reference(geo_reference[0],
1446                                geo_reference[1],
1447                                geo_reference[2])
1448        points = geo.get_absolute(points)
1449    return points
1450     
1451
1452def ensure_geospatial(points, geo_reference=None):
1453    """
1454    This function inputs several formats and
1455    outputs one format. - a geospatial_data instance.
1456
1457    Inputed formats are;
1458    points: List or numeric array of coordinate pairs [xi, eta] of
1459              points or geospatial object
1460
1461    mesh_origin: A geo_reference object or 3-tuples consisting of
1462                 UTM zone, easting and northing.
1463                 If specified vertex coordinates are assumed to be
1464                 relative to their respective origins.
1465    """
1466    if isinstance(points,Geospatial_data):
1467        msg = "Use a Geospatial_data object or a mesh origin. Not both."
1468        assert geo_reference == None, msg
1469        return points   
1470    else:
1471        points = ensure_numeric(points, Float)
1472    if geo_reference is None:
1473        geo = None
1474    else:
1475        if isinstance(geo_reference, Geo_reference):
1476            geo = geo_reference
1477        else:
1478            geo = Geo_reference(geo_reference[0],
1479                                geo_reference[1],
1480                                geo_reference[2])
1481    points = Geospatial_data(data_points=points, geo_reference=geo)       
1482    return points
1483
1484#def file2xya(filename):
1485   
1486#    G = Geospatial_data(filename)
1487#    G.export_points_file()
1488
1489             
1490
1491         
1492if __name__ == "__main__":
1493    pass
1494   
Note: See TracBrowser for help on using the repository browser.