source: anuga_core/source/anuga/geospatial_data/geospatial_data.py @ 4174

Last change on this file since 4174 was 4174, checked in by duncan, 18 years ago

working on fit_to_mesh when .pts and .xya files are passed in.

File size: 42.0 KB
Line 
1"""Class Geospatial_data - Manipulation of locations on the planet and
2associated attributes.
3
4"""
5
6from os import access, F_OK, R_OK
7from types import DictType
8from warnings import warn
9
10from Numeric import concatenate, array, Float, shape, reshape, ravel, take, \
11                        size, shape
12from random import randint
13#from MA import tolist
14
15from anuga.utilities.numerical_tools import ensure_numeric
16from anuga.coordinate_transforms.geo_reference import Geo_reference, TitleError
17from anuga.coordinate_transforms.redfearn import convert_from_latlon_to_utm
18
19MAX_READ_LINES = 500       
20class Geospatial_data:
21
22    def __init__(self,
23                 data_points=None, # this can also be a points file name
24                 attributes=None,
25                 geo_reference=None,
26                 default_attribute_name=None,
27                 file_name=None,
28                 delimiter=None,
29                 latitudes=None,
30                 longitudes=None,
31                 points_are_lats_longs=False,
32                 max_read_lines=None,
33                 load_file_now=True,
34                 verbose=False):
35
36       
37        """
38        Create instance from data points and associated attributes
39
40        data_points: x,y coordinates in meters. Type must be either a
41        sequence of 2-tuples or an Mx2 Numeric array of floats.
42
43        attributes: Associated values for each data point. The type
44        must be either a list or an array of length M or a dictionary
45        of lists (or arrays) of length M. In the latter case the keys
46        in the dictionary represent the attribute names, in the former
47        the attribute will get the default name "attribute".
48       
49        geo_reference: Object representing the origin of the data
50        points. It contains UTM zone, easting and northing and data
51        points are assumed to be relative to this origin.
52        If geo_reference is None, the default geo ref object is used
53
54        default_attribute_name: Name of default attribute to be used with
55        get_attribute_values. The idea is that the dataset can be
56        equipped with information about which attribute to return.
57        If None, the default is the "first"
58       
59        file_name: Name of input netCDF file or xya file. netCDF file must
60        have dimensions "points" etc.
61        xya file is a comma seperated file with x, y and attribute data.
62        the first line must be the attribute names eg elevation
63       
64        The format for a .xya file is:
65            1st line:     [attribute names]
66            other lines:  x y [attributes]
67
68            for example:
69            elevation, friction
70            0.6, 0.7, 4.9, 0.3
71            1.9, 2.8, 5, 0.3
72            2.7, 2.4, 5.2, 0.3
73
74        The first two columns are always implicitly assumed to be x, y
75        coordinates.
76        Use the same delimiter for the attribute names and the data
77
78        An xya file can optionally end with
79            #geo reference
80            56
81            466600.0
82            8644444.0
83
84        When the 1st # is the zone,
85        2nd # the xllcorner and
86        3rd # the yllcorner
87
88        An issue with the xya format is that the attribute column order
89        is not be controlled.  The info is stored in a dictionary and it's
90        written however
91       
92        The format for a Points dictionary is:
93
94          ['pointlist'] a 2 column array describing points. 1st column x,
95          2nd column y.
96          ['attributelist'], a dictionary of 1D arrays, representing
97          attribute values at the point.  The dictionary key is the attribute
98          header.
99          ['geo_reference'] a Geo_refernece object. Use if the point
100          information is relative. This is optional.
101            eg
102            dic['pointlist'] = [[1.0,2.0],[3.0,5.0]]
103            dic['attributelist']['elevation'] = [[7.0,5.0]
104               
105        delimiter: is the file delimiter that will be used when
106            importing the file
107           
108        verbose:
109
110        load_file_now: if load file now is true, the file is
111        loaded during instanciation.
112         
113        """
114
115        if isinstance(data_points, basestring):
116            # assume data point is really a file name
117            file_name = data_points
118
119        self.set_verbose(verbose)
120        self.geo_reference=None #create the attribute
121        self.file_name = file_name
122        self.max_read_lines = max_read_lines
123        if file_name is None:
124            if delimiter is not None:
125                msg = 'No file specified yet a delimiter is provided!'
126                raise ValueError, msg
127            file_name = None
128            if latitudes is not None or longitudes is not None or \
129                   points_are_lats_longs:
130                data_points, geo_reference =  \
131                             self._set_using_lat_long(latitudes=latitudes,
132                                  longitudes=longitudes,
133                                  geo_reference=geo_reference,
134                                  data_points=data_points,
135                                  points_are_lats_longs=points_are_lats_longs)
136            self.check_data_points(data_points)
137            self.set_attributes(attributes)
138            self.set_geo_reference(geo_reference)
139            self.set_default_attribute_name(default_attribute_name)
140
141        elif load_file_now is True:
142            # watch for case where file name and points,
143            # attributes etc are provided!!
144            # if file name then all provided info will be removed!
145            self.import_points_file(file_name, delimiter, verbose)
146               
147            self.check_data_points(self.data_points)
148            self.set_attributes(self.attributes) 
149            self.set_geo_reference(self.geo_reference)
150            self.set_default_attribute_name(default_attribute_name)
151
152        #Why?   
153        #assert self.attributes is None or isinstance(self.attributes, DictType)
154        #This is a hassle when blocking, so I've removed it.
155       
156
157    def __len__(self):
158        return len(self.data_points)
159
160    def __repr__(self):
161        return str(self.get_data_points(absolute=True))
162   
163   
164    def check_data_points(self, data_points):
165        """Checks data points
166        """
167       
168        if data_points is None:
169            self.data_points = None
170            msg = 'There is no data or file provided!'
171            raise ValueError, msg
172           
173        else:
174            self.data_points = ensure_numeric(data_points)
175            #print "self.data_points.shape",self.data_points.shape
176            if not (0,) == self.data_points.shape:
177                assert len(self.data_points.shape) == 2
178                assert self.data_points.shape[1] == 2
179
180    def set_attributes(self, attributes):
181        """Check and assign attributes dictionary
182        """
183       
184        if attributes is None:
185            self.attributes = None
186            return
187       
188        if not isinstance(attributes, DictType):
189            #Convert single attribute into dictionary
190            attributes = {'attribute': attributes}
191
192        #Check input attributes   
193        for key in attributes.keys():
194            try:
195                attributes[key] = ensure_numeric(attributes[key])
196            except:
197                msg = 'Attribute %s could not be converted' %key
198                msg += 'to a numeric vector'
199                raise msg
200
201        self.attributes = attributes   
202
203
204    def set_geo_reference(self, geo_reference):
205
206        from anuga.coordinate_transforms.geo_reference import Geo_reference
207
208        if geo_reference is None:
209            geo_reference = Geo_reference() # Use default
210        if not isinstance(geo_reference, Geo_reference):
211            msg = 'Argument geo_reference must be a valid Geo_reference \n'
212            msg += 'object or None.'
213            raise msg
214
215        # if a geo ref already exists, change the point data to
216        # represent the new geo-ref
217        if  self.geo_reference is not None:
218            #FIXME: Maybe put out a warning here...
219            self.data_points = self.get_data_points \
220                               (geo_reference=geo_reference)
221           
222        self.geo_reference = geo_reference
223
224
225    def set_default_attribute_name(self, default_attribute_name):
226        self.default_attribute_name = default_attribute_name
227
228    def set_verbose(self, verbose=False):
229        if verbose in [False, True]:
230            self.verbose = verbose
231        else:
232            msg = 'Illegal value: %s' %str(verbose)
233            raise Exception, msg
234
235    def clip(self, polygon, closed=True):
236        """Clip geospatial data by a polygon
237
238        Input
239          polygon - Either a list of points, an Nx2 array or
240                    a Geospatial data object.
241          closed - (optional) determine whether points on boundary should be
242          regarded as belonging to the polygon (closed = True)
243          or not (closed = False). Default is True.
244           
245        Output
246          New geospatial data object representing points inside
247          specified polygon.
248       
249        """
250
251        from anuga.utilities.polygon import inside_polygon
252
253        if isinstance(polygon, Geospatial_data):
254            # Polygon is an object - extract points
255            polygon = polygon.get_data_points()
256
257        points = self.get_data_points()   
258        inside_indices = inside_polygon(points, polygon, closed)
259
260        clipped_G = self.get_sample(inside_indices)
261#        clipped_points = take(points, inside_indices)
262
263        # Clip all attributes
264#        attributes = self.get_all_attributes()
265
266#        clipped_attributes = {}
267#        if attributes is not None:
268#            for key, att in attributes.items():
269#                clipped_attributes[key] = take(att, inside_indices)
270
271#        return Geospatial_data(clipped_points, clipped_attributes)
272        return clipped_G
273       
274       
275    def clip_outside(self, polygon, closed=True):
276        """Clip geospatial date by a polygon, keeping data OUTSIDE of polygon
277
278        Input
279          polygon - Either a list of points, an Nx2 array or
280                    a Geospatial data object.
281          closed - (optional) determine whether points on boundary should be
282          regarded as belonging to the polygon (closed = True)
283          or not (closed = False). Default is True.
284         
285        Output
286          Geospatial data object representing point OUTSIDE specified polygon
287       
288        """
289
290        from anuga.utilities.polygon import outside_polygon
291
292        if isinstance(polygon, Geospatial_data):
293            # Polygon is an object - extract points
294            polygon = polygon.get_data_points()
295
296        points = self.get_data_points()   
297        outside_indices = outside_polygon(points, polygon, closed)
298
299        clipped_G = self.get_sample(outside_indices)
300
301#        clipped_points = take(points, outside_indices)
302
303        # Clip all attributes
304#        attributes = self.get_all_attributes()
305
306#        clipped_attributes = {}
307#        if attributes is not None:
308#            for key, att in attributes.items():
309#                clipped_attributes[key] = take(att, outside_indices)
310
311#        return Geospatial_data(clipped_points, clipped_attributes)
312        return clipped_G
313
314   
315    def _set_using_lat_long(self,
316                            latitudes,
317                            longitudes,
318                            geo_reference,
319                            data_points,
320                            points_are_lats_longs):
321       
322        if geo_reference is not None:
323            msg = """A georeference is specified yet latitude and longitude
324            are also specified!"""
325            raise ValueError, msg
326       
327        if data_points is not None and not points_are_lats_longs:
328            msg = """Data points are specified yet latitude and
329            longitude are also specified!"""
330            raise ValueError, msg
331       
332        if points_are_lats_longs:
333            if data_points is None:
334                msg = """Data points are not specified !"""
335                raise ValueError, msg
336            lats_longs = ensure_numeric(data_points)
337            latitudes = ravel(lats_longs[:,0:1])
338            longitudes = ravel(lats_longs[:,1:])
339           
340        if latitudes is None and longitudes is None:
341            msg = """Latitudes and Longitudes are not."""
342            raise ValueError, msg
343       
344        if latitudes is None:
345            msg = """Longitudes are specified yet latitudes aren't!"""
346            raise ValueError, msg
347       
348        if longitudes is None:
349            msg = """Latitudes are specified yet longitudes aren't!"""
350            raise ValueError, msg
351       
352        data_points, zone  = convert_from_latlon_to_utm(latitudes=latitudes,
353                                                        longitudes=longitudes)
354       
355        return data_points, Geo_reference(zone=zone)
356   
357    def get_geo_reference(self):
358        return self.geo_reference
359       
360    def get_data_points(self, absolute=True, geo_reference=None):
361        """Get coordinates for all data points as an Nx2 array
362
363        If absolute is True absolute UTM coordinates are returned otherwise
364        returned coordinates are relative to the internal georeference's
365        xll and yll corners.
366
367        If a geo_reference is passed the points are returned relative
368        to that geo_reference.
369
370        Default: absolute is True.
371        """
372
373        if absolute is True and geo_reference is None:
374            return self.geo_reference.get_absolute(self.data_points)
375        elif geo_reference is not None:
376            return geo_reference.change_points_geo_ref \
377                               (self.data_points, 
378                                self.geo_reference)
379        else:
380            return self.data_points
381       
382   
383    def get_attributes(self, attribute_name=None):
384        """Return values for one named attribute.
385
386        If attribute_name is None, default_attribute_name is used
387        """
388
389        if attribute_name is None:
390            if self.default_attribute_name is not None:
391                attribute_name = self.default_attribute_name
392            else:
393                attribute_name = self.attributes.keys()[0] 
394                # above line takes the first one from keys
395       
396        if self.verbose is True:
397            print 'Using attribute %s' %attribute_name
398            print 'Available attributes: %s' %(self.attributes.keys())       
399
400        msg = 'Attribute name %s does not exist in data set' %attribute_name
401        assert self.attributes.has_key(attribute_name), msg
402
403        return self.attributes[attribute_name]
404
405    def get_all_attributes(self):
406        """
407        Return values for all attributes.
408        The return value is either None or a dictionary (possibly empty).
409        """
410
411        return self.attributes
412
413    def __add__(self, other):
414        """
415        Returns the addition of 2 geospatical objects,
416        objects are concatencated to the end of each other
417           
418        NOTE: doesn't add if objects contain different
419        attributes 
420       
421        Always return relative points!
422        """
423
424        # find objects zone and checks if the same
425        geo_ref1 = self.get_geo_reference()
426        zone1 = geo_ref1.get_zone()
427       
428        geo_ref2 = other.get_geo_reference()
429        zone2 = geo_ref2.get_zone()
430
431        geo_ref1.reconcile_zones(geo_ref2)
432
433
434        # sets xll and yll as the smallest from self and other
435        # FIXME (Duncan and Ole): use lower left corner derived from
436        # absolute coordinates
437        if self.geo_reference.xllcorner <= other.geo_reference.xllcorner:
438            xll = self.geo_reference.xllcorner
439        else:
440            xll = other.geo_reference.xllcorner
441
442        if self.geo_reference.yllcorner <= other.geo_reference.yllcorner:
443            yll = self.geo_reference.yllcorner
444        else:
445            yll = other.geo_reference.yllcorner
446        new_geo_ref = Geo_reference(geo_ref1.get_zone(), xll, yll)
447
448        xll = yll = 0. 
449       
450        relative_points1 = self.get_data_points(absolute = False)
451        relative_points2 = other.get_data_points(absolute = False)
452
453       
454        new_relative_points1 = new_geo_ref.\
455                               change_points_geo_ref(relative_points1,
456                                                     geo_ref1)
457        new_relative_points2 = new_geo_ref.\
458                               change_points_geo_ref(relative_points2,
459                                                     geo_ref2)
460       
461        # Now both point sets are relative to new_geo_ref and
462        # zones have been reconciled
463
464        # Concatenate points
465        new_points = concatenate((new_relative_points1,
466                                  new_relative_points2),
467                                  axis = 0)
468     
469        # Concatenate attributes if any
470        if self.attributes is None:
471            if other.attributes is not None:
472                msg = 'Both geospatial_data objects must have the same \n'
473                msg += 'attributes to allow addition.'
474                raise Exception, msg
475           
476            new_attributes = None
477        else:   
478            new_attributes = {}
479            for x in self.attributes.keys():
480                if other.attributes.has_key(x):
481
482                    attrib1 = self.attributes[x]
483                    attrib2 = other.attributes[x]
484                    new_attributes[x] = concatenate((attrib1, attrib2))
485
486                else:
487                    msg = 'Both geospatial_data objects must have the same \n'
488                    msg += 'attributes to allow addition.'
489                    raise Exception, msg
490
491        # Instantiate new data object and return   
492        return Geospatial_data(new_points,
493                               new_attributes,
494                               new_geo_ref)
495   
496    ###
497    #  IMPORT/EXPORT POINTS FILES
498    ###
499
500    def import_points_file(self, file_name, delimiter=None, verbose=False):
501        """ load an .xya or .pts file
502        Note: will throw an IOError if it can't load the file.
503        Catch these!
504
505        Post condition: self.attributes dictionary has been set
506        """
507       
508        if access(file_name, F_OK) == 0 :
509            msg = 'File %s does not exist or is not accessible' %file_name
510            raise IOError, msg
511       
512        attributes = {}
513        if file_name[-4:]== ".xya":
514            try:
515                if delimiter == None:
516                    try:
517                        fd = open(file_name)
518                        data_points, attributes, geo_reference =\
519                                     _read_xya_file(fd, ',')
520                    except TitleError:
521                        fd.close()
522                        fd = open(file_name)
523                        data_points, attributes, geo_reference =\
524                                     _read_xya_file(fd, ' ')
525                else:
526                    fd = open(file_name)
527                    data_points, attributes, geo_reference =\
528                                 _read_xya_file(fd, delimiter)
529                fd.close()
530            except (IndexError,ValueError,SyntaxError):
531                fd.close()   
532                msg = 'Could not open file %s ' %file_name
533                raise IOError, msg
534            except IOError, e:
535                fd.close() 
536                # Catch this to add an error message
537                msg = 'Could not open file or incorrect file format %s:%s'\
538                      %(file_name, e)
539                raise IOError, msg
540               
541        elif file_name[-4:]== ".pts":
542            try:
543                data_points, attributes, geo_reference =\
544                             _read_pts_file(file_name, verbose)
545            except IOError, e:   
546                msg = 'Could not open file %s ' %file_name
547                raise IOError, msg 
548       
549        elif file_name[-4:]== ".txt" or file_name[-4:]== ".csv":
550            #let's do ticket#116 stuff
551            #
552            try:
553                data_points, attributes, geo_reference =\
554                             _read_csv_file(file_name, verbose)
555            except IOError, e:   
556                msg = 'Could not open file %s ' %file_name
557                raise IOError, msg       
558        else:     
559            msg = 'Extension %s is unknown' %file_name[-4:]
560            raise IOError, msg
561#        print'in import data_points', data_points
562#        print'in import attributes', attributes
563#        print'in import data_points', geo_reference
564        self.data_points = data_points
565        self.attributes = attributes
566        self.geo_reference = geo_reference
567   
568#        return all_data
569   
570    def export_points_file(self, file_name, absolute=True):
571       
572        """
573        write a points file, file_name, as a text (.xya) or binary (.pts) file
574        file_name is the file name, including the extension
575        The point_dict is defined at the top of this file.
576       
577        If absolute is True data points at returned added to the xll and yll
578        and geo_reference as None
579       
580        If absolute is False data points at returned as relative to the xll
581        and yll and geo_reference remains uneffected
582        """
583
584        if absolute is False and file_name[-4:] == ".xya":
585            msg = 'The text file values must be absolute.   '
586            msg += 'Text file format is moving to comma seperated .txt files.'
587            warn(msg, DeprecationWarning) 
588            error(msg, DeprecationWarning) 
589
590        if (file_name[-4:] == ".xya"):
591            msg = '.xya format is deprecated.  Please use .txt.'
592            warn(msg, DeprecationWarning)
593            #import sys; sys.exit()
594            if absolute is True:         
595                _write_xya_file(file_name,
596                                self.get_data_points(absolute=True), 
597                                self.get_all_attributes())
598            else:
599                _write_xya_file(file_name,
600                                self.get_data_points(absolute=False), 
601                                self.get_all_attributes(),
602                                self.get_geo_reference())
603                                   
604        elif (file_name[-4:] == ".pts"):
605            if absolute is True:
606                _write_pts_file(file_name,
607                                self.get_data_points(absolute), 
608                                self.get_all_attributes())
609            else:
610                _write_pts_file(file_name,
611                                self.get_data_points(absolute), 
612                                self.get_all_attributes(),
613                                self.get_geo_reference())
614               
615        elif file_name[-4:] == ".txt" or file_name[-4:] == ".csv":
616            msg = "ERROR: trying to write a .txt file with relative data."
617            assert absolute, msg
618            _write_csv_file(file_name,
619                            self.get_data_points(absolute=True), 
620                            self.get_all_attributes())
621                                   
622        else:
623            msg = 'Unknown file type %s ' %file_name
624            raise IOError, msg
625       
626    def get_sample(self, indices):
627        """ Returns a object which is a subset of the original
628        and the data points and attributes in this new object refer to
629        the indices provided
630       
631        Input
632            indices- a list of integers that represent the new object
633        Output
634            New geospatial data object representing points specified by
635            the indices
636            """
637        #FIXME: add the geo_reference to this
638       
639        points = self.get_data_points()
640        sampled_points = take(points, indices)
641
642        attributes = self.get_all_attributes()
643
644        sampled_attributes = {}
645        if attributes is not None:
646            for key, att in attributes.items():
647                sampled_attributes[key] = take(att, indices)
648
649        return Geospatial_data(sampled_points, sampled_attributes)
650   
651   
652    def split(self, factor=0.5):
653        """Returns two geospatial_data object, first is size of the 'factor'
654        smaller the original and the second is the remainer. The two new
655        object are disjoin set of each other.
656       
657        Points of the two new object have selected RANDOMLY.
658        AND if factor is a decimal it will round (2.25 to 2 and 2.5 to 3)
659       
660       
661        Input - the factor which to split the object, if 0.1 then 10% of the
662            object will be returned
663       
664        Output - two geospatial_data objects that are disjoint sets of the
665            original
666            """
667       
668        i=0
669        self_size = len(self)
670        random_list = []
671        remainder_list = []
672        new_size = round(factor*self_size)
673   #     print'Split original %s by %s' %(self_size, factor)
674   #     print'New samples are %s and %s in size' %(int(round(factor*self_size)),int(self_size-new_size))
675       
676        #find unique random numbers
677        while i < new_size:
678            random_num = randint(0,self_size-1)
679            if random_num not in random_list:
680                random_list.append(random_num)
681                i=i+1
682
683        #Make list of opposite to random_list
684        for i in range(0,self_size,1):
685            remainder_list.append(i)
686
687        #remove random list from remainder_list to get correct remainder_list
688        #need to sort and reverse so the pop() works correctly
689        random_list.sort()
690        random_list.reverse()
691        for i in random_list:
692            remainder_list.pop(i)
693           
694        #get new samples
695        G1 = self.get_sample(random_list)
696        G2 = self.get_sample(remainder_list)
697
698        return G1, G2
699
700    def __iter__(self):
701        # read in the header and save the file pointer position
702
703        #FIXME - what to do if the file isn't there
704
705        #FIXME - give warning if the file format is .xya
706        if self.file_name[-4:]== ".xya" or self.file_name[-4:]== ".pts":
707            #let's just read it all
708            pass
709        else:
710            file_pointer = open(self.file_name)
711            self.header, self.file_pointer = \
712                         _read_csv_file_header(file_pointer)
713
714            if self.max_read_lines is None:
715                self.max_read_lines = MAX_READ_LINES
716        return self
717   
718    def next(self):
719        # read a block, instanciate a new geospatial and return it
720        if self.file_name[-4:]== ".xya" or self.file_name[-4:]== ".pts":
721            if not hasattr(self,'finished_reading') or \
722                   self.finished_reading is False:
723                #let's just read it all
724                geo = Geospatial_data(self.file_name)
725                self.finished_reading = True
726            else:
727                raise StopIteration
728                self.finished_reading = False
729               
730        else:
731            try:
732                pointlist, att_dict, self.file_pointer = \
733                   _read_csv_file_blocking( self.file_pointer,
734                                         self.header[:],
735                                         max_read_lines=self.max_read_lines) 
736                geo = Geospatial_data(pointlist, att_dict)
737            except StopIteration:
738                self.file_pointer.close()
739                raise StopIteration
740        return geo
741
742def _read_pts_file(file_name, verbose=False):
743    """Read .pts NetCDF file
744   
745    Return a dic of array of points, and dic of array of attribute
746    eg
747    dic['points'] = [[1.0,2.0],[3.0,5.0]]
748    dic['attributelist']['elevation'] = [[7.0,5.0]
749    """   
750
751    from Scientific.IO.NetCDF import NetCDFFile
752   
753    if verbose: print 'Reading ', file_name
754   
755       
756    # see if the file is there.  Throw a QUIET IO error if it isn't
757    fd = open(file_name,'r')
758    fd.close()
759   
760    #throws prints to screen if file not present
761    fid = NetCDFFile(file_name, 'r') 
762   
763#    point_atts = {} 
764        # Get the variables
765#    point_atts['pointlist'] = array(fid.variables['points'])
766    pointlist = array(fid.variables['points'])
767    keys = fid.variables.keys()
768    if verbose: print 'Got %d variables: %s' %(len(keys), keys)
769    try:
770        keys.remove('points')
771    except IOError, e:       
772        fid.close()   
773        msg = 'Expected keyword "points" but could not find it'
774        raise IOError, msg
775   
776    attributes = {}
777    for key in keys:
778        if verbose: print "reading attribute '%s'" %key
779           
780        attributes[key] = array(fid.variables[key])
781   
782#    point_atts['attributelist'] = attributes
783   
784    try:
785        geo_reference = Geo_reference(NetCDFObject=fid)
786#        point_atts['geo_reference'] = geo_reference
787    except AttributeError, e:
788        #geo_ref not compulsory
789#        point_atts['geo_reference'] = None
790        geo_reference = None
791   
792    fid.close()
793   
794    return pointlist, attributes, geo_reference
795
796
797def _read_csv_file(file_name, verbose=False):
798    """Read .csv file
799   
800    Return a dic of array of points, and dic of array of attribute
801    eg
802    dic['points'] = [[1.0,2.0],[3.0,5.0]]
803    dic['attributelist']['elevation'] = [[7.0,5.0]
804    """
805   
806    #from anuga.shallow_water.data_manager import Exposure_csv
807    #csv =Exposure_csv(file_name)
808   
809    file_pointer = open(file_name)
810    header, file_pointer = _read_csv_file_header(file_pointer)
811
812    while True:
813        try:
814            pointlist, att_dict,file_pointer  = _read_csv_file_blocking( \
815                file_pointer,
816                header,
817                max_read_lines=MAX_READ_LINES) #FIXME: must be highest int
818        except StopIteration:
819            break
820       
821    file_pointer.close()
822    return pointlist, att_dict, None   
823
824CSV_DELIMITER = ','
825def _read_csv_file_header(file_pointer,
826                          delimiter=CSV_DELIMITER,
827                          verbose=False):
828
829    """Read the header of a .csv file
830    Return a list of the header names
831    """
832    line = file_pointer.readline()
833    header = clean_line(line, delimiter)
834    return header, file_pointer
835
836def _read_csv_file_blocking(file_pointer, header,
837                            delimiter=CSV_DELIMITER,
838                            max_read_lines=MAX_READ_LINES,
839                            verbose=False):
840   
841
842    """
843    Read the body of a .csv file.
844    header: The list header of the csv file, with the x and y labels.
845    """
846    points = []
847    pointattributes = []
848    att_dict = {}
849
850    #This is to remove the x and y headers.
851    header = header[:]
852    header.pop(0)
853    header.pop(0)
854   
855    read_lines = 0
856    while read_lines<max_read_lines:
857        line = file_pointer.readline()
858        #print "line",line
859        numbers = clean_line(line,delimiter)
860        if len(numbers) <= 1:
861            break
862        if line[0] == '#':
863            continue
864        read_lines += 1
865        if True: # remove.. #if numbers != []:
866            try:
867                x = float(numbers[0])
868                y = float(numbers[1])
869                points.append([x,y])
870                numbers.pop(0)
871                numbers.pop(0)
872                if len(header) != len(numbers):
873                   
874                    file_pointer.close() 
875                    # It might not be a problem with the header
876                    #raise TitleAmountError
877                    msg = "File load error.  There might be a problem with the file header"
878                    raise IOError, msg
879                for i,num in enumerate(numbers):
880                    num.strip()
881                    if num != '\n' and num != '':
882                        #attributes.append(float(num))
883                        att_dict.setdefault(header[i],[]).append(float(num))
884            #except IOError:           
885            except ValueError:
886                raise SyntaxError
887    if points == []:
888        raise StopIteration
889       
890   
891    pointlist = array(points).astype(Float)
892    for key in att_dict.keys():
893        att_dict[key] = array(att_dict[key]).astype(Float)
894       
895    return pointlist, att_dict,file_pointer
896
897def _read_xya_file(fd, delimiter):
898    points = []
899    pointattributes = []
900    title = fd.readline()
901    att_names = clean_line(title,delimiter)
902    att_dict = {}
903    line = fd.readline()
904    numbers = clean_line(line,delimiter)
905   
906    while len(numbers) > 1 and line[0] <> '#':
907        if numbers != []:
908            try:
909                x = float(numbers[0])
910                y = float(numbers[1])
911                points.append([x,y])
912                numbers.pop(0)
913                numbers.pop(0)
914                if len(att_names) != len(numbers):
915                    fd.close()
916                    # It might not be a problem with the title
917                    #raise TitleAmountError
918                    raise IOError
919                for i,num in enumerate(numbers):
920                    num.strip()
921                    if num != '\n' and num != '':
922                        #attributes.append(float(num))
923                        att_dict.setdefault(att_names[i],[]).append(float(num))
924            except ValueError:
925                raise SyntaxError
926        line = fd.readline()
927        numbers = clean_line(line,delimiter)
928   
929    if line == '':
930        geo_reference = None
931    else:
932        geo_reference = Geo_reference(ASCIIFile=fd,read_title=line)
933       
934   
935    pointlist = array(points).astype(Float)
936    for key in att_dict.keys():
937        att_dict[key] = array(att_dict[key]).astype(Float)
938   
939    return pointlist, att_dict, geo_reference
940
941def _write_pts_file(file_name,
942                    write_data_points,
943                    write_attributes=None, 
944                    write_geo_reference=None):
945    """
946    Write .pts NetCDF file   
947
948    NOTE: Below might not be valid ask Duncan : NB 5/2006
949   
950    WARNING: This function mangles the point_atts data structure
951    #F??ME: (DSG)This format has issues.
952    # There can't be an attribute called points
953    # consider format change
954    # method changed by NB not sure if above statement is correct
955   
956    should create new test for this
957    legal_keys = ['pointlist', 'attributelist', 'geo_reference']
958    for key in point_atts.keys():
959        msg = 'Key %s is illegal. Valid keys are %s' %(key, legal_keys)
960        assert key in legal_keys, msg
961    """   
962    from Scientific.IO.NetCDF import NetCDFFile
963    # NetCDF file definition
964    outfile = NetCDFFile(file_name, 'w')
965   
966    #Create new file
967    outfile.institution = 'Geoscience Australia'
968    outfile.description = 'NetCDF format for compact and portable storage ' +\
969                          'of spatial point data'
970   
971    # dimension definitions
972    shape = write_data_points.shape[0]
973    outfile.createDimension('number_of_points', shape) 
974    outfile.createDimension('number_of_dimensions', 2) #This is 2d data
975   
976    # variable definition
977    outfile.createVariable('points', Float, ('number_of_points',
978                                             'number_of_dimensions'))
979
980    #create variables 
981    outfile.variables['points'][:] = write_data_points #.astype(Float32)
982
983    if write_attributes is not None:
984        for key in write_attributes.keys():
985            outfile.createVariable(key, Float, ('number_of_points',))
986            outfile.variables[key][:] = write_attributes[key] #.astype(Float32)
987       
988    if write_geo_reference is not None:
989        write_geo_reference.write_NetCDF(outfile)
990       
991    outfile.close() 
992 
993
994
995def _write_xya_file(file_name,
996                    write_data_points,
997                    write_attributes=None, 
998                    write_geo_reference=None, 
999                    delimiter=','):
1000    """
1001    export a file, file_name, with the xya format
1002   
1003    """
1004    points = write_data_points
1005    pointattributes = write_attributes
1006   
1007    fd = open(file_name,'w')
1008    titlelist = ""
1009    if pointattributes is not None:   
1010        for title in pointattributes.keys():
1011            titlelist = titlelist + title + delimiter
1012        titlelist = titlelist[0:-len(delimiter)] # remove the last delimiter
1013    fd.write(titlelist+"\n")
1014   
1015    #<vertex #> <x> <y> [attributes]
1016    for i, vert in enumerate( points):
1017
1018
1019        if pointattributes is not None:           
1020            attlist = ","
1021            for att in pointattributes.keys():
1022                attlist = attlist + str(pointattributes[att][i])+ delimiter
1023            attlist = attlist[0:-len(delimiter)] # remove the last delimiter
1024            attlist.strip()
1025        else:
1026            attlist = ''
1027
1028        fd.write(str(vert[0]) + delimiter +
1029                 str(vert[1]) + attlist + "\n")
1030
1031    if  write_geo_reference is not None:
1032        write_geo_reference.write_ASCII(fd)
1033    fd.close()
1034
1035
1036def _write_csv_file(file_name,
1037                    write_data_points,
1038                    write_attributes=None, 
1039                    delimiter=','):
1040    """
1041    export a file, file_name, with the xya format
1042   
1043    """
1044    points = write_data_points
1045    pointattributes = write_attributes
1046   
1047    fd = open(file_name,'w')
1048    titlelist = "x" + delimiter + "y"  + delimiter
1049    if pointattributes is not None:   
1050        for title in pointattributes.keys():
1051            titlelist = titlelist + title + delimiter
1052        titlelist = titlelist[0:-len(delimiter)] # remove the last delimiter
1053    fd.write(titlelist+"\n")
1054   
1055    #<vertex #> <x> <y> [attributes]
1056    for i, vert in enumerate( points):
1057
1058
1059        if pointattributes is not None:           
1060            attlist = ","
1061            for att in pointattributes.keys():
1062                attlist = attlist + str(pointattributes[att][i])+ delimiter
1063            attlist = attlist[0:-len(delimiter)] # remove the last delimiter
1064            attlist.strip()
1065        else:
1066            attlist = ''
1067
1068        fd.write(str(vert[0]) + delimiter +
1069                 str(vert[1]) + attlist + "\n")
1070
1071    fd.close()
1072   
1073def _point_atts2array(point_atts):
1074    point_atts['pointlist'] = array(point_atts['pointlist']).astype(Float)
1075   
1076    for key in point_atts['attributelist'].keys():
1077        point_atts['attributelist'][key]=\
1078                array(point_atts['attributelist'][key]).astype(Float)
1079    return point_atts
1080
1081   
1082
1083
1084def geospatial_data2points_dictionary(geospatial_data):
1085    """Convert geospatial data to points_dictionary
1086    """
1087
1088    points_dictionary = {}
1089    points_dictionary['pointlist'] = geospatial_data.data_points
1090
1091    points_dictionary['attributelist'] = {}
1092
1093    for attribute_name in geospatial_data.attributes.keys():
1094        val = geospatial_data.attributes[attribute_name]
1095        points_dictionary['attributelist'][attribute_name] = val
1096
1097    points_dictionary['geo_reference'] = geospatial_data.geo_reference
1098
1099    return points_dictionary
1100
1101   
1102def points_dictionary2geospatial_data(points_dictionary):
1103    """Convert points_dictionary to geospatial data object
1104    """
1105
1106    msg = 'Points dictionary must have key pointlist' 
1107    assert points_dictionary.has_key('pointlist'), msg
1108
1109    msg = 'Points dictionary must have key attributelist'     
1110    assert points_dictionary.has_key('attributelist'), msg       
1111
1112    if points_dictionary.has_key('geo_reference'):
1113        geo = points_dictionary['geo_reference']
1114    else:
1115        geo = None
1116   
1117    return Geospatial_data(points_dictionary['pointlist'],
1118                           points_dictionary['attributelist'],
1119                           geo_reference = geo)
1120
1121def clean_line(line,delimiter):     
1122    """Remove whitespace
1123    """
1124    #print ">%s" %line
1125    line = line.strip()
1126    #print "stripped>%s" %line
1127    numbers = line.split(delimiter)
1128    i = len(numbers) - 1
1129    while i >= 0:
1130        if numbers[i] == '':
1131            numbers.pop(i)
1132        else:
1133            numbers[i] = numbers[i].strip()
1134       
1135        i += -1
1136    #for num in numbers:
1137    #    print "num>%s<" %num
1138    return numbers
1139           
1140def ensure_absolute(points, geo_reference=None):
1141    """
1142    This function inputs several formats and
1143    outputs one format. - a numeric array of absolute points.
1144
1145    Inputed formats are;
1146    points: List or numeric array of coordinate pairs [xi, eta] of
1147              points or geospatial object or points file name
1148
1149    mesh_origin: A geo_reference object or 3-tuples consisting of
1150                 UTM zone, easting and northing.
1151                 If specified vertex coordinates are assumed to be
1152                 relative to their respective origins.
1153    """
1154    if isinstance(points,type('')):
1155        #It's a string
1156        #assume it is a point file
1157        points = Geospatial_data(file_name = points)
1158       
1159    if isinstance(points,Geospatial_data):
1160        points = points.get_data_points( \
1161                absolute = True)
1162        msg = "Use a Geospatial_data object or a mesh origin. Not both."
1163        assert geo_reference == None, msg
1164           
1165    else:
1166        points = ensure_numeric(points, Float)
1167    if geo_reference is None:
1168        geo = None #Geo_reference()
1169    else:
1170        if isinstance(geo_reference, Geo_reference):
1171            geo = geo_reference
1172        else:
1173            geo = Geo_reference(geo_reference[0],
1174                                geo_reference[1],
1175                                geo_reference[2])
1176        points = geo.get_absolute(points)
1177    return points
1178     
1179
1180def ensure_geospatial(points, geo_reference=None):
1181    """
1182    This function inputs several formats and
1183    outputs one format. - a geospatial_data instance.
1184
1185    Inputed formats are;
1186    points: List or numeric array of coordinate pairs [xi, eta] of
1187              points or geospatial object
1188
1189    mesh_origin: A geo_reference object or 3-tuples consisting of
1190                 UTM zone, easting and northing.
1191                 If specified vertex coordinates are assumed to be
1192                 relative to their respective origins.
1193    """
1194    if isinstance(points,Geospatial_data):
1195        msg = "Use a Geospatial_data object or a mesh origin. Not both."
1196        assert geo_reference == None, msg
1197        return points   
1198    else:
1199        points = ensure_numeric(points, Float)
1200    if geo_reference is None:
1201        geo = None
1202    else:
1203        if isinstance(geo_reference, Geo_reference):
1204            geo = geo_reference
1205        else:
1206            geo = Geo_reference(geo_reference[0],
1207                                geo_reference[1],
1208                                geo_reference[2])
1209    points = Geospatial_data(data_points=points, geo_reference=geo)       
1210    return points
1211
1212#def file2xya(filename):
1213   
1214#    G = Geospatial_data(filename)
1215#    G.export_points_file()
1216
1217             
1218
1219         
1220if __name__ == "__main__":
1221    g = Geospatial_data("t.txt")
1222    print "g.get_data_points()", g.get_data_points()
1223    for i,a in enumerate(g):
1224        if i >3: break 
1225        print a
1226   
Note: See TracBrowser for help on using the repository browser.