source: anuga_core/source/anuga/geospatial_data/geospatial_data.py @ 4203

Last change on this file since 4203 was 4202, checked in by duncan, 18 years ago

sigh. the 'bug' that I fixed in 4199 wasn't a bug. It was correct. Reverting by doing svn merge -r4199:4198 https://datamining.anu.edu.au/svn/ga/ in inundation dir

File size: 47.1 KB
Line 
1"""Class Geospatial_data - Manipulation of locations on the planet and
2associated attributes.
3
4"""
5from sys import maxint
6from os import access, F_OK, R_OK
7from types import DictType
8from warnings import warn
9from string import lower
10from Numeric import concatenate, array, Float, shape, reshape, ravel, take, \
11                        size, shape
12from random import randint
13#from MA import tolist
14
15from Scientific.IO.NetCDF import NetCDFFile   
16   
17from anuga.utilities.numerical_tools import ensure_numeric
18from anuga.coordinate_transforms.geo_reference import Geo_reference, TitleError
19from anuga.coordinate_transforms.redfearn import convert_from_latlon_to_utm
20from anuga.utilities.anuga_exceptions import ANUGAError
21
22MAX_READ_LINES = 500       
23class Geospatial_data:
24
25    def __init__(self,
26                 data_points=None, # this can also be a points file name
27                 attributes=None,
28                 geo_reference=None,
29                 default_attribute_name=None,
30                 file_name=None,
31                 delimiter=None,
32                 latitudes=None,
33                 longitudes=None,
34                 points_are_lats_longs=False,
35                 max_read_lines=None,
36                 load_file_now=True,
37                 verbose=False):
38
39       
40        """
41        Create instance from data points and associated attributes
42
43        data_points: x,y coordinates in meters. Type must be either a
44        sequence of 2-tuples or an Mx2 Numeric array of floats.  A file name
45        can also be passed in here.
46
47        attributes: Associated values for each data point. The type
48        must be either a list or an array of length M or a dictionary
49        of lists (or arrays) of length M. In the latter case the keys
50        in the dictionary represent the attribute names, in the former
51        the attribute will get the default name "attribute".
52       
53        geo_reference: Object representing the origin of the data
54        points. It contains UTM zone, easting and northing and data
55        points are assumed to be relative to this origin.
56        If geo_reference is None, the default geo ref object is used.
57
58        default_attribute_name: Name of default attribute to be used with
59        get_attribute_values. The idea is that the dataset can be
60        equipped with information about which attribute to return.
61        If None, the default is the "first"
62
63        latitudes, longitudes: Vectors of latitudes and longitudes,
64        used to specify location instead of points.
65       
66        points_are_lats_longs: Set this as true if the points are actually
67        lats and longs, not UTM
68
69        max_read_lines: The number of rows read into memory when using
70        blocking to read a file.
71
72        load_file_now:  If true the file is automatically loaded
73        into the geospatial instance. Used when blocking.
74       
75        file_name: Name of input netCDF file or .txt file. netCDF file must
76        have dimensions "points" etc.
77        .txt file is a comma seperated file with x, y and attribute
78        data.
79       
80        The first line has the titles of the columns.  The first two
81        column titles are checked to see if they start with lat or
82        long (not case sensitive).  If so the data is assumed to be
83        latitude and longitude, in decimal format and converted to
84        UTM.  Otherwise the first two columns are assumed to be the x
85        and y, and the title names acually used are ignored.
86 
87       
88        The format for a .txt file is:
89            1st line:     [column names]
90            other lines:  x y [attributes]
91
92            for example:
93            x, y, elevation, friction
94            0.6, 0.7, 4.9, 0.3
95            1.9, 2.8, 5, 0.3
96            2.7, 2.4, 5.2, 0.3
97
98        The first two columns are always  assumed to be x, y
99        coordinates.
100     
101        An issue with the xya format is that the attribute column order
102        is not be controlled.  The info is stored in a dictionary and it's
103        written in an order dependent on the hash order
104       
105        The format for a Points dictionary is:
106
107          ['pointlist'] a 2 column array describing points. 1st column x,
108          2nd column y.
109          ['attributelist'], a dictionary of 1D arrays, representing
110          attribute values at the point.  The dictionary key is the attribute
111          header.
112          ['geo_reference'] a Geo_refernece object. Use if the point
113          information is relative. This is optional.
114            eg
115            dic['pointlist'] = [[1.0,2.0],[3.0,5.0]]
116            dic['attributelist']['elevation'] = [[7.0,5.0]
117               
118        delimiter: is the file delimiter that will be used when
119            importing a .xya file, which is being phased out.
120           
121        verbose:
122
123         
124        """
125
126        if isinstance(data_points, basestring):
127            # assume data point is really a file name
128            file_name = data_points
129
130        self.set_verbose(verbose)
131        self.geo_reference=None #create the attribute
132        self.file_name = file_name
133        self.max_read_lines = max_read_lines
134
135        if delimiter is not None:
136            msg = 'Specifying delimiters will be removed.'
137            msg = 'Text file format is moving to comma seperated .txt files.'
138            warn(msg, DeprecationWarning) 
139        if file_name is None:
140            if delimiter is not None:
141                msg = 'No file specified yet a delimiter is provided!'
142                raise ValueError, msg
143            file_name = None
144            if latitudes is not None or longitudes is not None or \
145                   points_are_lats_longs:
146                data_points, geo_reference =  \
147                             _set_using_lat_long(latitudes=latitudes,
148                                  longitudes=longitudes,
149                                  geo_reference=geo_reference,
150                                  data_points=data_points,
151                                  points_are_lats_longs=points_are_lats_longs)
152            self.check_data_points(data_points)
153            self.set_attributes(attributes)
154            self.set_geo_reference(geo_reference)
155            self.set_default_attribute_name(default_attribute_name)
156
157        elif load_file_now is True:
158            # watch for case where file name and points,
159            # attributes etc are provided!!
160            # if file name then all provided info will be removed!
161            self.import_points_file(file_name, delimiter, verbose)
162               
163            self.check_data_points(self.data_points)
164            self.set_attributes(self.attributes) 
165            self.set_geo_reference(self.geo_reference)
166            self.set_default_attribute_name(default_attribute_name)
167
168        #Why?   
169        #assert self.attributes is None or isinstance(self.attributes, DictType)
170        #This is a hassle when blocking, so I've removed it.
171       
172
173    def __len__(self):
174        return len(self.data_points)
175
176    def __repr__(self):
177        return str(self.get_data_points(absolute=True))
178   
179   
180    def check_data_points(self, data_points):
181        """Checks data points
182        """
183       
184        if data_points is None:
185            self.data_points = None
186            msg = 'There is no data or file provided!'
187            raise ValueError, msg
188           
189        else:
190            self.data_points = ensure_numeric(data_points)
191            #print "self.data_points.shape",self.data_points.shape
192            if not (0,) == self.data_points.shape:
193                assert len(self.data_points.shape) == 2
194                assert self.data_points.shape[1] == 2
195
196    def set_attributes(self, attributes):
197        """Check and assign attributes dictionary
198        """
199       
200        if attributes is None:
201            self.attributes = None
202            return
203       
204        if not isinstance(attributes, DictType):
205            #Convert single attribute into dictionary
206            attributes = {'attribute': attributes}
207
208        #Check input attributes   
209        for key in attributes.keys():
210            try:
211                attributes[key] = ensure_numeric(attributes[key])
212            except:
213                msg = 'Attribute %s could not be converted' %key
214                msg += 'to a numeric vector'
215                raise msg
216
217        self.attributes = attributes   
218
219
220    def set_geo_reference(self, geo_reference):
221
222        from anuga.coordinate_transforms.geo_reference import Geo_reference
223
224        if geo_reference is None:
225            geo_reference = Geo_reference() # Use default
226        if not isinstance(geo_reference, Geo_reference):
227            msg = 'Argument geo_reference must be a valid Geo_reference \n'
228            msg += 'object or None.'
229            raise msg
230
231        # if a geo ref already exists, change the point data to
232        # represent the new geo-ref
233        if  self.geo_reference is not None:
234            #FIXME: Maybe put out a warning here...
235            self.data_points = self.get_data_points \
236                               (geo_reference=geo_reference)
237           
238        self.geo_reference = geo_reference
239
240
241    def set_default_attribute_name(self, default_attribute_name):
242        self.default_attribute_name = default_attribute_name
243
244    def set_verbose(self, verbose=False):
245        if verbose in [False, True]:
246            self.verbose = verbose
247        else:
248            msg = 'Illegal value: %s' %str(verbose)
249            raise Exception, msg
250
251    def clip(self, polygon, closed=True):
252        """Clip geospatial data by a polygon
253
254        Input
255          polygon - Either a list of points, an Nx2 array or
256                    a Geospatial data object.
257          closed - (optional) determine whether points on boundary should be
258          regarded as belonging to the polygon (closed = True)
259          or not (closed = False). Default is True.
260           
261        Output
262          New geospatial data object representing points inside
263          specified polygon.
264       
265        """
266
267        from anuga.utilities.polygon import inside_polygon
268
269        if isinstance(polygon, Geospatial_data):
270            # Polygon is an object - extract points
271            polygon = polygon.get_data_points()
272
273        points = self.get_data_points()   
274        inside_indices = inside_polygon(points, polygon, closed)
275
276        clipped_G = self.get_sample(inside_indices)
277#        clipped_points = take(points, inside_indices)
278
279        # Clip all attributes
280#        attributes = self.get_all_attributes()
281
282#        clipped_attributes = {}
283#        if attributes is not None:
284#            for key, att in attributes.items():
285#                clipped_attributes[key] = take(att, inside_indices)
286
287#        return Geospatial_data(clipped_points, clipped_attributes)
288        return clipped_G
289       
290       
291    def clip_outside(self, polygon, closed=True):
292        """Clip geospatial date by a polygon, keeping data OUTSIDE of polygon
293
294        Input
295          polygon - Either a list of points, an Nx2 array or
296                    a Geospatial data object.
297          closed - (optional) determine whether points on boundary should be
298          regarded as belonging to the polygon (closed = True)
299          or not (closed = False). Default is True.
300         
301        Output
302          Geospatial data object representing point OUTSIDE specified polygon
303       
304        """
305
306        from anuga.utilities.polygon import outside_polygon
307
308        if isinstance(polygon, Geospatial_data):
309            # Polygon is an object - extract points
310            polygon = polygon.get_data_points()
311
312        points = self.get_data_points()   
313        outside_indices = outside_polygon(points, polygon, closed)
314
315        clipped_G = self.get_sample(outside_indices)
316
317#        clipped_points = take(points, outside_indices)
318
319        # Clip all attributes
320#        attributes = self.get_all_attributes()
321
322#        clipped_attributes = {}
323#        if attributes is not None:
324#            for key, att in attributes.items():
325#                clipped_attributes[key] = take(att, outside_indices)
326
327#        return Geospatial_data(clipped_points, clipped_attributes)
328        return clipped_G
329
330   
331    def get_geo_reference(self):
332        return self.geo_reference
333       
334    def get_data_points(self, absolute=True, geo_reference=None):
335        """Get coordinates for all data points as an Nx2 array
336
337        If absolute is False returned coordinates are relative to the
338        internal georeference's xll and yll corners, otherwise
339        absolute UTM coordinates are returned.
340
341        If a geo_reference is passed the points are returned relative
342        to that geo_reference.
343
344        Default: absolute is True.
345        """
346
347        if absolute is True and geo_reference is None:
348            return self.geo_reference.get_absolute(self.data_points)
349        elif geo_reference is not None:
350            return geo_reference.change_points_geo_ref \
351                               (self.data_points, 
352                                self.geo_reference)
353        else:
354            return self.data_points
355       
356   
357    def get_attributes(self, attribute_name=None):
358        """Return values for one named attribute.
359
360        If attribute_name is None, default_attribute_name is used
361        """
362
363        if attribute_name is None:
364            if self.default_attribute_name is not None:
365                attribute_name = self.default_attribute_name
366            else:
367                attribute_name = self.attributes.keys()[0] 
368                # above line takes the first one from keys
369       
370        if self.verbose is True:
371            print 'Using attribute %s' %attribute_name
372            print 'Available attributes: %s' %(self.attributes.keys())       
373
374        msg = 'Attribute name %s does not exist in data set' %attribute_name
375        assert self.attributes.has_key(attribute_name), msg
376
377        return self.attributes[attribute_name]
378
379    def get_all_attributes(self):
380        """
381        Return values for all attributes.
382        The return value is either None or a dictionary (possibly empty).
383        """
384
385        return self.attributes
386
387    def __add__(self, other):
388        """
389        Returns the addition of 2 geospatical objects,
390        objects are concatencated to the end of each other
391           
392        NOTE: doesn't add if objects contain different
393        attributes 
394       
395        Always return relative points!
396        """
397
398        # find objects zone and checks if the same
399        geo_ref1 = self.get_geo_reference()
400        zone1 = geo_ref1.get_zone()
401       
402        geo_ref2 = other.get_geo_reference()
403        zone2 = geo_ref2.get_zone()
404
405        geo_ref1.reconcile_zones(geo_ref2)
406
407
408        # sets xll and yll as the smallest from self and other
409        # FIXME (Duncan and Ole): use lower left corner derived from
410        # absolute coordinates
411        if self.geo_reference.xllcorner <= other.geo_reference.xllcorner:
412            xll = self.geo_reference.xllcorner
413        else:
414            xll = other.geo_reference.xllcorner
415
416        if self.geo_reference.yllcorner <= other.geo_reference.yllcorner:
417            yll = self.geo_reference.yllcorner
418        else:
419            yll = other.geo_reference.yllcorner
420        new_geo_ref = Geo_reference(geo_ref1.get_zone(), xll, yll)
421
422        xll = yll = 0. 
423       
424        relative_points1 = self.get_data_points(absolute = False)
425        relative_points2 = other.get_data_points(absolute = False)
426
427       
428        new_relative_points1 = new_geo_ref.\
429                               change_points_geo_ref(relative_points1,
430                                                     geo_ref1)
431        new_relative_points2 = new_geo_ref.\
432                               change_points_geo_ref(relative_points2,
433                                                     geo_ref2)
434       
435        # Now both point sets are relative to new_geo_ref and
436        # zones have been reconciled
437
438        # Concatenate points
439        new_points = concatenate((new_relative_points1,
440                                  new_relative_points2),
441                                  axis = 0)
442     
443        # Concatenate attributes if any
444        if self.attributes is None:
445            if other.attributes is not None:
446                msg = 'Both geospatial_data objects must have the same \n'
447                msg += 'attributes to allow addition.'
448                raise Exception, msg
449           
450            new_attributes = None
451        else:   
452            new_attributes = {}
453            for x in self.attributes.keys():
454                if other.attributes.has_key(x):
455
456                    attrib1 = self.attributes[x]
457                    attrib2 = other.attributes[x]
458                    new_attributes[x] = concatenate((attrib1, attrib2))
459
460                else:
461                    msg = 'Both geospatial_data objects must have the same \n'
462                    msg += 'attributes to allow addition.'
463                    raise Exception, msg
464
465        # Instantiate new data object and return   
466        return Geospatial_data(new_points,
467                               new_attributes,
468                               new_geo_ref)
469   
470    ###
471    #  IMPORT/EXPORT POINTS FILES
472    ###
473
474    def import_points_file(self, file_name, delimiter=None, verbose=False):
475        """ load an .xya or .pts file
476        Note: will throw an IOError if it can't load the file.
477        Catch these!
478
479        Post condition: self.attributes dictionary has been set
480        """
481       
482        if access(file_name, F_OK) == 0 :
483            msg = 'File %s does not exist or is not accessible' %file_name
484            raise IOError, msg
485       
486        attributes = {}
487        if file_name[-4:]== ".xya":
488            msg = 'Text file format is moving to comma seperated .txt files.'
489            warn(msg, DeprecationWarning) 
490            try:
491                if delimiter == None:
492                    try:
493                        fd = open(file_name)
494                        data_points, attributes, geo_reference =\
495                                     _read_xya_file(fd, ',')
496                    except TitleError:
497                        fd.close()
498                        fd = open(file_name)
499                        data_points, attributes, geo_reference =\
500                                     _read_xya_file(fd, ' ')
501                else:
502                    fd = open(file_name)
503                    data_points, attributes, geo_reference =\
504                                 _read_xya_file(fd, delimiter)
505                fd.close()
506            except (IndexError,ValueError,SyntaxError):
507                fd.close()   
508                msg = 'Could not open file %s ' %file_name
509                raise IOError, msg
510            except IOError, e:
511                fd.close() 
512                # Catch this to add an error message
513                msg = 'Could not open file or incorrect file format %s:%s'\
514                      %(file_name, e)
515                raise IOError, msg
516               
517        elif file_name[-4:]== ".pts":
518            try:
519                data_points, attributes, geo_reference =\
520                             _read_pts_file(file_name, verbose)
521            except IOError, e:   
522                msg = 'Could not open file %s ' %file_name
523                raise IOError, msg 
524       
525        elif file_name[-4:]== ".txt" or file_name[-4:]== ".csv":
526            #let's do ticket#116 stuff
527            #
528            try:
529                data_points, attributes, geo_reference =\
530                             _read_csv_file(file_name, verbose)
531            except IOError, e:   
532                msg = 'Could not open file %s ' %file_name
533                raise IOError, msg       
534        else:     
535            msg = 'Extension %s is unknown' %file_name[-4:]
536            raise IOError, msg
537#        print'in import data_points', data_points
538#        print'in import attributes', attributes
539#        print'in import data_points', geo_reference
540        self.data_points = data_points
541        self.attributes = attributes
542        self.geo_reference = geo_reference
543   
544#        return all_data
545   
546    def export_points_file(self, file_name, absolute=True):
547       
548        """
549        write a points file, file_name, as a text (.xya) or binary (.pts) file
550        file_name is the file name, including the extension
551        The point_dict is defined at the top of this file.
552       
553        If absolute is True data points at returned added to the xll and yll
554        and geo_reference as None
555       
556        If absolute is False data points at returned as relative to the xll
557        and yll and geo_reference remains uneffected
558        """
559
560        if absolute is False and file_name[-4:] == ".xya":
561            msg = 'The text file values must be absolute.   '
562            msg += 'Text file format is moving to comma seperated .txt files.'
563            warn(msg, DeprecationWarning) 
564
565        if (file_name[-4:] == ".xya"):
566            msg = '.xya format is deprecated.  Please use .txt.'
567            warn(msg, DeprecationWarning)
568            if absolute is True:         
569                _write_xya_file(file_name,
570                                self.get_data_points(absolute=True), 
571                                self.get_all_attributes())
572            else:
573                _write_xya_file(file_name,
574                                self.get_data_points(absolute=False), 
575                                self.get_all_attributes(),
576                                self.get_geo_reference())
577                                   
578        elif (file_name[-4:] == ".pts"):
579            if absolute is True:
580                _write_pts_file(file_name,
581                                self.get_data_points(absolute), 
582                                self.get_all_attributes())
583            else:
584                _write_pts_file(file_name,
585                                self.get_data_points(absolute), 
586                                self.get_all_attributes(),
587                                self.get_geo_reference())
588               
589        elif file_name[-4:] == ".txt" or file_name[-4:] == ".csv":
590            msg = "ERROR: trying to write a .txt file with relative data."
591            assert absolute, msg
592            _write_csv_file(file_name,
593                            self.get_data_points(absolute=True), 
594                            self.get_all_attributes())
595                                   
596        else:
597            msg = 'Unknown file type %s ' %file_name
598            raise IOError, msg
599       
600    def get_sample(self, indices):
601        """ Returns a object which is a subset of the original
602        and the data points and attributes in this new object refer to
603        the indices provided
604       
605        Input
606            indices- a list of integers that represent the new object
607        Output
608            New geospatial data object representing points specified by
609            the indices
610            """
611        #FIXME: add the geo_reference to this
612       
613        points = self.get_data_points()
614        sampled_points = take(points, indices)
615
616        attributes = self.get_all_attributes()
617
618        sampled_attributes = {}
619        if attributes is not None:
620            for key, att in attributes.items():
621                sampled_attributes[key] = take(att, indices)
622
623        return Geospatial_data(sampled_points, sampled_attributes)
624   
625   
626    def split(self, factor=0.5, verbose=False):
627        """Returns two geospatial_data object, first is size of the 'factor'
628        smaller the original and the second is the remainer. The two new
629        object are disjoin set of each other.
630       
631        Points of the two new object have selected RANDOMLY.
632        AND if factor is a decimal it will round (2.25 to 2 and 2.5 to 3)
633       
634       
635        Input - the factor which to split the object, if 0.1 then 10% of the
636            object will be returned
637       
638        Output - two geospatial_data objects that are disjoint sets of the
639            original
640            """
641       
642        i=0
643        self_size = len(self)
644        random_list = []
645        remainder_list = []
646        new_size = round(factor*self_size)
647   #     print'Split original %s by %s' %(self_size, factor)
648   #     print'New samples are %s and %s in size' %(int(round(factor*self_size)),int(self_size-new_size))
649       
650        #find unique random numbers
651        if verbose: print "make unique random number list and get indices"
652        while i < new_size:
653            random_num = randint(0,self_size-1)
654            if random_num not in random_list:
655                random_list.append(random_num)
656                i=i+1
657
658        #Make list of opposite to random_list
659        if verbose: print "make list of opposite to random list"
660        for i in range(0,self_size,1):
661            remainder_list.append(i)
662
663        #remove random list from remainder_list to get correct remainder_list
664        #need to sort and reverse so the pop() works correctly
665        random_list.sort()
666        random_list.reverse()
667        if verbose: print "get indices of oppisite to random list"
668        for i in random_list:
669            remainder_list.pop(i)
670           
671        #get new samples
672        if verbose: print "get values of indices for random list"
673        G1 = self.get_sample(random_list)
674        if verbose: print "get values of indices for opposite of random list"
675        G2 = self.get_sample(remainder_list)
676
677        return G1, G2
678
679    def __iter__(self):
680        """
681        read in the header and save the file pointer position
682        """
683
684        from Scientific.IO.NetCDF import NetCDFFile
685       
686        #FIXME - what to do if the file isn't there
687
688        if self.file_name[-4:]== ".xya":
689            #let's just read it all
690            pass
691        elif self.file_name[-4:]== ".pts":
692           
693            # see if the file is there.  Throw a QUIET IO error if it isn't
694            fd = open(self.file_name,'r')
695            fd.close()
696   
697            #throws prints to screen if file not present
698            self.fid = NetCDFFile(self.file_name, 'r')
699           
700            self.blocking_georef, self.blocking_keys, self.last_row = \
701                     _read_pts_file_header(self.fid, self.verbose)
702            self.start_row=0
703        else:
704            file_pointer = open(self.file_name)
705            self.header, self.file_pointer = \
706                         _read_csv_file_header(file_pointer)
707            self.blocking_georef = None # Used for reconciling zones
708        if self.max_read_lines is None:
709            self.max_read_lines = MAX_READ_LINES
710        return self
711   
712    def next(self):
713        # read a block, instanciate a new geospatial and return it
714        if self.file_name[-4:]== ".xya" :
715            if not hasattr(self,'finished_reading') or \
716                   self.finished_reading is False:
717                #let's just read it all
718                geo = Geospatial_data(self.file_name)
719                self.finished_reading = True
720            else:
721                raise StopIteration
722                self.finished_reading = False
723               
724        elif self.file_name[-4:]== ".pts":
725            if self.start_row == self.last_row:
726                # read the end of the file last iteration
727                # remove blocking attributes
728                self.fid.close()
729                del self.max_read_lines
730                del self.blocking_georef
731                del self.last_row
732                del self.start_row
733                del self.blocking_keys
734                del self.fid
735                raise StopIteration
736            fin_row = self.start_row + self.max_read_lines
737            if fin_row > self.last_row:
738                fin_row = self.last_row
739
740            if self.verbose is True and \
741                   0==self.start_row%(( self.last_row+10)/10)==0:
742                 print 'Doing %d of %d' %(self.start_row, self.last_row+10)
743            #call stuff
744            pointlist, att_dict, = \
745                   _read_pts_file_blocking( self.fid,
746                                            self.start_row,
747                                            fin_row,
748                                            self.blocking_keys
749                                            ) 
750            geo = Geospatial_data(pointlist, att_dict, self.blocking_georef)
751            self.start_row = fin_row
752           
753        else:
754            try:
755                pointlist, att_dict, geo_ref, self.file_pointer = \
756                   _read_csv_file_blocking( self.file_pointer,
757                                            self.header[:],
758                                            max_read_lines=self.max_read_lines,
759                                            verbose=self.verbose)
760
761                # Check that the zones haven't changed.
762                if geo_ref is not None:
763                    geo_ref.reconcile_zones(self.blocking_georef)
764                    self.blocking_georef = geo_ref
765                elif self.blocking_georef is not None:
766                   
767                    msg = 'Geo reference given, then not given.'
768                    msg += ' This should not happen.' 
769                    raise ValueError, msg
770                geo = Geospatial_data(pointlist, att_dict, geo_ref)
771            except StopIteration:
772                self.file_pointer.close()
773                del self.header
774                del self.file_pointer
775                raise StopIteration
776            except ANUGAError:
777                self.file_pointer.close()
778                del self.header
779                del self.file_pointer
780                raise
781        return geo
782
783def _set_using_lat_long(latitudes,
784                        longitudes,
785                        geo_reference,
786                        data_points,
787                        points_are_lats_longs):
788    """
789    if the points has lat long info, assume it is in (lat, long) order.
790    """
791   
792    if geo_reference is not None:
793        msg = """A georeference is specified yet latitude and longitude
794        are also specified!"""
795        raise ValueError, msg
796   
797    if data_points is not None and not points_are_lats_longs:
798        msg = """Data points are specified yet latitude and
799        longitude are also specified!"""
800        raise ValueError, msg
801   
802    if points_are_lats_longs:
803        if data_points is None:
804            msg = """Data points are not specified !"""
805            raise ValueError, msg
806        lats_longs = ensure_numeric(data_points)
807        latitudes = ravel(lats_longs[:,0:1])
808        longitudes = ravel(lats_longs[:,1:])
809       
810    if latitudes is None and longitudes is None:
811        msg = """Latitudes and Longitudes are not."""
812        raise ValueError, msg
813   
814    if latitudes is None:
815        msg = """Longitudes are specified yet latitudes aren't!"""
816        raise ValueError, msg
817   
818    if longitudes is None:
819        msg = """Latitudes are specified yet longitudes aren't!"""
820        raise ValueError, msg
821   
822    data_points, zone  = convert_from_latlon_to_utm(latitudes=latitudes,
823                                                    longitudes=longitudes)
824    return data_points, Geo_reference(zone=zone)
825   
826def _read_pts_file(file_name, verbose=False):
827    """Read .pts NetCDF file
828   
829    Return a dic of array of points, and dic of array of attribute
830    eg
831    dic['points'] = [[1.0,2.0],[3.0,5.0]]
832    dic['attributelist']['elevation'] = [[7.0,5.0]
833    """   
834
835    from Scientific.IO.NetCDF import NetCDFFile
836   
837    if verbose: print 'Reading ', file_name
838   
839       
840    # see if the file is there.  Throw a QUIET IO error if it isn't
841    fd = open(file_name,'r')
842    fd.close()
843   
844    #throws prints to screen if file not present
845    fid = NetCDFFile(file_name, 'r') 
846   
847    pointlist = array(fid.variables['points'])
848    keys = fid.variables.keys()
849    if verbose: print 'Got %d variables: %s' %(len(keys), keys)
850    try:
851        keys.remove('points')
852    except IOError, e:       
853        fid.close()   
854        msg = 'Expected keyword "points" but could not find it'
855        raise IOError, msg
856   
857    attributes = {}
858    for key in keys:
859        if verbose: print "reading attribute '%s'" %key
860           
861        attributes[key] = array(fid.variables[key])
862   
863   
864    try:
865        geo_reference = Geo_reference(NetCDFObject=fid)
866    except AttributeError, e:
867        geo_reference = None
868   
869    fid.close()
870   
871    return pointlist, attributes, geo_reference
872
873
874def _read_csv_file(file_name, verbose=False):
875    """Read .csv file
876   
877    Return a dic of array of points, and dic of array of attribute
878    eg
879    dic['points'] = [[1.0,2.0],[3.0,5.0]]
880    dic['attributelist']['elevation'] = [[7.0,5.0]
881    """
882   
883    #from anuga.shallow_water.data_manager import Exposure_csv
884    #csv =Exposure_csv(file_name)
885   
886    file_pointer = open(file_name)
887    header, file_pointer = _read_csv_file_header(file_pointer)
888    try:
889        pointlist, att_dict, geo_ref, file_pointer = \
890                   _read_csv_file_blocking( \
891                file_pointer,
892                header,
893                max_read_lines=1e30) #If the file is bigger that this, block..
894    except ANUGAError:
895        file_pointer.close()
896        raise 
897    file_pointer.close()
898    return pointlist, att_dict, geo_ref   
899
900CSV_DELIMITER = ','
901def _read_csv_file_header(file_pointer,
902                          delimiter=CSV_DELIMITER,
903                          verbose=False):
904
905    """Read the header of a .csv file
906    Return a list of the header names
907    """
908    line = file_pointer.readline()
909    header = clean_line(line, delimiter)
910    return header, file_pointer
911
912def _read_csv_file_blocking(file_pointer, header,
913                            delimiter=CSV_DELIMITER,
914                            max_read_lines=MAX_READ_LINES,
915                            verbose=False):
916   
917
918    """
919    Read the body of a .csv file.
920    header: The list header of the csv file, with the x and y labels.
921    """
922    points = []
923    pointattributes = []
924    att_dict = {}
925
926    #This is to remove the x and y headers.
927    header = header[:]
928    x_header = header.pop(0)
929    y_header = header.pop(0)
930   
931    read_lines = 0
932    while read_lines<max_read_lines:
933        line = file_pointer.readline()
934        #print "line",line
935        numbers = clean_line(line,delimiter)
936        if len(numbers) <= 1:
937            break
938        if line[0] == '#':
939            continue
940        read_lines += 1
941        if True: # remove.. #if numbers != []:
942            try:
943                x = float(numbers[0])
944                y = float(numbers[1])
945                points.append([x,y])
946                numbers.pop(0)
947                numbers.pop(0)
948                if len(header) != len(numbers):
949                   
950                    file_pointer.close() 
951                    # It might not be a problem with the header
952                    #raise TitleAmountError
953                    msg = "File load error.  There might be a problem with the file header"
954                    raise IOError, msg
955                for i,num in enumerate(numbers):
956                    num.strip()
957                    if num != '\n' and num != '':
958                        #attributes.append(float(num))
959                        att_dict.setdefault(header[i],[]).append(float(num))
960            #except IOError:           
961            except ValueError:
962                raise SyntaxError
963    if points == []:
964        raise StopIteration
965       
966   
967    pointlist = array(points).astype(Float)
968    for key in att_dict.keys():
969        att_dict[key] = array(att_dict[key]).astype(Float)
970
971    #Do stuff here so the info is in lat's and longs
972    geo_ref = None
973    x_header = lower(x_header[:3])
974    y_header = lower(y_header[:3])
975    if (x_header == 'lon' or  x_header == 'lat') and \
976       (y_header == 'lon' or  y_header == 'lat'):
977        if x_header == 'lon':
978            longitudes = ravel(pointlist[:,0:1])
979            latitudes = ravel(pointlist[:,1:])
980        else:
981            latitudes = ravel(pointlist[:,0:1])
982            longitudes = ravel(pointlist[:,1:])
983       
984        pointlist, geo_ref = _set_using_lat_long(latitudes,
985                                                 longitudes,
986                                                 geo_reference=None,
987                                                 data_points=None,
988                                                 points_are_lats_longs=False)
989    return pointlist, att_dict, geo_ref, file_pointer
990
991def _read_pts_file_header(fid, verbose=False):
992
993    """
994    Read the geo_reference of a .pts file
995    """
996   
997    keys = fid.variables.keys()
998    try:
999        keys.remove('points')
1000    except IOError, e:       
1001        fid.close()   
1002        msg = 'Expected keyword "points" but could not find it'
1003        raise IOError, msg
1004    if verbose: print 'Got %d variables: %s' %(len(keys), keys)
1005   
1006    try:
1007        geo_reference = Geo_reference(NetCDFObject=fid)
1008    except AttributeError, e:
1009        geo_reference = None
1010
1011    return geo_reference, keys, fid.dimensions['number_of_points']
1012
1013def _read_pts_file_blocking(fid, start_row, fin_row, keys):
1014                            #verbose=False):
1015   
1016
1017    """
1018    Read the body of a .csv file.
1019    header: The list header of the csv file, with the x and y labels.
1020    """
1021   
1022    pointlist = array(fid.variables['points'][start_row:fin_row])
1023   
1024    attributes = {}
1025    for key in keys:
1026        attributes[key] = array(fid.variables[key][start_row:fin_row])
1027
1028    return pointlist, attributes
1029   
1030   
1031def _read_xya_file(fd, delimiter):
1032    points = []
1033    pointattributes = []
1034    title = fd.readline()
1035    att_names = clean_line(title,delimiter)
1036    att_dict = {}
1037    line = fd.readline()
1038    numbers = clean_line(line,delimiter)
1039   
1040    while len(numbers) > 1 and line[0] <> '#':
1041        if numbers != []:
1042            try:
1043                x = float(numbers[0])
1044                y = float(numbers[1])
1045                points.append([x,y])
1046                numbers.pop(0)
1047                numbers.pop(0)
1048                if len(att_names) != len(numbers):
1049                    fd.close()
1050                    # It might not be a problem with the title
1051                    #raise TitleAmountError
1052                    raise IOError
1053                for i,num in enumerate(numbers):
1054                    num.strip()
1055                    if num != '\n' and num != '':
1056                        #attributes.append(float(num))
1057                        att_dict.setdefault(att_names[i],[]).append(float(num))
1058            except ValueError:
1059                raise SyntaxError
1060        line = fd.readline()
1061        numbers = clean_line(line,delimiter)
1062   
1063    if line == '':
1064        geo_reference = None
1065    else:
1066        geo_reference = Geo_reference(ASCIIFile=fd,read_title=line)
1067       
1068   
1069    pointlist = array(points).astype(Float)
1070    for key in att_dict.keys():
1071        att_dict[key] = array(att_dict[key]).astype(Float)
1072   
1073    return pointlist, att_dict, geo_reference
1074
1075def _write_pts_file(file_name,
1076                    write_data_points,
1077                    write_attributes=None, 
1078                    write_geo_reference=None):
1079    """
1080    Write .pts NetCDF file   
1081
1082    NOTE: Below might not be valid ask Duncan : NB 5/2006
1083   
1084    WARNING: This function mangles the point_atts data structure
1085    #F??ME: (DSG)This format has issues.
1086    # There can't be an attribute called points
1087    # consider format change
1088    # method changed by NB not sure if above statement is correct
1089   
1090    should create new test for this
1091    legal_keys = ['pointlist', 'attributelist', 'geo_reference']
1092    for key in point_atts.keys():
1093        msg = 'Key %s is illegal. Valid keys are %s' %(key, legal_keys)
1094        assert key in legal_keys, msg
1095    """   
1096    from Scientific.IO.NetCDF import NetCDFFile
1097    # NetCDF file definition
1098    outfile = NetCDFFile(file_name, 'w')
1099   
1100    #Create new file
1101    outfile.institution = 'Geoscience Australia'
1102    outfile.description = 'NetCDF format for compact and portable storage ' +\
1103                          'of spatial point data'
1104   
1105    # dimension definitions
1106    shape = write_data_points.shape[0]
1107    outfile.createDimension('number_of_points', shape) 
1108    outfile.createDimension('number_of_dimensions', 2) #This is 2d data
1109   
1110    # variable definition
1111    outfile.createVariable('points', Float, ('number_of_points',
1112                                             'number_of_dimensions'))
1113
1114    #create variables 
1115    outfile.variables['points'][:] = write_data_points #.astype(Float32)
1116
1117    if write_attributes is not None:
1118        for key in write_attributes.keys():
1119            outfile.createVariable(key, Float, ('number_of_points',))
1120            outfile.variables[key][:] = write_attributes[key] #.astype(Float32)
1121       
1122    if write_geo_reference is not None:
1123        write_geo_reference.write_NetCDF(outfile)
1124       
1125    outfile.close() 
1126 
1127
1128
1129def _write_xya_file(file_name,
1130                    write_data_points,
1131                    write_attributes=None, 
1132                    write_geo_reference=None, 
1133                    delimiter=','):
1134    """
1135    export a file, file_name, with the xya format
1136   
1137    """
1138    points = write_data_points
1139    pointattributes = write_attributes
1140   
1141    fd = open(file_name,'w')
1142    titlelist = ""
1143    if pointattributes is not None:   
1144        for title in pointattributes.keys():
1145            titlelist = titlelist + title + delimiter
1146        titlelist = titlelist[0:-len(delimiter)] # remove the last delimiter
1147    fd.write(titlelist+"\n")
1148   
1149    #<vertex #> <x> <y> [attributes]
1150    for i, vert in enumerate( points):
1151
1152
1153        if pointattributes is not None:           
1154            attlist = ","
1155            for att in pointattributes.keys():
1156                attlist = attlist + str(pointattributes[att][i])+ delimiter
1157            attlist = attlist[0:-len(delimiter)] # remove the last delimiter
1158            attlist.strip()
1159        else:
1160            attlist = ''
1161
1162        fd.write(str(vert[0]) + delimiter +
1163                 str(vert[1]) + attlist + "\n")
1164
1165    if  write_geo_reference is not None:
1166        write_geo_reference.write_ASCII(fd)
1167    fd.close()
1168
1169
1170def _write_csv_file(file_name,
1171                    write_data_points,
1172                    write_attributes=None, 
1173                    delimiter=','):
1174    """
1175    export a file, file_name, with the xya format
1176   
1177    """
1178    points = write_data_points
1179    pointattributes = write_attributes
1180   
1181    fd = open(file_name,'w')
1182    titlelist = "x" + delimiter + "y"  + delimiter
1183    if pointattributes is not None:   
1184        for title in pointattributes.keys():
1185            titlelist = titlelist + title + delimiter
1186        titlelist = titlelist[0:-len(delimiter)] # remove the last delimiter
1187    fd.write(titlelist+"\n")
1188   
1189    #<vertex #> <x> <y> [attributes]
1190    for i, vert in enumerate( points):
1191
1192
1193        if pointattributes is not None:           
1194            attlist = ","
1195            for att in pointattributes.keys():
1196                attlist = attlist + str(pointattributes[att][i])+ delimiter
1197            attlist = attlist[0:-len(delimiter)] # remove the last delimiter
1198            attlist.strip()
1199        else:
1200            attlist = ''
1201
1202        fd.write(str(vert[0]) + delimiter +
1203                 str(vert[1]) + attlist + "\n")
1204
1205    fd.close()
1206   
1207def _point_atts2array(point_atts):
1208    point_atts['pointlist'] = array(point_atts['pointlist']).astype(Float)
1209   
1210    for key in point_atts['attributelist'].keys():
1211        point_atts['attributelist'][key]=\
1212                array(point_atts['attributelist'][key]).astype(Float)
1213    return point_atts
1214
1215   
1216
1217
1218def geospatial_data2points_dictionary(geospatial_data):
1219    """Convert geospatial data to points_dictionary
1220    """
1221
1222    points_dictionary = {}
1223    points_dictionary['pointlist'] = geospatial_data.data_points
1224
1225    points_dictionary['attributelist'] = {}
1226
1227    for attribute_name in geospatial_data.attributes.keys():
1228        val = geospatial_data.attributes[attribute_name]
1229        points_dictionary['attributelist'][attribute_name] = val
1230
1231    points_dictionary['geo_reference'] = geospatial_data.geo_reference
1232
1233    return points_dictionary
1234
1235   
1236def points_dictionary2geospatial_data(points_dictionary):
1237    """Convert points_dictionary to geospatial data object
1238    """
1239
1240    msg = 'Points dictionary must have key pointlist' 
1241    assert points_dictionary.has_key('pointlist'), msg
1242
1243    msg = 'Points dictionary must have key attributelist'     
1244    assert points_dictionary.has_key('attributelist'), msg       
1245
1246    if points_dictionary.has_key('geo_reference'):
1247        geo = points_dictionary['geo_reference']
1248    else:
1249        geo = None
1250   
1251    return Geospatial_data(points_dictionary['pointlist'],
1252                           points_dictionary['attributelist'],
1253                           geo_reference = geo)
1254
1255def clean_line(line,delimiter):     
1256    """Remove whitespace
1257    """
1258    #print ">%s" %line
1259    line = line.strip()
1260    #print "stripped>%s" %line
1261    numbers = line.split(delimiter)
1262    i = len(numbers) - 1
1263    while i >= 0:
1264        if numbers[i] == '':
1265            numbers.pop(i)
1266        else:
1267            numbers[i] = numbers[i].strip()
1268       
1269        i += -1
1270    #for num in numbers:
1271    #    print "num>%s<" %num
1272    return numbers
1273           
1274def ensure_absolute(points, geo_reference=None):
1275    """
1276    This function inputs several formats and
1277    outputs one format. - a numeric array of absolute points.
1278
1279    Inputed formats are;
1280    points: List or numeric array of coordinate pairs [xi, eta] of
1281              points or geospatial object or points file name
1282
1283    mesh_origin: A geo_reference object or 3-tuples consisting of
1284                 UTM zone, easting and northing.
1285                 If specified vertex coordinates are assumed to be
1286                 relative to their respective origins.
1287    """
1288    if isinstance(points,type('')):
1289        #It's a string
1290        #assume it is a point file
1291        points = Geospatial_data(file_name = points)
1292       
1293    if isinstance(points,Geospatial_data):
1294        points = points.get_data_points( \
1295                absolute = True)
1296        msg = "Use a Geospatial_data object or a mesh origin. Not both."
1297        assert geo_reference == None, msg
1298           
1299    else:
1300        points = ensure_numeric(points, Float)
1301    if geo_reference is None:
1302        geo = None #Geo_reference()
1303    else:
1304        if isinstance(geo_reference, Geo_reference):
1305            geo = geo_reference
1306        else:
1307            geo = Geo_reference(geo_reference[0],
1308                                geo_reference[1],
1309                                geo_reference[2])
1310        points = geo.get_absolute(points)
1311    return points
1312     
1313
1314def ensure_geospatial(points, geo_reference=None):
1315    """
1316    This function inputs several formats and
1317    outputs one format. - a geospatial_data instance.
1318
1319    Inputed formats are;
1320    points: List or numeric array of coordinate pairs [xi, eta] of
1321              points or geospatial object
1322
1323    mesh_origin: A geo_reference object or 3-tuples consisting of
1324                 UTM zone, easting and northing.
1325                 If specified vertex coordinates are assumed to be
1326                 relative to their respective origins.
1327    """
1328    if isinstance(points,Geospatial_data):
1329        msg = "Use a Geospatial_data object or a mesh origin. Not both."
1330        assert geo_reference == None, msg
1331        return points   
1332    else:
1333        points = ensure_numeric(points, Float)
1334    if geo_reference is None:
1335        geo = None
1336    else:
1337        if isinstance(geo_reference, Geo_reference):
1338            geo = geo_reference
1339        else:
1340            geo = Geo_reference(geo_reference[0],
1341                                geo_reference[1],
1342                                geo_reference[2])
1343    points = Geospatial_data(data_points=points, geo_reference=geo)       
1344    return points
1345
1346#def file2xya(filename):
1347   
1348#    G = Geospatial_data(filename)
1349#    G.export_points_file()
1350
1351             
1352
1353         
1354if __name__ == "__main__":
1355    pass
1356   
Note: See TracBrowser for help on using the repository browser.