source: inundation/geospatial_data/geospatial_data.py @ 3266

Last change on this file since 3266 was 3266, checked in by duncan, 18 years ago

comments and testing.

File size: 26.4 KB
Line 
1"""Class Geospatial_data - Manipulation of locations on the planet and
2associated attributes.
3
4"""
5
6
7from utilities.numerical_tools import ensure_numeric
8
9from Numeric import concatenate, array, Float, shape
10
11from coordinate_transforms.geo_reference import Geo_reference, TitleError
12
13from os import access, F_OK, R_OK
14       
15class Geospatial_data:
16
17    def __init__(self,
18                 data_points = None,
19                 attributes = None,
20                 geo_reference = None,
21                 default_attribute_name = None,
22                 file_name = None,
23                 delimiter = None,
24                 verbose = False):
25
26       
27        """
28        Create instance from data points and associated attributes
29
30        data_points: x,y coordinates in meters. Type must be either a
31        sequence of 2-tuples or an Mx2 Numeric array of floats.
32
33        attributes: Associated values for each data point. The type
34        must be either a list or an array of length M or a dictionary
35        of lists (or arrays) of length M. In the latter case the keys
36        in the dictionary represent the attribute names, in the former
37        the attribute will get the default name "attribute".
38       
39        geo_reference: Object representing the origin of the data
40        points. It contains UTM zone, easting and northing and data
41        points are assumed to be relative to this origin.
42        If geo_reference is None, the default geo ref object is used
43
44        default_attribute_name: Name of default attribute to be used with
45        get_attribute_values. The idea is that the dataset can be
46        equipped with information about which attribute to return.
47        If None, the default is the "first"
48       
49        file_name: Name of input netCDF file or xya file. netCDF file must
50        have dimensions "points" etc.
51        xya file is a CVS file with lats(x), longs(y) and elevation(a).
52        first line must be attribute name eg elevation
53       
54        The format for a .xya file is:
55            1st line:     [attribute names]
56            other lines:  x y [attributes]
57
58            for example:
59            elevation, friction
60            0.6, 0.7, 4.9, 0.3
61            1.9, 2.8, 5, 0.3
62            2.7, 2.4, 5.2, 0.3
63
64        The first two columns are always implicitly assumed to be x, y coordinates.
65        Use the same delimiter for the attribute names and the data
66
67        An xya file can optionally end with
68            #geo reference
69            56
70            466600.0
71            8644444.0
72
73        When the 1st # is the zone,
74        2nd # the xllcorner and
75        3rd # the yllcorner
76       
77        The format for a Points dictionary is:
78
79          ['pointlist'] a 2 column array describing points. 1st column x, 2nd column y.
80          ['attributelist'], a dictionary of 1D arrays, representing attribute values
81          at the point.  The dictionary key is the attribute header.
82          ['geo_reference'] a Geo_refernece object. Use if the point information
83            is relative. This is optional.
84            eg
85            dic['pointlist'] = [[1.0,2.0],[3.0,5.0]]
86            dic['attributelist']['elevation'] = [[7.0,5.0]
87               
88        delimiter: is the file delimiter that will be used when
89            importing the file
90           
91        verbose:
92         
93        """
94
95        if isinstance(data_points, basestring):
96            # assume data point is really a file name
97            file_name = data_points
98
99        self.set_verbose(verbose)
100        self.geo_reference=None #create the attribute
101        if file_name is None:
102            if delimiter is not None:
103                msg = 'No file specified yet a delimiter is provided!'
104                raise ValueError, msg
105            file_name = None
106            self.check_data_points(data_points)
107            self.set_attributes(attributes)
108            self.set_geo_reference(geo_reference)
109            self.set_default_attribute_name(default_attribute_name)
110
111        else:
112###                data = {}   
113                # watch for case where file name and points, attributes etc are provided!!
114                # if file name then all provided info will be removed!
115                self.import_points_file(file_name, delimiter, verbose)
116               
117                self.check_data_points(self.data_points)
118                self.set_attributes(self.attributes)
119                self.set_geo_reference(self.geo_reference)
120                self.set_default_attribute_name(default_attribute_name)
121
122       
123    def check_data_points(self, data_points):
124        """Checks data points
125        """
126       
127        if data_points is None:
128            self.data_points = None
129            msg = 'There is no data or file provided!'
130            raise ValueError, msg
131           
132        else:
133            self.data_points = ensure_numeric(data_points)
134            assert len(self.data_points.shape) == 2
135            assert self.data_points.shape[1] == 2
136
137    def set_attributes(self, attributes):
138        """Check and assign attributes dictionary
139        """
140       
141        from types import DictType
142       
143        if attributes is None:
144            self.attributes = None
145            return
146       
147        if not isinstance(attributes, DictType):
148            #Convert single attribute into dictionary
149            attributes = {'attribute': attributes}
150
151        #Check input attributes   
152        for key in attributes.keys():
153            try:
154                attributes[key] = ensure_numeric(attributes[key])
155            except:
156                msg = 'Attribute %s could not be converted' %key
157                msg += 'to a numeric vector'
158                raise msg
159
160        self.attributes = attributes   
161
162
163    def set_geo_reference(self, geo_reference):
164
165        from coordinate_transforms.geo_reference import Geo_reference
166
167        if geo_reference is None:
168            geo_reference = Geo_reference() # Use default
169        if not isinstance(geo_reference, Geo_reference):
170            msg = 'Argument geo_reference must be a valid Geo_reference \n'
171            msg += 'object or None.'
172            raise msg
173
174        # if a geo ref already exists, change the point data to
175        # represent the new geo-ref
176        if  self.geo_reference is not None:
177            #FIXME: Maybe put out a warning here...
178            self.data_points = self.get_data_points \
179                               (geo_reference=geo_reference)
180           
181        self.geo_reference = geo_reference
182
183
184    def set_default_attribute_name(self, default_attribute_name):
185        self.default_attribute_name = default_attribute_name
186
187    def set_verbose(self, verbose = False):
188        if verbose is not False:
189            verbose = True
190        else:
191            verbose = False
192   
193    def get_geo_reference(self):
194        return self.geo_reference
195       
196    def get_data_points(self, absolute = True, geo_reference=None):
197        """Get coordinates for all data points as an Nx2 array
198
199        If absolute is True absolute UTM coordinates are returned otherwise
200        returned coordinates are relative to the internal georeference's
201        xll and yll corners.
202
203        If a geo_reference is passed the points are returned relative
204        to that geo_reference.
205
206        Default: absolute is True.
207        """
208
209        if absolute is True and geo_reference is None:
210            return self.geo_reference.get_absolute(self.data_points)
211        elif geo_reference is not None:
212            return geo_reference.change_points_geo_ref \
213                               (self.data_points, 
214                                self.geo_reference)
215        else:
216            return self.data_points
217       
218   
219    def get_attributes(self, attribute_name = None):
220        """Return values for one named attribute.
221
222        If attribute_name is None, default_attribute_name is used
223        """
224
225        if attribute_name is None:
226            if self.default_attribute_name is not None:
227                attribute_name = self.default_attribute_name
228            else:
229                attribute_name = self.attributes.keys()[0] 
230                # above line takes the first one from keys
231               
232
233        msg = 'Attribute name %s does not exist in data set' %attribute_name
234        assert self.attributes.has_key(attribute_name), msg
235
236        return self.attributes[attribute_name]
237
238    def get_all_attributes(self):
239        """
240        Return values for all attributes.
241        """
242
243        return self.attributes
244
245    def __add__(self, other):
246        """
247        Returns the addition of 2 geospatical objects,
248        objects are concatencated to the end of each other
249           
250        NOTE: doesn't add if objects contain different
251        attributes 
252       
253        Always return relative points!
254        """
255
256        # find objects zone and checks if the same
257        geo_ref1 = self.get_geo_reference()
258        zone1 = geo_ref1.get_zone()
259       
260        geo_ref2 = other.get_geo_reference()
261        zone2 = geo_ref2.get_zone()
262
263        geo_ref1.reconcile_zones(geo_ref2)
264
265
266        # sets xll and yll as the smallest from self and other
267        # FIXME (Duncan and Ole): use lower left corner derived from absolute coordinates
268        if self.geo_reference.xllcorner <= other.geo_reference.xllcorner:
269            xll = self.geo_reference.xllcorner
270        else:
271            xll = other.geo_reference.xllcorner
272
273        if self.geo_reference.yllcorner <= other.geo_reference.yllcorner:
274            yll = self.geo_reference.yllcorner
275        else:
276            yll = other.geo_reference.yllcorner
277        new_geo_ref = Geo_reference(geo_ref1.get_zone(), xll, yll)
278
279        xll = yll = 0. 
280       
281        relative_points1 = self.get_data_points(absolute = False)
282        relative_points2 = other.get_data_points(absolute = False)
283
284       
285        new_relative_points1 = new_geo_ref.change_points_geo_ref(relative_points1, geo_ref1)
286        new_relative_points2 = new_geo_ref.change_points_geo_ref(relative_points2, geo_ref2)
287       
288        #Now both point sets are relative to new_geo_ref and zones have been reconciled
289
290        # Concatenate points
291        new_points = concatenate((new_relative_points1,
292                                  new_relative_points2),
293                                  axis = 0)
294     
295        # Concatenate attributes
296        new_attributes = {}
297        for x in self.attributes.keys():
298            if other.attributes.has_key(x):
299
300                attrib1 = self.attributes[x]
301                attrib2 = other.attributes[x]
302                new_attributes[x] = concatenate((attrib1, attrib2))
303
304            else:
305                msg = 'Both geospatial_data objects must have the same \n'
306                msg += 'attributes to allow addition.'
307                raise msg
308
309        # Instantiate new data object and return   
310        return Geospatial_data(new_points,
311                               new_attributes,
312                               new_geo_ref)
313   
314    ###
315    #  IMPORT/EXPORT POINTS FILES
316    ###
317
318    def import_points_file(self, file_name, delimiter = None, verbose = False):
319        """ load an .xya or .pts file
320        Note: will throw an IOError if it can't load the file.
321        Catch these!
322        """
323       
324        if access(file_name, F_OK) == 0 :
325            msg = 'File %s does not exist or is not accessible' %file_name
326            raise IOError, msg
327       
328        attributes = {}
329        if file_name[-4:]== ".xya":
330            try:
331                if delimiter == None:
332                    try:
333                        fd = open(file_name)
334                        data_points, attributes, geo_reference = _read_xya_file(fd, ',')
335                    except TitleError:
336                        fd.close()
337                        fd = open(file_name)
338                        data_points, attributes, geo_reference = _read_xya_file(fd, ' ')
339                else:
340                    fd = open(file_name)
341                    data_points, attributes, geo_reference = _read_xya_file(fd, delimiter)
342                fd.close()
343            except (IndexError,ValueError,SyntaxError):
344                fd.close()   
345                msg = 'Could not open file %s ' %file_name
346                raise IOError, msg
347            except IOError, e:
348                fd.close() 
349                # Catch this to add an error message
350                msg = 'Could not open file or incorrect file format %s:%s' %(file_name, e)
351                raise IOError, msg
352               
353        elif file_name[-4:]== ".pts":
354            try:
355                data_points, attributes, geo_reference = _read_pts_file(file_name, verbose)
356            except IOError, e:   
357                msg = 'Could not open file %s ' %file_name
358                raise IOError, msg       
359        else:     
360            msg = 'Extension %s is unknown' %file_name[-4:]
361            raise IOError, msg
362       
363#        print'in import data_points', data_points
364#        print'in import attributes', attributes
365#        print'in import data_points', geo_reference
366        self.data_points = data_points
367        self.attributes = attributes
368        self.geo_reference = geo_reference
369   
370#        return all_data
371   
372    def export_points_file(self, file_name, absolute=True):
373       
374        """
375        write a points file, file_name, as a text (.xya) or binary (.pts) file
376        file_name is the file name, including the extension
377        The point_dict is defined at the top of this file.
378       
379        If absolute is True data points at returned added to the xll and yll
380        and geo_reference as None
381       
382        If absolute is False data points at returned as relative to the xll
383        and yll and geo_reference remains uneffected
384        """
385   
386        if (file_name[-4:] == ".xya"):
387            if absolute is True:         
388                _write_xya_file(file_name, self.get_data_points(absolute=True), 
389                                   self.get_all_attributes())
390            else:
391                _write_xya_file(file_name, self.get_data_points(absolute=False), 
392                                   self.get_all_attributes(),
393                                   self.get_geo_reference())
394                                   
395        elif (file_name[-4:] == ".pts"):
396            if absolute is True:
397                _write_pts_file(file_name, self.get_data_points(absolute), 
398                                   self.get_all_attributes())
399            else:
400                _write_pts_file(file_name, self.get_data_points(absolute), 
401                                   self.get_all_attributes(),
402                                   self.get_geo_reference())
403        else:
404            msg = 'Unknown file type %s ' %file_name
405            raise IOError, msg
406   
407
408
409
410def _read_pts_file(file_name, verbose = False):
411    """Read .pts NetCDF file
412   
413    Return a dic of array of points, and dic of array of attribute
414    eg
415    dic['points'] = [[1.0,2.0],[3.0,5.0]]
416    dic['attributelist']['elevation'] = [[7.0,5.0]
417    """   
418
419    from Scientific.IO.NetCDF import NetCDFFile
420   
421    if verbose: print 'Reading ', file_name
422   
423       
424    # see if the file is there.  Throw a QUIET IO error if it isn't
425    fd = open(file_name,'r')
426    fd.close()
427   
428    #throws prints to screen if file not present
429    fid = NetCDFFile(file_name, 'r') 
430   
431#    point_atts = {} 
432        # Get the variables
433#    point_atts['pointlist'] = array(fid.variables['points'])
434    pointlist = array(fid.variables['points'])
435    keys = fid.variables.keys()
436    if verbose: print 'Got %d variables: %s' %(len(keys), keys)
437    try:
438        keys.remove('points')
439    except IOError, e:       
440        fid.close()   
441        msg = 'Expected keyword "points" but could not find it'
442        raise IOError, msg
443   
444    attributes = {}
445    for key in keys:
446        if verbose: print "reading attribute '%s'" %key
447           
448        attributes[key] = array(fid.variables[key])
449   
450#    point_atts['attributelist'] = attributes
451   
452    try:
453        geo_reference = Geo_reference(NetCDFObject=fid)
454#        point_atts['geo_reference'] = geo_reference
455    except AttributeError, e:
456        #geo_ref not compulsory
457#        point_atts['geo_reference'] = None
458        geo_reference = None
459   
460    fid.close()
461   
462    return pointlist, attributes, geo_reference
463
464
465def _read_xya_file( fd, delimiter):
466    points = []
467    pointattributes = []
468    title = fd.readline()
469    att_names = clean_line(title,delimiter)
470    att_dict = {}
471    line = fd.readline()
472    numbers = clean_line(line,delimiter)
473   
474    while len(numbers) > 1 and line[0] <> '#':
475        if numbers != []:
476            try:
477                x = float(numbers[0])
478                y = float(numbers[1])
479                points.append([x,y])
480                numbers.pop(0)
481                numbers.pop(0)
482                if len(att_names) != len(numbers):
483                    fd.close()
484                    # It might not be a problem with the title
485                    #raise TitleAmountError
486                    raise IOError
487                for i,num in enumerate(numbers):
488                    num.strip()
489                    if num != '\n' and num != '':
490                        #attributes.append(float(num))
491                        att_dict.setdefault(att_names[i],[]).append(float(num))
492            except ValueError:
493                raise SyntaxError
494        line = fd.readline()
495        numbers = clean_line(line,delimiter)
496   
497    if line == '':
498        geo_reference = None
499    else:
500        geo_reference = Geo_reference(ASCIIFile=fd,read_title=line)
501       
502   
503    pointlist = array(points).astype(Float)
504    for key in att_dict.keys():
505        att_dict[key] = array(att_dict[key]).astype(Float)
506   
507    return pointlist, att_dict, geo_reference
508
509def _write_pts_file(file_name, write_data_points,
510                                   write_attributes, 
511                                   write_geo_reference = None):
512    """
513    Write .pts NetCDF file   
514
515    NOTE: Below might not be valid ask Duncan : NB 5/2006
516   
517    WARNING: This function mangles the point_atts data structure
518    #F??ME: (DSG)This format has issues.
519    # There can't be an attribute called points
520    # consider format change
521    # method changed by NB not sure if above statement is correct
522   
523    should create new test for this
524    legal_keys = ['pointlist', 'attributelist', 'geo_reference']
525    for key in point_atts.keys():
526        msg = 'Key %s is illegal. Valid keys are %s' %(key, legal_keys)
527        assert key in legal_keys, msg
528    """   
529    from Scientific.IO.NetCDF import NetCDFFile
530    # NetCDF file definition
531    outfile = NetCDFFile(file_name, 'w')
532   
533    #Create new file
534    outfile.institution = 'Geoscience Australia'
535    outfile.description = 'NetCDF format for compact and portable storage ' +\
536                      'of spatial point data'
537   
538    # dimension definitions
539    shape = write_data_points.shape[0]
540    outfile.createDimension('number_of_points', shape) 
541    outfile.createDimension('number_of_dimensions', 2) #This is 2d data
542   
543    # variable definition
544    outfile.createVariable('points', Float, ('number_of_points',
545                                             'number_of_dimensions'))
546
547    #create variables 
548    outfile.variables['points'][:] = write_data_points #.astype(Float32)
549
550    for key in write_attributes.keys():
551        outfile.createVariable(key, Float, ('number_of_points',))
552        outfile.variables[key][:] = write_attributes[key] #.astype(Float32)
553       
554    if write_geo_reference is not None:
555        write_geo_reference.write_NetCDF(outfile)
556       
557    outfile.close() 
558 
559
560
561def _write_xya_file( file_name, write_data_points,
562                                    write_attributes, 
563                                    write_geo_reference = None, 
564                                    delimiter = ','):
565    """
566    export a file, file_name, with the xya format
567   
568    """
569    points = write_data_points
570    pointattributes = write_attributes
571   
572    fd = open(file_name,'w')
573    titlelist = ""
574    for title in pointattributes.keys():
575        titlelist = titlelist + title + delimiter
576    titlelist = titlelist[0:-len(delimiter)] # remove the last delimiter
577    fd.write(titlelist+"\n")
578    #<vertex #> <x> <y> [attributes]
579    for i,vert in enumerate( points):
580       
581        attlist = ","
582        for att in pointattributes.keys():
583            attlist = attlist + str(pointattributes[att][i])+ delimiter
584        attlist = attlist[0:-len(delimiter)] # remove the last delimiter
585        attlist.strip()
586        fd.write( str(vert[0]) + delimiter
587                  + str(vert[1])
588                  + attlist + "\n")
589
590    if  write_geo_reference is not None:
591        write_geo_reference.write_ASCII(fd)
592    fd.close()
593
594
595   
596def _point_atts2array(point_atts):
597    point_atts['pointlist'] = array(point_atts['pointlist']).astype(Float)
598   
599    for key in point_atts['attributelist'].keys():
600        point_atts['attributelist'][key]= array(point_atts['attributelist'][key]).astype(Float)
601    return point_atts
602
603   
604
605
606def geospatial_data2points_dictionary(geospatial_data):
607    """Convert geospatial data to points_dictionary
608    """
609
610    points_dictionary = {}
611    points_dictionary['pointlist'] = geospatial_data.data_points
612
613    points_dictionary['attributelist'] = {}
614
615    for attribute_name in geospatial_data.attributes.keys():
616        val = geospatial_data.attributes[attribute_name]
617        points_dictionary['attributelist'][attribute_name] = val
618
619    points_dictionary['geo_reference'] = geospatial_data.geo_reference
620
621    return points_dictionary
622
623   
624def points_dictionary2geospatial_data(points_dictionary):
625    """Convert points_dictionary to geospatial data object
626    """
627
628    msg = 'Points dictionary must have key pointlist' 
629    assert points_dictionary.has_key('pointlist'), msg
630
631    msg = 'Points dictionary must have key attributelist'     
632    assert points_dictionary.has_key('attributelist'), msg       
633
634    if points_dictionary.has_key('geo_reference'):
635        geo = points_dictionary['geo_reference']
636    else:
637        geo = None
638   
639    return Geospatial_data(points_dictionary['pointlist'],
640                           points_dictionary['attributelist'],
641                           geo_reference = geo)
642
643def clean_line(line,delimiter):     
644    """Remove whitespace
645    """
646    #print ">%s" %line
647    line = line.strip()
648    #print "stripped>%s" %line
649    numbers = line.split(delimiter)
650    i = len(numbers) - 1
651    while i >= 0:
652        if numbers[i] == '':
653            numbers.pop(i)
654        else:
655            numbers[i] = numbers[i].strip()
656       
657        i += -1
658    #for num in numbers:
659    #    print "num>%s<" %num
660    return numbers
661           
662def xxx_add_points_files(add_file1, add_file2, results_file):
663    """ adds the points and attruibutes of 2 pts or xya files and
664    writes it to a pts file
665   
666    NOTE will add the function to check and remove points from one set
667    that are shared. This will require some work and maybe __subtract__ function
668    """
669   
670    G1 = Geospatial_data(file_name = add_file1)
671    G2 = Geospatial_data(file_name = add_file2)
672    new_add_file2 = add_file2[:-4] + '.pts' 
673
674    G = G1 + G2
675   
676    #FIXME remove dependance on points to dict in export only!
677#    G_points_dict = geospatial_data2points_dictionary(G)
678#    export_points_file(results_file, G_points_dict)
679
680#    G_points_dict = geospatial_data2points_dictionary(G)
681
682    G.export_points_file(results_file)
683
684# '
685def ensure_absolute(points, geo_reference = None):
686    """
687    This function inputs several formats and
688    outputs one format. - a numeric array of absolute points.
689
690    Inputed formats are;
691    points: List or numeric array of coordinate pairs [xi, eta] of
692              points or geospatial object or points file name
693
694    mesh_origin: A geo_reference object or 3-tuples consisting of
695                 UTM zone, easting and northing.
696                 If specified vertex coordinates are assumed to be
697                 relative to their respective origins.
698    """
699    if isinstance(points,type('')):
700        #It's a string
701        #assume it is a point file
702        points = Geospatial_data(file_name = points)
703       
704    if isinstance(points,Geospatial_data):
705        points = points.get_data_points( \
706                absolute = True)
707        msg = "Use a Geospatial_data object or a mesh origin. Not both."
708        assert geo_reference == None, msg
709           
710    else:
711        points = ensure_numeric(points, Float)
712    if geo_reference is None:
713        geo = None #Geo_reference()
714    else:
715        if isinstance(geo_reference, Geo_reference):
716            geo = geo_reference
717        else:
718            geo = Geo_reference(geo_reference[0],
719                                geo_reference[1],
720                                geo_reference[2])
721        points = geo.get_absolute(points)
722    return points
723     
724
725def ensure_geospatial(points, geo_reference = None):
726    """
727    This function inputs several formats and
728    outputs one format. - a geospatial_data instance.
729
730    Inputed formats are;
731    points: List or numeric array of coordinate pairs [xi, eta] of
732              points or geospatial object
733
734    mesh_origin: A geo_reference object or 3-tuples consisting of
735                 UTM zone, easting and northing.
736                 If specified vertex coordinates are assumed to be
737                 relative to their respective origins.
738    """
739    if isinstance(points,Geospatial_data):
740        msg = "Use a Geospatial_data object or a mesh origin. Not both."
741        assert geo_reference == None, msg
742           
743    else:
744        points = ensure_numeric(points, Float)
745    if geo_reference is None:
746        geo = None
747    else:
748        if isinstance(geo_reference, Geo_reference):
749            geo = geo_reference
750        else:
751            geo = Geo_reference(geo_reference[0],
752                                geo_reference[1],
753                                geo_reference[2])
754        points = Geospatial_data(data_points=points, geo_reference=geo)       
755    return points
756             
Note: See TracBrowser for help on using the repository browser.