source: inundation/geospatial_data/geospatial_data.py @ 3158

Last change on this file since 3158 was 3154, checked in by duncan, 19 years ago

more flexibility..

File size: 26.5 KB
Line 
1"""Class Geospatial_data - Manipulation of locations on the planet and
2associated attributes.
3
4"""
5
6
7from utilities.numerical_tools import ensure_numeric
8
9from Numeric import concatenate, array, Float, shape
10
11from coordinate_transforms.geo_reference import Geo_reference, TitleError
12
13from os import access, F_OK, R_OK
14       
15class Geospatial_data:
16
17    def __init__(self,
18                 data_points = None,
19                 attributes = None,
20                 geo_reference = None,
21                 default_attribute_name = None,
22                 file_name = None,
23                 delimiter = None,
24                 verbose = False):
25
26       
27        """
28        Create instance from data points and associated attributes
29
30        data_points: x,y coordinates in meters. Type must be either a
31        sequence of 2-tuples or an Mx2 Numeric array of floats.
32
33        attributes: Associated values for each data point. The type
34        must be either a list or an array of length M or a dictionary
35        of lists (or arrays) of length M. In the latter case the keys
36        in the dictionary represent the attribute names, in the former
37        the attribute will get the default name "attribute".
38       
39        geo_reference: Object representing the origin of the data
40        points. It contains UTM zone, easting and northing and data
41        points are assumed to be relative to this origin.
42        If geo_reference is None, the default geo ref object is used
43
44        default_attribute_name: Name of default attribute to be used with
45        get_attribute_values. The idea is that the dataset can be
46        equipped with information about which attribute to return.
47        If None, the default is the "first"
48       
49        file_name: Name of input netCDF file or xya file. netCDF file must
50        have dimensions "points" etc.
51        xya file is a CVS file with lats(x), longs(y) and elevation(a).
52        first line must be attribute name eg elevation
53       
54        The format for a .xya file is:
55            1st line:     [attribute names]
56            other lines:  x y [attributes]
57
58            for example:
59            elevation, friction
60            0.6, 0.7, 4.9, 0.3
61            1.9, 2.8, 5, 0.3
62            2.7, 2.4, 5.2, 0.3
63
64        The first two columns are always implicitly assumed to be x, y coordinates.
65        Use the same delimiter for the attribute names and the data
66
67        An xya file can optionally end with
68            #geo reference
69            56
70            466600.0
71            8644444.0
72
73        When the 1st # is the zone,
74        2nd # the xllcorner and
75        3rd # the yllcorner
76       
77        The format for a Points dictionary is:
78
79          ['pointlist'] a 2 column array describing points. 1st column x, 2nd column y.
80          ['attributelist'], a dictionary of 1D arrays, representing attribute values
81          at the point.  The dictionary key is the attribute header.
82          ['geo_reference'] a Geo_refernece object. Use if the point information
83            is relative. This is optional.
84            eg
85            dic['pointlist'] = [[1.0,2.0],[3.0,5.0]]
86            dic['attributelist']['elevation'] = [[7.0,5.0]
87               
88        delimiter: is the file delimiter that will be used when
89            importing the file
90           
91        verbose:
92         
93        """
94
95        if isinstance(data_points, basestring):
96            # assume data point is really a file name
97            file_name = data_points
98
99        self.set_verbose(verbose)
100        self.geo_reference=None #create the attribute
101        if file_name is None:
102            if delimiter is not None:
103                msg = 'No file specified yet a delimiter is provided!'
104                raise ValueError, msg
105            file_name = None
106            self.check_data_points(data_points)
107            self.set_attributes(attributes)
108            self.set_geo_reference(geo_reference)
109            self.set_default_attribute_name(default_attribute_name)
110
111        else:
112###                data = {}   
113                # watch for case where file name and points, attributes etc are provided!!
114                # if file name then all provided info will be removed!
115                self.import_points_file(file_name, delimiter, verbose)
116               
117                self.check_data_points(self.data_points)
118                self.set_attributes(self.attributes)
119                self.set_geo_reference(self.geo_reference)
120                self.set_default_attribute_name(default_attribute_name)
121
122       
123    def check_data_points(self, data_points):
124        """Checks data points
125        """
126       
127        if data_points is None:
128            self.data_points = None
129            msg = 'There is no data or file provided!'
130            raise ValueError, msg
131           
132        else:
133            self.data_points = ensure_numeric(data_points)
134            assert len(self.data_points.shape) == 2
135            assert self.data_points.shape[1] == 2
136
137    def set_attributes(self, attributes):
138        """Check and assign attributes dictionary
139        """
140       
141        from types import DictType
142       
143        if attributes is None:
144            self.attributes = None
145            return
146       
147        if not isinstance(attributes, DictType):
148            #Convert single attribute into dictionary
149            attributes = {'attribute': attributes}
150
151        #Check input attributes   
152        for key in attributes.keys():
153            try:
154                attributes[key] = ensure_numeric(attributes[key])
155            except:
156                msg = 'Attribute %s could not be converted' %key
157                msg += 'to a numeric vector'
158                raise msg
159
160        self.attributes = attributes   
161
162
163    def set_geo_reference(self, geo_reference):
164
165        from coordinate_transforms.geo_reference import Geo_reference
166
167        if geo_reference is None:
168            geo_reference = Geo_reference() # Use default
169        if not isinstance(geo_reference, Geo_reference):
170            msg = 'Argument geo_reference must be a valid Geo_reference \n'
171            msg += 'object or None.'
172            raise msg
173
174        # if a geo ref already exists, change the point data to
175        # represent the new geo-ref
176        if  self.geo_reference is not None:
177            #print "self.geo_reference",self.geo_reference
178            #print "geo_reference",geo_reference
179            #FIXME: Maybe put out a warning here...
180            self.data_points = self.get_data_points \
181                               (geo_reference=geo_reference)
182            #self.data_points = geo_reference.change_points_geo_ref \
183            #                   (self.data_points,
184            #                    self.geo_reference)
185           
186        self.geo_reference = geo_reference
187
188
189    def set_default_attribute_name(self, default_attribute_name):
190        self.default_attribute_name = default_attribute_name
191
192    def set_verbose(self, verbose = False):
193        if verbose is not False:
194            verbose = True
195        else:
196            verbose = False
197   
198    def get_geo_reference(self):
199        return self.geo_reference
200       
201    def get_data_points(self, absolute = True, geo_reference=None):
202        """Get coordinates for all data points as an Nx2 array
203
204        If absolute is True absolute UTM coordinates are returned otherwise
205        returned coordinates are relative to the internal georeference's
206        xll and yll corners.
207
208        Default: absolute is True.
209        """
210
211        if absolute is True and geo_reference is None:
212            return self.geo_reference.get_absolute(self.data_points)
213        elif geo_reference is not None:
214            return geo_reference.change_points_geo_ref \
215                               (self.data_points, 
216                                self.geo_reference)
217        else:
218            return self.data_points
219       
220   
221    def get_attributes(self, attribute_name = None):
222        """Return values for one named attribute.
223
224        If attribute_name is None, default_attribute_name is used
225        """
226
227        if attribute_name is None:
228            if self.default_attribute_name is not None:
229                attribute_name = self.default_attribute_name
230            else:
231                attribute_name = self.attributes.keys()[0] 
232                # above line takes the first one from keys
233               
234
235        msg = 'Attribute name %s does not exist in data set' %attribute_name
236        assert self.attributes.has_key(attribute_name), msg
237
238        return self.attributes[attribute_name]
239
240    def get_all_attributes(self):
241        """
242        Return values for all attributes.
243        """
244
245        return self.attributes
246
247    def __add__(self, other):
248        """
249        Returns the addition of 2 geospatical objects,
250        objects are concatencated to the end of each other
251           
252        NOTE: doesn't add if objects contain different
253        attributes 
254       
255        Always return relative points!
256        """
257
258        # find objects zone and checks if the same
259        geo_ref1 = self.get_geo_reference()
260        zone1 = geo_ref1.get_zone()
261       
262        geo_ref2 = other.get_geo_reference()
263        zone2 = geo_ref2.get_zone()
264
265        geo_ref1.reconcile_zones(geo_ref2)
266
267
268        # sets xll and yll as the smallest from self and other
269        # FIXME (Duncan and Ole): use lower left corner derived from absolute coordinates
270        if self.geo_reference.xllcorner <= other.geo_reference.xllcorner:
271            xll = self.geo_reference.xllcorner
272        else:
273            xll = other.geo_reference.xllcorner
274
275        if self.geo_reference.yllcorner <= other.geo_reference.yllcorner:
276            yll = self.geo_reference.yllcorner
277        else:
278            yll = other.geo_reference.yllcorner
279        new_geo_ref = Geo_reference(geo_ref1.get_zone(), xll, yll)
280
281        xll = yll = 0. 
282       
283        relative_points1 = self.get_data_points(absolute = False)
284        relative_points2 = other.get_data_points(absolute = False)
285
286       
287        new_relative_points1 = new_geo_ref.change_points_geo_ref(relative_points1, geo_ref1)
288        new_relative_points2 = new_geo_ref.change_points_geo_ref(relative_points2, geo_ref2)
289       
290        #Now both point sets are relative to new_geo_ref and zones have been reconciled
291
292        # Concatenate points
293        new_points = concatenate((new_relative_points1,
294                                  new_relative_points2),
295                                  axis = 0)
296     
297        # Concatenate attributes
298        new_attributes = {}
299        for x in self.attributes.keys():
300            if other.attributes.has_key(x):
301
302                attrib1 = self.attributes[x]
303                attrib2 = other.attributes[x]
304                new_attributes[x] = concatenate((attrib1, attrib2))
305
306            else:
307                msg = 'Both geospatial_data objects must have the same \n'
308                msg += 'attributes to allow addition.'
309                raise msg
310
311        # Instantiate new data object and return   
312        return Geospatial_data(new_points,
313                               new_attributes,
314                               new_geo_ref)
315   
316    ###
317    #  IMPORT/EXPORT POINTS FILES
318    ###
319
320    def import_points_file(self, file_name, delimiter = None, verbose = False):
321        """ load an .xya or .pts file
322        Note: will throw an IOError if it can't load the file.
323        Catch these!
324        """
325       
326        if access(file_name, F_OK) == 0 :
327            msg = 'File %s does not exist or is not accessible' %file_name
328            raise IOError, msg
329       
330        attributes = {}
331        if file_name[-4:]== ".xya":
332            try:
333                if delimiter == None:
334                    try:
335                        fd = open(file_name)
336                        data_points, attributes, geo_reference = _read_xya_file(fd, ',')
337                    except TitleError:
338                        fd.close()
339                        fd = open(file_name)
340                        data_points, attributes, geo_reference = _read_xya_file(fd, ' ')
341                else:
342                    fd = open(file_name)
343                    data_points, attributes, geo_reference = _read_xya_file(fd, delimiter)
344                fd.close()
345            except (IndexError,ValueError,SyntaxError):
346                fd.close()   
347                msg = 'Could not open file %s ' %file_name
348                raise IOError, msg
349            except IOError, e:
350                fd.close() 
351                # Catch this to add an error message
352                msg = 'Could not open file or incorrect file format %s:%s' %(file_name, e)
353                raise IOError, msg
354               
355        elif file_name[-4:]== ".pts":
356            try:
357                data_points, attributes, geo_reference = _read_pts_file(file_name, verbose)
358            except IOError, e:   
359                msg = 'Could not open file %s ' %file_name
360                raise IOError, msg       
361        else:     
362            msg = 'Extension %s is unknown' %file_name[-4:]
363            raise IOError, msg
364       
365#        print'in import data_points', data_points
366#        print'in import attributes', attributes
367#        print'in import data_points', geo_reference
368        self.data_points = data_points
369        self.attributes = attributes
370        self.geo_reference = geo_reference
371   
372#        return all_data
373   
374    def export_points_file(self, file_name, absolute=True):
375       
376        """
377        write a points file, file_name, as a text (.xya) or binary (.pts) file
378        file_name is the file name, including the extension
379        The point_dict is defined at the top of this file.
380       
381        If absolute is True data points at returned added to the xll and yll
382        and geo_reference as None
383       
384        If absolute is False data points at returned as relative to the xll
385        and yll and geo_reference remains uneffected
386        """
387   
388        if (file_name[-4:] == ".xya"):
389            if absolute is True:         
390                _write_xya_file(file_name, self.get_data_points(absolute=True), 
391                                   self.get_all_attributes())
392            else:
393                _write_xya_file(file_name, self.get_data_points(absolute=False), 
394                                   self.get_all_attributes(),
395                                   self.get_geo_reference())
396                                   
397        elif (file_name[-4:] == ".pts"):
398            if absolute is True:
399                _write_pts_file(file_name, self.get_data_points(absolute), 
400                                   self.get_all_attributes())
401            else:
402                _write_pts_file(file_name, self.get_data_points(absolute), 
403                                   self.get_all_attributes(),
404                                   self.get_geo_reference())
405        else:
406            msg = 'Unknown file type %s ' %file_name
407            raise IOError, msg
408   
409
410
411
412def _read_pts_file(file_name, verbose = False):
413    """Read .pts NetCDF file
414   
415    Return a dic of array of points, and dic of array of attribute
416    eg
417    dic['points'] = [[1.0,2.0],[3.0,5.0]]
418    dic['attributelist']['elevation'] = [[7.0,5.0]
419    """   
420
421    from Scientific.IO.NetCDF import NetCDFFile
422   
423    if verbose: print 'Reading ', file_name
424   
425       
426    # see if the file is there.  Throw a QUIET IO error if it isn't
427    fd = open(file_name,'r')
428    fd.close()
429   
430    #throws prints to screen if file not present
431    fid = NetCDFFile(file_name, 'r') 
432   
433#    point_atts = {} 
434        # Get the variables
435#    point_atts['pointlist'] = array(fid.variables['points'])
436    pointlist = array(fid.variables['points'])
437    keys = fid.variables.keys()
438    if verbose: print 'Got %d variables: %s' %(len(keys), keys)
439    try:
440        keys.remove('points')
441    except IOError, e:       
442        fid.close()   
443        msg = 'Expected keyword "points" but could not find it'
444        raise IOError, msg
445   
446    attributes = {}
447    for key in keys:
448        if verbose: print "reading attribute '%s'" %key
449           
450        attributes[key] = array(fid.variables[key])
451   
452#    point_atts['attributelist'] = attributes
453   
454    try:
455        geo_reference = Geo_reference(NetCDFObject=fid)
456#        point_atts['geo_reference'] = geo_reference
457    except AttributeError, e:
458        #geo_ref not compulsory
459#        point_atts['geo_reference'] = None
460        geo_reference = None
461   
462    fid.close()
463   
464    return pointlist, attributes, geo_reference
465
466
467def _read_xya_file( fd, delimiter):
468    points = []
469    pointattributes = []
470    title = fd.readline()
471    att_names = clean_line(title,delimiter)
472    att_dict = {}
473    line = fd.readline()
474    numbers = clean_line(line,delimiter)
475   
476    while len(numbers) > 1 and line[0] <> '#':
477        if numbers != []:
478            try:
479                x = float(numbers[0])
480                y = float(numbers[1])
481                points.append([x,y])
482                numbers.pop(0)
483                numbers.pop(0)
484                if len(att_names) != len(numbers):
485                    fd.close()
486                    # It might not be a problem with the title
487                    #raise TitleAmountError
488                    raise IOError
489                for i,num in enumerate(numbers):
490                    num.strip()
491                    if num != '\n' and num != '':
492                        #attributes.append(float(num))
493                        att_dict.setdefault(att_names[i],[]).append(float(num))
494            except ValueError:
495                raise SyntaxError
496        line = fd.readline()
497        numbers = clean_line(line,delimiter)
498   
499    if line == '':
500        geo_reference = None
501    else:
502        geo_reference = Geo_reference(ASCIIFile=fd,read_title=line)
503       
504   
505    pointlist = array(points).astype(Float)
506    for key in att_dict.keys():
507        att_dict[key] = array(att_dict[key]).astype(Float)
508   
509    return pointlist, att_dict, geo_reference
510
511def _write_pts_file(file_name, write_data_points,
512                                   write_attributes, 
513                                   write_geo_reference = None):
514    """
515    Write .pts NetCDF file   
516
517    NOTE: Below might not be valid ask Duncan : NB 5/2006
518   
519    WARNING: This function mangles the point_atts data structure
520    #F??ME: (DSG)This format has issues.
521    # There can't be an attribute called points
522    # consider format change
523    # method changed by NB not sure if above statement is correct
524   
525    should create new test for this
526    legal_keys = ['pointlist', 'attributelist', 'geo_reference']
527    for key in point_atts.keys():
528        msg = 'Key %s is illegal. Valid keys are %s' %(key, legal_keys)
529        assert key in legal_keys, msg
530    """   
531    from Scientific.IO.NetCDF import NetCDFFile
532    # NetCDF file definition
533    outfile = NetCDFFile(file_name, 'w')
534   
535    #Create new file
536    outfile.institution = 'Geoscience Australia'
537    outfile.description = 'NetCDF format for compact and portable storage ' +\
538                      'of spatial point data'
539   
540    # dimension definitions
541    shape = write_data_points.shape[0]
542    outfile.createDimension('number_of_points', shape) 
543    outfile.createDimension('number_of_dimensions', 2) #This is 2d data
544   
545    # variable definition
546    outfile.createVariable('points', Float, ('number_of_points',
547                                             'number_of_dimensions'))
548
549    #create variables 
550    outfile.variables['points'][:] = write_data_points #.astype(Float32)
551
552    for key in write_attributes.keys():
553        outfile.createVariable(key, Float, ('number_of_points',))
554        outfile.variables[key][:] = write_attributes[key] #.astype(Float32)
555       
556    if write_geo_reference is not None:
557        write_geo_reference.write_NetCDF(outfile)
558       
559    outfile.close() 
560 
561
562
563def _write_xya_file( file_name, write_data_points,
564                                    write_attributes, 
565                                    write_geo_reference = None, 
566                                    delimiter = ','):
567    """
568    export a file, file_name, with the xya format
569   
570    """
571    points = write_data_points
572    pointattributes = write_attributes
573   
574    fd = open(file_name,'w')
575    titlelist = ""
576    for title in pointattributes.keys():
577        titlelist = titlelist + title + delimiter
578    titlelist = titlelist[0:-len(delimiter)] # remove the last delimiter
579    fd.write(titlelist+"\n")
580    #<vertex #> <x> <y> [attributes]
581    for i,vert in enumerate( points):
582       
583        attlist = ","
584        for att in pointattributes.keys():
585            attlist = attlist + str(pointattributes[att][i])+ delimiter
586        attlist = attlist[0:-len(delimiter)] # remove the last delimiter
587        attlist.strip()
588        fd.write( str(vert[0]) + delimiter
589                  + str(vert[1])
590                  + attlist + "\n")
591
592    if  write_geo_reference is not None:
593        write_geo_reference.write_ASCII(fd)
594    fd.close()
595
596
597   
598def _point_atts2array(point_atts):
599    point_atts['pointlist'] = array(point_atts['pointlist']).astype(Float)
600   
601    for key in point_atts['attributelist'].keys():
602        point_atts['attributelist'][key]= array(point_atts['attributelist'][key]).astype(Float)
603    return point_atts
604
605   
606
607
608def geospatial_data2points_dictionary(geospatial_data):
609    """Convert geospatial data to points_dictionary
610    """
611
612    points_dictionary = {}
613    points_dictionary['pointlist'] = geospatial_data.data_points
614
615    points_dictionary['attributelist'] = {}
616
617    for attribute_name in geospatial_data.attributes.keys():
618        val = geospatial_data.attributes[attribute_name]
619        points_dictionary['attributelist'][attribute_name] = val
620
621    points_dictionary['geo_reference'] = geospatial_data.geo_reference
622
623    return points_dictionary
624
625   
626def points_dictionary2geospatial_data(points_dictionary):
627    """Convert points_dictionary to geospatial data object
628    """
629
630    msg = 'Points dictionary must have key pointlist' 
631    assert points_dictionary.has_key('pointlist'), msg
632
633    msg = 'Points dictionary must have key attributelist'     
634    assert points_dictionary.has_key('attributelist'), msg       
635
636    if points_dictionary.has_key('geo_reference'):
637        geo = points_dictionary['geo_reference']
638    else:
639        geo = None
640   
641    return Geospatial_data(points_dictionary['pointlist'],
642                           points_dictionary['attributelist'],
643                           geo_reference = geo)
644
645def clean_line(line,delimiter):     
646    """Remove whitespace
647    """
648    #print ">%s" %line
649    line = line.strip()
650    #print "stripped>%s" %line
651    numbers = line.split(delimiter)
652    i = len(numbers) - 1
653    while i >= 0:
654        if numbers[i] == '':
655            numbers.pop(i)
656        else:
657            numbers[i] = numbers[i].strip()
658       
659        i += -1
660    #for num in numbers:
661    #    print "num>%s<" %num
662    return numbers
663           
664def xxx_add_points_files(add_file1, add_file2, results_file):
665    """ adds the points and attruibutes of 2 pts or xya files and
666    writes it to a pts file
667   
668    NOTE will add the function to check and remove points from one set
669    that are shared. This will require some work and maybe __subtract__ function
670    """
671   
672    G1 = Geospatial_data(file_name = add_file1)
673    G2 = Geospatial_data(file_name = add_file2)
674    new_add_file2 = add_file2[:-4] + '.pts' 
675
676    G = G1 + G2
677   
678    #FIXME remove dependance on points to dict in export only!
679#    G_points_dict = geospatial_data2points_dictionary(G)
680#    export_points_file(results_file, G_points_dict)
681
682#    G_points_dict = geospatial_data2points_dictionary(G)
683
684    G.export_points_file(results_file)
685
686# '
687def ensure_absolute(points, geo_reference = None):
688    """
689    This function inputs several formats and
690    outputs one format. - a numeric array of absolute points.
691
692    Inputed formats are;
693    points: List or numeric array of coordinate pairs [xi, eta] of
694              points or geospatial object
695
696    mesh_origin: A geo_reference object or 3-tuples consisting of
697                 UTM zone, easting and northing.
698                 If specified vertex coordinates are assumed to be
699                 relative to their respective origins.
700    """
701    if isinstance(points,Geospatial_data):
702        points = points.get_data_points( \
703                absolute = True)
704        msg = "Use a Geospatial_data object or a mesh origin. Not both."
705        assert geo_reference == None, msg
706           
707    else:
708        points = ensure_numeric(points, Float)
709    if geo_reference is None:
710        geo = None #Geo_reference()
711    else:
712        if isinstance(geo_reference, Geo_reference):
713            geo = geo_reference
714        else:
715            geo = Geo_reference(geo_reference[0],
716                                geo_reference[1],
717                                geo_reference[2])
718        points = geo.get_absolute(points)
719    return points
720     
721
722def ensure_geospatial(points, geo_reference = None):
723    """
724    This function inputs several formats and
725    outputs one format. - a geospatial_data instance.
726
727    Inputed formats are;
728    points: List or numeric array of coordinate pairs [xi, eta] of
729              points or geospatial object
730
731    mesh_origin: A geo_reference object or 3-tuples consisting of
732                 UTM zone, easting and northing.
733                 If specified vertex coordinates are assumed to be
734                 relative to their respective origins.
735    """
736    if isinstance(points,Geospatial_data):
737        #points = points.get_data_points( \
738        #        absolute = True)
739        msg = "Use a Geospatial_data object or a mesh origin. Not both."
740        assert geo_reference == None, msg
741           
742    else:
743        points = ensure_numeric(points, Float)
744    if geo_reference is None:
745        geo = None #Geo_reference()
746    else:
747        if isinstance(geo_reference, Geo_reference):
748            geo = geo_reference
749        else:
750            geo = Geo_reference(geo_reference[0],
751                                geo_reference[1],
752                                geo_reference[2])
753        points = Geospatial_data(data_points=points, geo_reference=geo)
754        #points = geo.get_absolute(points)
755    return points
756             
Note: See TracBrowser for help on using the repository browser.