source: anuga_core/source/anuga/geospatial_data/geospatial_data.py @ 4660

Last change on this file since 4660 was 4660, checked in by duncan, 16 years ago

removing delimiter from geospatial_data.py

File size: 52.5 KB
Line 
1"""Class Geospatial_data - Manipulation of locations on the planet and
2associated attributes.
3
4"""
5from sys import maxint
6from os import access, F_OK, R_OK
7from types import DictType
8from warnings import warn
9from string import lower
10from Numeric import concatenate, array, Float, shape, reshape, ravel, take, \
11                        size, shape
12#from Array import tolist
13from RandomArray import randint
14from copy import deepcopy
15
16#from MA import tolist
17
18from Scientific.IO.NetCDF import NetCDFFile   
19from anuga.coordinate_transforms.lat_long_UTM_conversion import UTMtoLL   
20from anuga.utilities.numerical_tools import ensure_numeric
21from anuga.coordinate_transforms.geo_reference import Geo_reference, \
22     TitleError, DEFAULT_ZONE, ensure_geo_reference, write_NetCDF_georeference
23from anuga.coordinate_transforms.redfearn import convert_from_latlon_to_utm
24from anuga.utilities.anuga_exceptions import ANUGAError
25from anuga.config import points_file_block_line_size as MAX_READ_LINES   
26
27DEFAULT_ATTRIBUTE = 'elevation'
28
29class Geospatial_data:
30
31    def __init__(self,
32                 data_points=None, # this can also be a points file name
33                 attributes=None,
34                 geo_reference=None,
35                 default_attribute_name=None,
36                 file_name=None,
37                 latitudes=None,
38                 longitudes=None,
39                 points_are_lats_longs=False,
40                 max_read_lines=None,
41                 load_file_now=True,
42                 verbose=False):
43
44       
45        """
46        Create instance from data points and associated attributes
47
48        data_points: x,y coordinates in meters. Type must be either a
49        sequence of 2-tuples or an Mx2 Numeric array of floats.  A file name
50        with extension .txt, .cvs or .pts can also be passed in here.
51
52        attributes: Associated values for each data point. The type
53        must be either a list or an array of length M or a dictionary
54        of lists (or arrays) of length M. In the latter case the keys
55        in the dictionary represent the attribute names, in the former
56        the attribute will get the default name "elevation".
57       
58        geo_reference: Object representing the origin of the data
59        points. It contains UTM zone, easting and northing and data
60        points are assumed to be relative to this origin.
61        If geo_reference is None, the default geo ref object is used.
62
63        default_attribute_name: Name of default attribute to be used with
64        get_attribute_values. The idea is that the dataset can be
65        equipped with information about which attribute to return.
66        If None, the default is the "first"
67
68        latitudes, longitudes: Vectors of latitudes and longitudes,
69        used to specify location instead of points.
70       
71        points_are_lats_longs: Set this as true if the points are actually
72        lats and longs, not UTM
73
74        max_read_lines: The number of rows read into memory when using
75        blocking to read a file.
76
77        load_file_now:  If true the file is automatically loaded
78        into the geospatial instance. Used when blocking.
79       
80        file_name: Name of input netCDF file or .txt file. netCDF file must
81        have dimensions "points" etc.
82        .txt file is a comma seperated file with x, y and attribute
83        data.
84       
85        The first line has the titles of the columns.  The first two
86        column titles are checked to see if they start with lat or
87        long (not case sensitive).  If so the data is assumed to be
88        latitude and longitude, in decimal format and converted to
89        UTM.  Otherwise the first two columns are assumed to be the x
90        and y, and the title names acually used are ignored.
91 
92       
93        The format for a .txt file is:
94            1st line:     [column names]
95            other lines:  x y [attributes]
96
97            for example:
98            x, y, elevation, friction
99            0.6, 0.7, 4.9, 0.3
100            1.9, 2.8, 5, 0.3
101            2.7, 2.4, 5.2, 0.3
102
103        The first two columns have to be x, y or lat, long
104        coordinates.
105     
106       
107        The format for a Points dictionary is:
108
109          ['pointlist'] a 2 column array describing points. 1st column x,
110          2nd column y.
111          ['attributelist'], a dictionary of 1D arrays, representing
112          attribute values at the point.  The dictionary key is the attribute
113          header.
114          ['geo_reference'] a Geo_refernece object. Use if the point
115          information is relative. This is optional.
116            eg
117            dic['pointlist'] = [[1.0,2.0],[3.0,5.0]]
118            dic['attributelist']['elevation'] = [[7.0,5.0]
119               
120        verbose:
121
122         
123        """
124
125        if isinstance(data_points, basestring):
126            # assume data point is really a file name
127            file_name = data_points
128
129        self.set_verbose(verbose)
130        self.geo_reference=None #create the attribute
131        self.file_name = file_name
132        self.max_read_lines = max_read_lines
133
134        if file_name is None:
135            if latitudes is not None or longitudes is not None or \
136                   points_are_lats_longs:
137                data_points, geo_reference =  \
138                             _set_using_lat_long(latitudes=latitudes,
139                                  longitudes=longitudes,
140                                  geo_reference=geo_reference,
141                                  data_points=data_points,
142                                  points_are_lats_longs=points_are_lats_longs)
143            self.check_data_points(data_points)
144            self.set_attributes(attributes)
145            self.set_geo_reference(geo_reference)
146            self.set_default_attribute_name(default_attribute_name)
147
148        elif load_file_now is True:
149            # watch for case where file name and points,
150            # attributes etc are provided!!
151            # if file name then all provided info will be removed!
152
153            if verbose is True:
154                if file_name is not None:
155                    print 'Loading Geospatial data from file: %s' %file_name
156           
157            self.import_points_file(file_name, verbose=verbose)
158               
159            self.check_data_points(self.data_points)
160            self.set_attributes(self.attributes) 
161            self.set_geo_reference(self.geo_reference)
162            self.set_default_attribute_name(default_attribute_name)
163
164        if verbose is True:
165            if file_name is not None:
166                print 'Geospatial data created from file: %s' %file_name
167                if load_file_now is False:
168                    print 'Data will be loaded blockwise on demand'
169
170                    if file_name.endswith('csv') or file_name.endswith('txt'):
171                        print 'ASCII formats are not that great for '
172                        print 'blockwise reading. Consider storing this'
173                        print 'data as a pts NetCDF format'
174
175    def __len__(self):
176        return len(self.data_points)
177
178    def __repr__(self):
179        return str(self.get_data_points(absolute=True))
180   
181   
182    def check_data_points(self, data_points):
183        """Checks data points
184        """
185       
186        if data_points is None:
187            self.data_points = None
188            msg = 'There is no data or file provided!'
189            raise ValueError, msg
190           
191        else:
192            self.data_points = ensure_numeric(data_points)
193            #print "self.data_points.shape",self.data_points.shape
194            if not (0,) == self.data_points.shape:
195                assert len(self.data_points.shape) == 2
196                assert self.data_points.shape[1] == 2
197
198    def set_attributes(self, attributes):
199        """Check and assign attributes dictionary
200        """
201       
202        if attributes is None:
203            self.attributes = None
204            return
205       
206        if not isinstance(attributes, DictType):
207            #Convert single attribute into dictionary
208            attributes = {DEFAULT_ATTRIBUTE: attributes}
209
210        #Check input attributes   
211        for key in attributes.keys():
212            try:
213                attributes[key] = ensure_numeric(attributes[key])
214            except:
215                msg = 'Attribute %s could not be converted' %key
216                msg += 'to a numeric vector'
217                raise msg
218
219        self.attributes = attributes   
220
221
222    def set_geo_reference(self, geo_reference):
223        """
224        Set's the georeference of geospatial.
225        It can also be used to change the georeference
226        """
227        from anuga.coordinate_transforms.geo_reference import Geo_reference
228
229        if geo_reference is None:
230            geo_reference = Geo_reference() # Use default
231        geo_reference = ensure_geo_reference(geo_reference)
232        if not isinstance(geo_reference, Geo_reference):
233            msg = 'Argument geo_reference must be a valid Geo_reference \n'
234            msg += 'object or None.'
235            raise msg
236
237        # if a geo ref already exists, change the point data to
238        # represent the new geo-ref
239        if  self.geo_reference is not None:
240            #FIXME: Maybe put out a warning here...
241            self.data_points = self.get_data_points \
242                               (geo_reference=geo_reference)
243           
244        self.geo_reference = geo_reference
245
246
247    def set_default_attribute_name(self, default_attribute_name):
248        self.default_attribute_name = default_attribute_name
249
250    def set_verbose(self, verbose=False):
251        if verbose in [False, True]:
252            self.verbose = verbose
253        else:
254            msg = 'Illegal value: %s' %str(verbose)
255            raise Exception, msg
256
257    def clip(self, polygon, closed=True):
258        """Clip geospatial data by a polygon
259
260        Input
261          polygon - Either a list of points, an Nx2 array or
262                    a Geospatial data object.
263          closed - (optional) determine whether points on boundary should be
264          regarded as belonging to the polygon (closed = True)
265          or not (closed = False). Default is True.
266           
267        Output
268          New geospatial data object representing points inside
269          specified polygon.
270       
271        """
272
273        from anuga.utilities.polygon import inside_polygon
274
275        if isinstance(polygon, Geospatial_data):
276            # Polygon is an object - extract points
277            polygon = polygon.get_data_points()
278
279        points = self.get_data_points()   
280        inside_indices = inside_polygon(points, polygon, closed)
281
282        clipped_G = self.get_sample(inside_indices)
283#        clipped_points = take(points, inside_indices)
284
285        # Clip all attributes
286#        attributes = self.get_all_attributes()
287
288#        clipped_attributes = {}
289#        if attributes is not None:
290#            for key, att in attributes.items():
291#                clipped_attributes[key] = take(att, inside_indices)
292
293#        return Geospatial_data(clipped_points, clipped_attributes)
294        return clipped_G
295       
296       
297    def clip_outside(self, polygon, closed=True):
298        """Clip geospatial date by a polygon, keeping data OUTSIDE of polygon
299
300        Input
301          polygon - Either a list of points, an Nx2 array or
302                    a Geospatial data object.
303          closed - (optional) determine whether points on boundary should be
304          regarded as belonging to the polygon (closed = True)
305          or not (closed = False). Default is True.
306         
307        Output
308          Geospatial data object representing point OUTSIDE specified polygon
309       
310        """
311
312        from anuga.utilities.polygon import outside_polygon
313
314        if isinstance(polygon, Geospatial_data):
315            # Polygon is an object - extract points
316            polygon = polygon.get_data_points()
317
318        points = self.get_data_points()   
319        outside_indices = outside_polygon(points, polygon, closed)
320
321        clipped_G = self.get_sample(outside_indices)
322
323#        clipped_points = take(points, outside_indices)
324
325        # Clip all attributes
326#        attributes = self.get_all_attributes()
327
328#        clipped_attributes = {}
329#        if attributes is not None:
330#            for key, att in attributes.items():
331#                clipped_attributes[key] = take(att, outside_indices)
332
333#        return Geospatial_data(clipped_points, clipped_attributes)
334        return clipped_G
335
336   
337    def get_geo_reference(self):
338        return self.geo_reference
339       
340    def get_data_points(self, absolute=True, geo_reference=None,
341                        as_lat_long=False,isSouthHemisphere=True):
342        """Get coordinates for all data points as an Nx2 array
343
344        If absolute is False returned coordinates are relative to the
345        internal georeference's xll and yll corners, otherwise
346        absolute UTM coordinates are returned.
347
348        If a geo_reference is passed the points are returned relative
349        to that geo_reference.
350       
351        isSH (isSouthHemisphere) is only used when getting data
352        points "as_lat_long" is True and if FALSE will return lats and
353        longs valid for the Northern Hemisphere.
354
355        Default: absolute is True.
356        """
357        if as_lat_long is True:
358            msg = "Points need a zone to be converted into lats and longs"
359            assert self.geo_reference is not None, msg
360            zone = self.geo_reference.get_zone()
361            assert self.geo_reference.get_zone() is not DEFAULT_ZONE, msg
362            lats_longs = []
363            for point in self.get_data_points(True):
364                ### UTMtoLL(northing, easting, zone,
365                lat_calced, long_calced = UTMtoLL(point[1],point[0], 
366                                                  zone, isSouthHemisphere)
367                lats_longs.append((lat_calced, long_calced)) # to hash
368            return lats_longs
369           
370        if absolute is True and geo_reference is None:
371            return self.geo_reference.get_absolute(self.data_points)
372        elif geo_reference is not None:
373            return geo_reference.change_points_geo_ref \
374                               (self.data_points, 
375                                self.geo_reference)
376        else:
377            return self.data_points
378       
379   
380    def get_attributes(self, attribute_name=None):
381        """Return values for one named attribute.
382
383        If attribute_name is None, default_attribute_name is used
384        """
385
386        if attribute_name is None:
387            if self.default_attribute_name is not None:
388                attribute_name = self.default_attribute_name
389            else:
390                attribute_name = self.attributes.keys()[0] 
391                # above line takes the first one from keys
392       
393        if self.verbose is True:
394            print 'Using attribute %s' %attribute_name
395            print 'Available attributes: %s' %(self.attributes.keys())       
396
397        msg = 'Attribute name %s does not exist in data set' %attribute_name
398        assert self.attributes.has_key(attribute_name), msg
399
400        return self.attributes[attribute_name]
401
402    def get_all_attributes(self):
403        """
404        Return values for all attributes.
405        The return value is either None or a dictionary (possibly empty).
406        """
407
408        return self.attributes
409
410    def __add__(self, other):
411        """
412        Returns the addition of 2 geospatical objects,
413        objects are concatencated to the end of each other
414           
415        NOTE: doesn't add if objects contain different
416        attributes 
417       
418        Always return absolute points!
419        This alse means, that if you add None to the object,
420        it will be turned into absolute coordinates
421
422        other can be None in which case nothing is added to self.
423        """
424
425
426        # find objects zone and checks if the same
427        geo_ref1 = self.get_geo_reference()
428        zone1 = geo_ref1.get_zone()
429
430
431        if other is not None:
432
433            geo_ref2 = other.get_geo_reference()
434            zone2 = geo_ref2.get_zone()
435
436            geo_ref1.reconcile_zones(geo_ref2)
437
438       
439            new_points = concatenate((self.get_data_points(absolute=True),
440                                      other.get_data_points(absolute=True)),
441                                      axis = 0)       
442
443       
444     
445            # Concatenate attributes if any
446            if self.attributes is None:
447                if other.attributes is not None:
448                    msg = 'Geospatial data must have the same \n'
449                    msg += 'attributes to allow addition.'
450                    raise Exception, msg
451               
452                new_attributes = None
453            else:   
454                new_attributes = {}
455                for x in self.attributes.keys():
456                    if other.attributes.has_key(x):
457
458                        attrib1 = self.attributes[x]
459                        attrib2 = other.attributes[x]
460                        new_attributes[x] = concatenate((attrib1, attrib2))
461
462                    else:
463                        msg = 'Geospatial data must have the same \n'
464                        msg += 'attributes to allow addition.'
465                        raise Exception, msg
466
467
468        else:
469            #other is None:
470           
471            new_points = self.get_data_points(absolute=True)
472            new_attributes = self.attributes
473
474                   
475
476        # Instantiate new data object and return absolute coordinates
477        new_geo_ref = Geo_reference(geo_ref1.get_zone(), 0.0, 0.0)
478        return Geospatial_data(new_points,
479                               new_attributes,
480                               new_geo_ref)
481
482
483    def __radd__(self, other):
484        """Handle cases like None + Geospatial_data(...)
485        """
486
487        return self + other
488
489   
490   
491    ###
492    #  IMPORT/EXPORT POINTS FILES
493    ###
494
495    def import_points_file(self, file_name, delimiter=None, verbose=False):
496        """ load an .txt, .csv or .pts file
497        Note: will throw an IOError/SyntaxError if it can't load the file.
498        Catch these!
499
500        Post condition: self.attributes dictionary has been set
501        """
502       
503        if access(file_name, F_OK) == 0 :
504            msg = 'File %s does not exist or is not accessible' %file_name
505            raise IOError, msg
506       
507        attributes = {}
508        if file_name[-4:]== ".pts":
509            try:
510                data_points, attributes, geo_reference =\
511                             _read_pts_file(file_name, verbose)
512            except IOError, e:   
513                msg = 'Could not open file %s ' %file_name
514                raise IOError, msg 
515       
516        elif file_name[-4:]== ".txt" or file_name[-4:]== ".csv":
517            try:
518                data_points, attributes, geo_reference =\
519                             _read_csv_file(file_name, verbose)
520            except IOError, e:
521                # This should only be if a file is not found
522                msg = 'Could not open file %s. ' %file_name
523                msg += 'Check the file location.'
524                raise IOError, msg
525            except SyntaxError, e:
526                # This should only be if there is a format error
527                msg = 'Could not open file %s. \n' %file_name
528                msg += Error_message['IOError']
529                #print "msg", msg
530                raise SyntaxError, msg
531        else:     
532            msg = 'Extension %s is unknown' %file_name[-4:]
533            raise IOError, msg
534#        print'in import data_points', data_points
535#        print'in import attributes', attributes
536#        print'in import data_points', geo_reference
537        self.data_points = data_points
538        self.attributes = attributes
539        self.geo_reference = geo_reference
540   
541#        return all_data
542   
543    def export_points_file(self, file_name, absolute=True, 
544                           as_lat_long=False, isSouthHemisphere=True):
545       
546        """
547        write a points file, file_name, as a text (.xya) or binary (.pts) file
548        file_name is the file name, including the extension
549        The point_dict is defined at the top of this file.
550       
551        If absolute is True data the xll and yll are added to the points value
552        and the xll and yll of the geo_reference are set to 0.
553       
554        If absolute is False data points at returned as relative to the xll
555        and yll and geo_reference remains uneffected
556       
557        isSouthHemisphere: is only used when getting data
558        points "as_lat_long" is True and if FALSE will return lats and
559        longs valid for the Northern Hemisphere.
560       
561        """
562
563        if (file_name[-4:] == ".pts"):
564            if absolute is True:
565                geo_ref = deepcopy(self.geo_reference)
566                geo_ref.xllcorner = 0
567                geo_ref.yllcorner = 0
568                _write_pts_file(file_name,
569                                self.get_data_points(absolute), 
570                                self.get_all_attributes(),
571                                geo_ref)
572            else:
573                _write_pts_file(file_name,
574                                self.get_data_points(absolute), 
575                                self.get_all_attributes(),
576                                self.get_geo_reference())
577               
578        elif file_name[-4:] == ".txt" or file_name[-4:] == ".csv":
579            msg = "ERROR: trying to write a .txt file with relative data."
580            assert absolute, msg
581            _write_csv_file(file_name,
582                            self.get_data_points(absolute=True,
583                                                 as_lat_long=as_lat_long,
584                                  isSouthHemisphere=isSouthHemisphere), 
585                            self.get_all_attributes(),
586                            as_lat_long=as_lat_long)
587             
588        elif file_name[-4:] == ".urs" :
589            msg = "ERROR: Can not write a .urs file as a relative file."
590            assert absolute, msg
591            _write_urs_file(file_name,
592                            self.get_data_points(as_lat_long=True,
593                                  isSouthHemisphere=isSouthHemisphere))
594                                                         
595        else:
596            msg = 'Unknown file type %s ' %file_name
597            raise IOError, msg
598       
599    def get_sample(self, indices):
600        """ Returns a object which is a subset of the original
601        and the data points and attributes in this new object refer to
602        the indices provided
603       
604        Input
605            indices- a list of integers that represent the new object
606        Output
607            New geospatial data object representing points specified by
608            the indices
609            """
610        #FIXME: add the geo_reference to this
611       
612        points = self.get_data_points()
613        sampled_points = take(points, indices)
614
615        attributes = self.get_all_attributes()
616
617        sampled_attributes = {}
618        if attributes is not None:
619            for key, att in attributes.items():
620                sampled_attributes[key] = take(att, indices)
621
622        return Geospatial_data(sampled_points, sampled_attributes)
623   
624   
625    def split(self, factor=0.5, verbose=False):
626        """Returns two
627        geospatial_data object, first is the size of the 'factor'
628        smaller the original and the second is the remainder. The two
629        new object are disjoin set of each other.
630       
631        Points of the two new object have selected RANDOMLY.
632        AND if factor is a decimal it will round (2.25 to 2 and 2.5 to 3)
633       
634        This method create two lists of indices which are passed into
635        get_sample.  The lists are created using random numbers, and
636        they are unique sets eg.  total_list(1,2,3,4,5,6,7,8,9)
637        random_list(1,3,6,7,9) and remainder_list(0,2,4,5,8)
638               
639        Input - the factor which to split the object, if 0.1 then 10% of the
640together             object will be returned
641       
642        Output - two geospatial_data objects that are disjoint sets of the
643            original
644            """
645       
646        i=0
647        self_size = len(self)
648        random_list = []
649        remainder_list = []
650        new_size = round(factor*self_size)
651       
652        #find unique random numbers
653        if verbose: print "make unique random number list and get indices"
654
655        total=array(range(self_size))
656        total_list = total.tolist()
657        if verbose: print "total list len",len(total_list)
658               
659        #there will be repeated random numbers however will not be a
660        #problem as they are being 'pop'ed out of array so if there
661        #are two numbers the same they will pop different indicies,
662        #still basically random
663        ## create list of non-unquie random numbers
664        if verbose: print "create random numbers list %s long" %new_size
665        random_num = randint(0,self_size-1,(int(new_size),))
666        random_num = random_num.tolist()
667
668        #need to sort and reverse so the pop() works correctly
669        random_num.sort()
670        random_num.reverse()       
671       
672        if verbose: print "make random number list and get indices"
673        j=0
674        k=1
675        remainder_list = total_list[:]
676        #pops array index (random_num) from remainder_list
677        # (which starts as the
678        #total_list and appends to random_list 
679        random_num_len = len(random_num)
680        for i in random_num:
681            random_list.append(remainder_list.pop(i))
682            j+=1
683            #prints progress
684            if verbose and round(random_num_len/10*k)==j:
685                print '(%s/%s)' %(j, random_num_len)
686                k+=1
687       
688        #FIXME: move to tests, it might take a long time
689        #then create an array of random lenght between 500 and 1000,
690        #and use a random factor between 0 and 1
691        #setup for assertion
692        test_total = random_list[:]
693        test_total.extend(remainder_list)
694        test_total.sort() 
695        msg = 'The two random lists made from the original list when added '\
696         'together DO NOT equal the original list'
697        assert (total_list==test_total),msg
698
699        #get new samples
700        if verbose: print "get values of indices for random list"
701        G1 = self.get_sample(random_list)
702        if verbose: print "get values of indices for opposite of random list"
703        G2 = self.get_sample(remainder_list)
704
705        return G1, G2
706
707    def __iter__(self):
708        """Read in the header, number_of_points and save the
709        file pointer position
710        """
711
712        from Scientific.IO.NetCDF import NetCDFFile
713       
714        #FIXME - what to do if the file isn't there
715
716        # FIXME (Ole): Shouldn't this go into the constructor?
717        # ... and shouldn't it be called block_size?
718        if self.max_read_lines is None:
719            self.max_read_lines = MAX_READ_LINES
720       
721        if self.file_name[-4:] == ".pts":
722           
723            # see if the file is there.  Throw a QUIET IO error if it isn't
724            fd = open(self.file_name,'r')
725            fd.close()
726   
727            #throws prints to screen if file not present
728            self.fid = NetCDFFile(self.file_name, 'r')
729           
730            self.blocking_georef, self.blocking_keys, self.number_of_points =\
731                                  _read_pts_file_header(self.fid,
732                                                        self.verbose)
733            self.start_row = 0
734            self.last_row = self.number_of_points           
735            self.show_verbose = 0
736            self.verbose_block_size = (self.last_row + 10)/10
737            self.block_number = 0
738            self.number_of_blocks = self.number_of_points/self.max_read_lines
739            # This computes the number of full blocks. The last block may be
740            # smaller and won't be ircluded in this estimate.
741           
742            if self.verbose is True:
743                print 'Reading %d points (in ~%d blocks) from file %s. '\
744                      %(self.number_of_points,
745                        self.number_of_blocks,
746                        self.file_name),
747                print 'Each block consists of %d data points'\
748                      %self.max_read_lines
749           
750        else:
751            # assume the file is a csv file
752            file_pointer = open(self.file_name)
753            self.header, self.file_pointer = \
754                         _read_csv_file_header(file_pointer)
755            self.blocking_georef = None # Used for reconciling zones
756
757        return self
758   
759   
760    def next(self):
761        """ read a block, instanciate a new geospatial and return it"""
762       
763        if self.file_name[-4:] == ".pts":
764            if self.start_row == self.last_row:
765                # read the end of the file last iteration
766                # remove blocking attributes
767                self.fid.close()
768                del self.max_read_lines
769                del self.blocking_georef
770                del self.last_row
771                del self.start_row
772                del self.blocking_keys
773                del self.fid
774                raise StopIteration
775            fin_row = self.start_row + self.max_read_lines
776            if fin_row > self.last_row:
777                fin_row = self.last_row
778
779               
780           
781            if self.verbose is True:
782                if self.show_verbose >= self.start_row and \
783                       self.show_verbose < fin_row:
784
785                   
786                    #print 'Doing %d of %d' %(self.start_row, self.last_row+10)
787
788                    print 'Reading block %d (points %d to %d) out of %d'\
789                          %(self.block_number,
790                            self.start_row,
791                            fin_row,
792                            self.number_of_blocks)
793
794                   
795                    self.show_verbose += max(self.max_read_lines,
796                                             self.verbose_block_size)
797
798                 
799            # Read next block
800            pointlist, att_dict, = \
801                       _read_pts_file_blocking(self.fid,
802                                               self.start_row,
803                                               fin_row,
804                                               self.blocking_keys)
805           
806            geo = Geospatial_data(pointlist, att_dict, self.blocking_georef)
807            self.start_row = fin_row
808           
809            self.block_number += 1           
810           
811        else:
812            # assume the file is a csv file
813            try:
814                pointlist, att_dict, geo_ref, self.file_pointer = \
815                   _read_csv_file_blocking( self.file_pointer,
816                                            self.header[:],
817                                            max_read_lines=self.max_read_lines,
818                                            verbose=self.verbose)
819
820                # Check that the zones haven't changed.
821                if geo_ref is not None:
822                    geo_ref.reconcile_zones(self.blocking_georef)
823                    self.blocking_georef = geo_ref
824                elif self.blocking_georef is not None:
825                   
826                    msg = 'Geo reference given, then not given.'
827                    msg += ' This should not happen.' 
828                    raise ValueError, msg
829                geo = Geospatial_data(pointlist, att_dict, geo_ref)
830            except StopIteration:
831                self.file_pointer.close()
832                del self.header
833                del self.file_pointer
834                raise StopIteration
835            except ANUGAError:
836                self.file_pointer.close()
837                del self.header
838                del self.file_pointer
839                raise
840            except SyntaxError:
841                self.file_pointer.close()
842                del self.header
843                del self.file_pointer
844                # This should only be if there is a format error
845                msg = 'Could not open file %s. \n' %self.file_name
846                msg += Error_message['IOError']
847                raise SyntaxError, msg
848        return geo
849##################### Error messages ###########
850Error_message = {}
851Em = Error_message
852Em['IOError'] = "NOTE: The format for a comma separated .txt/.csv file is:\n"
853Em['IOError'] += "        1st line:     [column names]\n"
854Em['IOError'] += "        other lines:  [x value], [y value], [attributes]\n"
855Em['IOError'] += "\n"
856Em['IOError'] += "           for example:\n"
857Em['IOError'] += "           x, y, elevation, friction\n"
858Em['IOError'] += "           0.6, 0.7, 4.9, 0.3\n"
859Em['IOError'] += "           1.9, 2.8, 5, 0.3\n"
860Em['IOError'] += "           2.7, 2.4, 5.2, 0.3\n"
861Em['IOError'] += "\n"
862Em['IOError'] += "The first two columns are assumed to be x, y coordinates.\n"
863Em['IOError'] += "The attribute values must be numeric.\n"
864
865def _set_using_lat_long(latitudes,
866                        longitudes,
867                        geo_reference,
868                        data_points,
869                        points_are_lats_longs):
870    """
871    if the points has lat long info, assume it is in (lat, long) order.
872    """
873   
874    if geo_reference is not None:
875        msg = """A georeference is specified yet latitude and longitude
876        are also specified!"""
877        raise ValueError, msg
878   
879    if data_points is not None and not points_are_lats_longs:
880        msg = """Data points are specified yet latitude and
881        longitude are also specified!"""
882        raise ValueError, msg
883   
884    if points_are_lats_longs:
885        if data_points is None:
886            msg = """Data points are not specified !"""
887            raise ValueError, msg
888        lats_longs = ensure_numeric(data_points)
889        latitudes = ravel(lats_longs[:,0:1])
890        longitudes = ravel(lats_longs[:,1:])
891       
892    if latitudes is None and longitudes is None:
893        msg = """Latitudes and Longitudes are not."""
894        raise ValueError, msg
895   
896    if latitudes is None:
897        msg = """Longitudes are specified yet latitudes aren't!"""
898        raise ValueError, msg
899   
900    if longitudes is None:
901        msg = """Latitudes are specified yet longitudes aren't!"""
902        raise ValueError, msg
903   
904    data_points, zone  = convert_from_latlon_to_utm(latitudes=latitudes,
905                                                    longitudes=longitudes)
906    return data_points, Geo_reference(zone=zone)
907
908   
909def _read_pts_file(file_name, verbose=False):
910    """Read .pts NetCDF file
911   
912    Return a dic of array of points, and dic of array of attribute
913    eg
914    dic['points'] = [[1.0,2.0],[3.0,5.0]]
915    dic['attributelist']['elevation'] = [[7.0,5.0]
916    """   
917
918    from Scientific.IO.NetCDF import NetCDFFile
919   
920    if verbose: print 'Reading ', file_name
921   
922       
923    # see if the file is there.  Throw a QUIET IO error if it isn't
924    fd = open(file_name,'r')
925    fd.close()
926   
927    #throws prints to screen if file not present
928    fid = NetCDFFile(file_name, 'r') 
929   
930    pointlist = array(fid.variables['points'])
931    keys = fid.variables.keys()
932    if verbose: print 'Got %d variables: %s' %(len(keys), keys)
933    try:
934        keys.remove('points')
935    except IOError, e:       
936        fid.close()   
937        msg = 'Expected keyword "points" but could not find it'
938        raise IOError, msg
939   
940    attributes = {}
941    for key in keys:
942        if verbose: print "reading attribute '%s'" %key
943           
944        attributes[key] = array(fid.variables[key])
945   
946   
947    try:
948        geo_reference = Geo_reference(NetCDFObject=fid)
949    except AttributeError, e:
950        geo_reference = None
951   
952    fid.close()
953   
954    return pointlist, attributes, geo_reference
955
956
957def _read_csv_file(file_name, verbose=False):
958    """Read .csv file
959   
960    Return a dic of array of points, and dic of array of attribute
961    eg
962    dic['points'] = [[1.0,2.0],[3.0,5.0]]
963    dic['attributelist']['elevation'] = [[7.0,5.0]
964    """
965   
966    file_pointer = open(file_name)
967    header, file_pointer = _read_csv_file_header(file_pointer)
968    try:
969        pointlist, att_dict, geo_ref, file_pointer = \
970                   _read_csv_file_blocking( \
971                file_pointer,
972                header,
973                max_read_lines=1e30) #If the file is bigger that this, block..
974    except ANUGAError:
975        file_pointer.close()
976        raise 
977    file_pointer.close()
978    return pointlist, att_dict, geo_ref   
979
980CSV_DELIMITER = ','
981def _read_csv_file_header(file_pointer,
982                          delimiter=CSV_DELIMITER,
983                          verbose=False):
984
985    """Read the header of a .csv file
986    Return a list of the header names
987    """
988    line = file_pointer.readline()
989    header = clean_line(line, delimiter)
990    return header, file_pointer
991
992def _read_csv_file_blocking(file_pointer, header,
993                            delimiter=CSV_DELIMITER,
994                            max_read_lines=MAX_READ_LINES,
995                            verbose=False):
996   
997
998    """
999    Read the body of a .csv file.
1000    header: The list header of the csv file, with the x and y labels.
1001    """
1002    points = []
1003    pointattributes = []
1004    att_dict = {}
1005
1006    #This is to remove the x and y headers.
1007    header = header[:]
1008    try:
1009        x_header = header.pop(0)
1010        y_header = header.pop(0)
1011    except IndexError:
1012        # if there are not two columns this will occur.
1013        # eg if it is a space seperated file
1014        raise SyntaxError
1015   
1016    read_lines = 0
1017    while read_lines<max_read_lines:
1018        line = file_pointer.readline()
1019        #print "line",line
1020        numbers = clean_line(line,delimiter)
1021        if len(numbers) <= 1:
1022            break
1023        if line[0] == '#':
1024            continue
1025        read_lines += 1
1026        try:
1027            x = float(numbers[0])
1028            y = float(numbers[1])
1029            points.append([x,y])
1030            numbers.pop(0)
1031            numbers.pop(0)
1032            if len(header) != len(numbers):
1033                file_pointer.close() 
1034                msg = "File load error.  There might be a problem with the file header"
1035                raise SyntaxError, msg
1036            for i,num in enumerate(numbers):
1037                num.strip()
1038                if num != '\n' and num != '':
1039                    #attributes.append(float(num))
1040                    att_dict.setdefault(header[i],[]).append(float(num))
1041        #except IOError:           
1042        except ValueError:
1043            raise SyntaxError
1044    if points == []:
1045        raise StopIteration
1046       
1047   
1048    pointlist = array(points).astype(Float)
1049    for key in att_dict.keys():
1050        att_dict[key] = array(att_dict[key]).astype(Float)
1051
1052    #Do stuff here so the info is in lat's and longs
1053    geo_ref = None
1054    x_header = lower(x_header[:3])
1055    y_header = lower(y_header[:3])
1056    if (x_header == 'lon' or  x_header == 'lat') and \
1057       (y_header == 'lon' or  y_header == 'lat'):
1058        if x_header == 'lon':
1059            longitudes = ravel(pointlist[:,0:1])
1060            latitudes = ravel(pointlist[:,1:])
1061        else:
1062            latitudes = ravel(pointlist[:,0:1])
1063            longitudes = ravel(pointlist[:,1:])
1064       
1065        pointlist, geo_ref = _set_using_lat_long(latitudes,
1066                                                 longitudes,
1067                                                 geo_reference=None,
1068                                                 data_points=None,
1069                                                 points_are_lats_longs=False)
1070    return pointlist, att_dict, geo_ref, file_pointer
1071
1072def _read_pts_file_header(fid, verbose=False):
1073
1074    """Read the geo_reference and number_of_points from a .pts file
1075    """
1076   
1077    keys = fid.variables.keys()
1078    try:
1079        keys.remove('points')
1080    except IOError, e:       
1081        fid.close()   
1082        msg = 'Expected keyword "points" but could not find it'
1083        raise IOError, msg
1084    if verbose: print 'Got %d variables: %s' %(len(keys), keys)
1085   
1086    try:
1087        geo_reference = Geo_reference(NetCDFObject=fid)
1088    except AttributeError, e:
1089        geo_reference = None
1090
1091    return geo_reference, keys, fid.dimensions['number_of_points']
1092
1093def _read_pts_file_blocking(fid, start_row, fin_row, keys):
1094                            #verbose=False):
1095   
1096
1097    """
1098    Read the body of a .csv file.
1099    header: The list header of the csv file, with the x and y labels.
1100    """
1101   
1102    pointlist = array(fid.variables['points'][start_row:fin_row])
1103   
1104    attributes = {}
1105    for key in keys:
1106        attributes[key] = array(fid.variables[key][start_row:fin_row])
1107
1108    return pointlist, attributes
1109   
1110def _write_pts_file(file_name,
1111                    write_data_points,
1112                    write_attributes=None, 
1113                    write_geo_reference=None):
1114    """
1115    Write .pts NetCDF file   
1116
1117    NOTE: Below might not be valid ask Duncan : NB 5/2006
1118   
1119    WARNING: This function mangles the point_atts data structure
1120    #F??ME: (DSG)This format has issues.
1121    # There can't be an attribute called points
1122    # consider format change
1123    # method changed by NB not sure if above statement is correct
1124   
1125    should create new test for this
1126    legal_keys = ['pointlist', 'attributelist', 'geo_reference']
1127    for key in point_atts.keys():
1128        msg = 'Key %s is illegal. Valid keys are %s' %(key, legal_keys)
1129        assert key in legal_keys, msg
1130    """   
1131    from Scientific.IO.NetCDF import NetCDFFile
1132    # NetCDF file definition
1133    outfile = NetCDFFile(file_name, 'w')
1134   
1135    #Create new file
1136    outfile.institution = 'Geoscience Australia'
1137    outfile.description = 'NetCDF format for compact and portable storage ' +\
1138                          'of spatial point data'
1139   
1140    # dimension definitions
1141    shape = write_data_points.shape[0]
1142    outfile.createDimension('number_of_points', shape) 
1143    outfile.createDimension('number_of_dimensions', 2) #This is 2d data
1144   
1145    # variable definition
1146    outfile.createVariable('points', Float, ('number_of_points',
1147                                             'number_of_dimensions'))
1148
1149    #create variables 
1150    outfile.variables['points'][:] = write_data_points #.astype(Float32)
1151
1152    if write_attributes is not None:
1153        for key in write_attributes.keys():
1154            outfile.createVariable(key, Float, ('number_of_points',))
1155            outfile.variables[key][:] = write_attributes[key] #.astype(Float32)
1156       
1157    if write_geo_reference is not None:
1158        write_NetCDF_georeference(write_geo_reference, outfile)
1159       
1160    outfile.close() 
1161 
1162def _write_csv_file(file_name,
1163                    write_data_points,
1164                    write_attributes=None,
1165                    as_lat_long=False,
1166                    delimiter=','):
1167    """
1168    export a file, file_name, with the csv format
1169   
1170    """
1171    points = write_data_points
1172    pointattributes = write_attributes
1173   
1174    fd = open(file_name,'w')
1175    if as_lat_long:
1176        titlelist = "latitude" + delimiter + "longitude"  + delimiter
1177    else:
1178        titlelist = "x" + delimiter + "y"  + delimiter
1179    if pointattributes is not None:   
1180        for title in pointattributes.keys():
1181            titlelist = titlelist + title + delimiter
1182        titlelist = titlelist[0:-len(delimiter)] # remove the last delimiter
1183    fd.write(titlelist+"\n")
1184   
1185    # <x/lat> <y/long> [attributes]
1186    for i, vert in enumerate( points):
1187
1188
1189        if pointattributes is not None:           
1190            attlist = ","
1191            for att in pointattributes.keys():
1192                attlist = attlist + str(pointattributes[att][i])+ delimiter
1193            attlist = attlist[0:-len(delimiter)] # remove the last delimiter
1194            attlist.strip()
1195        else:
1196            attlist = ''
1197
1198        fd.write(str(vert[0]) + delimiter +
1199                 str(vert[1]) + attlist + "\n")
1200
1201    fd.close()
1202 
1203def _write_urs_file(file_name,
1204                    points,
1205                    delimiter=' '):
1206    """
1207    export a file, file_name, with the urs format
1208    the data points are in lats and longs
1209   
1210    """   
1211    fd = open(file_name,'w')   
1212    fd.write(str(len(points))+"\n")   
1213    # <lat> <long> <id#>
1214    for i, vert in enumerate( points):
1215        fd.write(str(round(vert[0],7)) + delimiter + \
1216                 str(round(vert[1],7)) + delimiter +str(i)+ "\n")
1217    fd.close()
1218   
1219def _point_atts2array(point_atts):
1220    point_atts['pointlist'] = array(point_atts['pointlist']).astype(Float)
1221   
1222    for key in point_atts['attributelist'].keys():
1223        point_atts['attributelist'][key]=\
1224                array(point_atts['attributelist'][key]).astype(Float)
1225    return point_atts
1226
1227   
1228
1229
1230def geospatial_data2points_dictionary(geospatial_data):
1231    """Convert geospatial data to points_dictionary
1232    """
1233
1234    points_dictionary = {}
1235    points_dictionary['pointlist'] = geospatial_data.data_points
1236
1237    points_dictionary['attributelist'] = {}
1238
1239    for attribute_name in geospatial_data.attributes.keys():
1240        val = geospatial_data.attributes[attribute_name]
1241        points_dictionary['attributelist'][attribute_name] = val
1242
1243    points_dictionary['geo_reference'] = geospatial_data.geo_reference
1244
1245    return points_dictionary
1246
1247   
1248def points_dictionary2geospatial_data(points_dictionary):
1249    """Convert points_dictionary to geospatial data object
1250    """
1251
1252    msg = 'Points dictionary must have key pointlist' 
1253    assert points_dictionary.has_key('pointlist'), msg
1254
1255    msg = 'Points dictionary must have key attributelist'     
1256    assert points_dictionary.has_key('attributelist'), msg       
1257
1258    if points_dictionary.has_key('geo_reference'):
1259        geo = points_dictionary['geo_reference']
1260    else:
1261        geo = None
1262   
1263    return Geospatial_data(points_dictionary['pointlist'],
1264                           points_dictionary['attributelist'],
1265                           geo_reference = geo)
1266
1267def clean_line(line,delimiter):     
1268    """Remove whitespace
1269    """
1270    #print ">%s" %line
1271    line = line.strip()
1272    #print "stripped>%s" %line
1273    numbers = line.split(delimiter)
1274    i = len(numbers) - 1
1275    while i >= 0:
1276        if numbers[i] == '':
1277            numbers.pop(i)
1278        else:
1279            numbers[i] = numbers[i].strip()
1280       
1281        i += -1
1282    #for num in numbers:
1283    #    print "num>%s<" %num
1284    return numbers
1285           
1286def ensure_absolute(points, geo_reference=None):
1287    """
1288    This function inputs several formats and
1289    outputs one format. - a numeric array of absolute points.
1290
1291    Inputed formats are;
1292    points: List or numeric array of coordinate pairs [xi, eta] of
1293              points or geospatial object or points file name
1294
1295    mesh_origin: A geo_reference object or 3-tuples consisting of
1296                 UTM zone, easting and northing.
1297                 If specified vertex coordinates are assumed to be
1298                 relative to their respective origins.
1299    """
1300    if isinstance(points,type('')):
1301        #It's a string
1302        #assume it is a point file
1303        points = Geospatial_data(file_name = points)
1304       
1305    if isinstance(points,Geospatial_data):
1306        points = points.get_data_points( \
1307                absolute = True)
1308        msg = "Use a Geospatial_data object or a mesh origin. Not both."
1309        assert geo_reference == None, msg
1310           
1311    else:
1312        points = ensure_numeric(points, Float)
1313    if geo_reference is None:
1314        geo = None #Geo_reference()
1315    else:
1316        if isinstance(geo_reference, Geo_reference):
1317            geo = geo_reference
1318        else:
1319            geo = Geo_reference(geo_reference[0],
1320                                geo_reference[1],
1321                                geo_reference[2])
1322        points = geo.get_absolute(points)
1323    return points
1324     
1325
1326def ensure_geospatial(points, geo_reference=None):
1327    """
1328    This function inputs several formats and
1329    outputs one format. - a geospatial_data instance.
1330
1331    Inputed formats are;
1332    points: List or numeric array of coordinate pairs [xi, eta] of
1333              points or geospatial object
1334
1335    mesh_origin: A geo_reference object or 3-tuples consisting of
1336                 UTM zone, easting and northing.
1337                 If specified vertex coordinates are assumed to be
1338                 relative to their respective origins.
1339    """
1340    if isinstance(points,Geospatial_data):
1341        msg = "Use a Geospatial_data object or a mesh origin. Not both."
1342        assert geo_reference == None, msg
1343        return points   
1344    else:
1345        points = ensure_numeric(points, Float)
1346    if geo_reference is None:
1347        geo = None
1348    else:
1349        if isinstance(geo_reference, Geo_reference):
1350            geo = geo_reference
1351        else:
1352            geo = Geo_reference(geo_reference[0],
1353                                geo_reference[1],
1354                                geo_reference[2])
1355    points = Geospatial_data(data_points=points, geo_reference=geo)       
1356    return points
1357
1358
1359
1360def find_optimal_smoothing_parameter(data_file, 
1361                                     smoothing_list=None,
1362                                     attribute_smoothed='elevation', 
1363                                     mesh_file=None,
1364                                     mesh_resolution=100000,
1365                                     north_boundary=None,
1366                                     south_boundary=None,
1367                                     east_boundary=None,
1368                                     west_boundary=None,
1369                                     plot_name=None
1370                                     ):
1371    #alpha list, data file, mesh file or boundary area and resolution
1372    #plot name and attribute smoothed default elevation
1373
1374    from anuga.abstract_2d_finite_volumes import domain
1375    from anuga.shallow_water import Domain
1376    from anuga.geospatial_data.geospatial_data import Geospatial_data
1377    from anuga.coordinate_transforms import Geo_reference
1378    from anuga.pmesh.mesh_interface import create_mesh_from_regions
1379    from anuga.shallow_water import Reflective_boundary
1380   
1381    from anuga.utilities.numerical_tools import cov
1382    from Numeric import array, resize,shape,Float,zeros,take,argsort
1383    from pylab import plot, ion, hold,savefig,semilogx,plotting
1384
1385    if north_boundary or south_boundary or east_boundary or west_boundary is None:
1386        no_boundary=True
1387       
1388
1389    if mesh_file is None:
1390        mesh_file='temp.msh'
1391       
1392        if no_boundary is True:
1393            msg= 'all boundary information or a mesh file'
1394            raise msg
1395
1396    is_inside_polygon
1397   
1398    poly_topo = [[east_boundary,south_boundary],[east_boundary,north_boundary],
1399             [west_boundary,north_boundary],[west_boundary,south_boundary]]
1400#    mesh_file = 'temp.msh'
1401                 
1402    create_mesh_from_regions(poly_topo,
1403                         boundary_tags={'back': [2], 'side': [1,3],
1404                                        'ocean': [0]},
1405                         maximum_triangle_area=mesh_resolution,
1406                         filename=mesh_file,
1407                         use_cache=True,
1408                         verbose=True)
1409
1410
1411    topo_dir_name = 'pt_hedland_export.txt'
1412#    sample = 'sample.txt'
1413#    remainder ='remainder.txt'
1414    #split topo data
1415    G = Geospatial_data(file_name = topo_dir_name)
1416    print 'start split'
1417    G_small, G_other = G.split(0.1,True)
1418
1419
1420
1421    alphas = [0.000001, 0.00001, 0.0001, 0.001, 0.01, 0.1, 1.0, 10.0, 100.0,1000.0,100000.0]
1422    #alphas = [0.001,1,100]
1423    domains = {}
1424
1425    print 'Setup computational domain'
1426    for alpha in alphas:
1427   
1428        #think about making domain absolute when it is created
1429        domain = Domain(mesh_file, use_cache=False, verbose=True)
1430        print domain.statistics()
1431        domain.set_quantity(attribute_smoothed, 
1432                    geospatial_data = G_other,
1433                    use_cache = True,
1434                    verbose = True,
1435                    alpha = alpha)
1436        domains[alpha]=domain
1437
1438    points=G_small.get_data_points()
1439    data=array([],typecode=Float)
1440    data=resize(data,(len(points),3+len(alphas)))
1441    #gets relative point from sample
1442
1443    data[:,0]=points[:,0]
1444    data[:,1]=points[:,1]
1445    print'geo',domain.geo_reference,points[0:10],points[0:10,0]
1446    elevation_sample=G_small.get_attributes(attribute_name=attribute_smoothed)
1447
1448    data[:,2]=elevation_sample
1449
1450    print'data',data[0:10],len(alphas)
1451    normal_cov=array(zeros([len(alphas),2]),typecode=Float)
1452    i=0
1453
1454    for domain in domains:
1455    #    print'domain',domain
1456        points_geo=domains[domain].geo_reference.change_points_geo_ref(points)
1457   
1458
1459        print'poin',points_geo[0:10],elevation_sample[0:10]
1460        print'quantities',domains[domain].get_quantity_names()
1461        elevation_predicted=domains[domain].quantities[attribute_smoothed].get_values(interpolation_points=points_geo)
1462        print'elevation_predicted',elevation_predicted[0:10],len(points),len(elevation_predicted)
1463   
1464        data[:,i+3]=elevation_predicted
1465        sample_cov= cov(elevation_sample)
1466
1467        ele_cov= cov(elevation_sample-elevation_predicted)
1468        normal_cov[i,:]= [domain,ele_cov/sample_cov]
1469
1470        print'cov',alpha,'= ',normal_cov, ele_cov, sample_cov
1471        i+=1
1472   
1473   
1474    print'data',data[100:200]
1475    print'normal cov',normal_cov
1476    normal_cov0=normal_cov[:,0]
1477    normal_cov_new=take(normal_cov,argsort(normal_cov0))
1478    semilogx(normal_cov_new[:,0],normal_cov_new[:,1])
1479    savefig("alphas",dpi=300)
1480   
1481
1482             
1483
1484         
1485if __name__ == "__main__":
1486    pass
1487   
Note: See TracBrowser for help on using the repository browser.