source: anuga_core/source/anuga/geospatial_data/geospatial_data.py @ 4847

Last change on this file since 4847 was 4847, checked in by nick, 16 years ago

Fixed some print statements

File size: 56.1 KB
Line 
1"""Class Geospatial_data - Manipulation of locations on the planet and
2associated attributes.
3
4"""
5from sys import maxint
6from os import access, F_OK, R_OK,remove
7from types import DictType
8from warnings import warn
9from string import lower
10from Numeric import concatenate, array, Float, shape, reshape, ravel, take, \
11                        size, shape
12#from Array import tolist
13from RandomArray import randint, seed, get_seed
14from copy import deepcopy
15
16#from MA import tolist
17
18from Scientific.IO.NetCDF import NetCDFFile   
19from anuga.coordinate_transforms.lat_long_UTM_conversion import UTMtoLL   
20from anuga.utilities.numerical_tools import ensure_numeric
21from anuga.coordinate_transforms.geo_reference import Geo_reference, \
22     TitleError, DEFAULT_ZONE, ensure_geo_reference, write_NetCDF_georeference
23from anuga.coordinate_transforms.redfearn import convert_from_latlon_to_utm
24from anuga.utilities.anuga_exceptions import ANUGAError
25from anuga.config import points_file_block_line_size as MAX_READ_LINES   
26
27DEFAULT_ATTRIBUTE = 'elevation'
28
29class Geospatial_data:
30
    def __init__(self,
                 data_points=None, # this can also be a points file name
                 attributes=None,
                 geo_reference=None,
                 default_attribute_name=None,
                 file_name=None,
                 latitudes=None,
                 longitudes=None,
                 points_are_lats_longs=False,
                 max_read_lines=None,
                 load_file_now=True,
                 verbose=False):

        """
        Create instance from data points and associated attributes

        data_points: x,y coordinates in meters. Type must be either a
        sequence of 2-tuples or an Mx2 Numeric array of floats.  A file name
        with extension .txt, .csv or .pts can also be passed in here.

        attributes: Associated values for each data point. The type
        must be either a list or an array of length M or a dictionary
        of lists (or arrays) of length M. In the latter case the keys
        in the dictionary represent the attribute names, in the former
        the attribute will get the default name "elevation".

        geo_reference: Object representing the origin of the data
        points. It contains UTM zone, easting and northing and data
        points are assumed to be relative to this origin.
        If geo_reference is None, the default geo ref object is used.

        default_attribute_name: Name of default attribute to be used with
        get_attribute_values. The idea is that the dataset can be
        equipped with information about which attribute to return.
        If None, the default is the "first"

        latitudes, longitudes: Vectors of latitudes and longitudes,
        used to specify location instead of points.

        points_are_lats_longs: Set this as true if the points are actually
        lats and longs, not UTM

        max_read_lines: The number of rows read into memory when using
        blocking to read a file.

        load_file_now: If true the file is automatically loaded
        into the geospatial instance. Used when blocking.

        file_name: Name of input netCDF file or .txt file. netCDF file must
        have dimensions "points" etc.
        .txt file is a comma separated file with x, y and attribute
        data.

        The first line has the titles of the columns.  The first two
        column titles are checked to see if they start with lat or
        long (not case sensitive).  If so the data is assumed to be
        latitude and longitude, in decimal format and converted to
        UTM.  Otherwise the first two columns are assumed to be the x
        and y, and the title names actually used are ignored.

        The format for a .txt file is:
            1st line:     [column names]
            other lines:  x y [attributes]

            for example:
            x, y, elevation, friction
            0.6, 0.7, 4.9, 0.3
            1.9, 2.8, 5, 0.3
            2.7, 2.4, 5.2, 0.3

        The first two columns have to be x, y or lat, long
        coordinates.

        The format for a Points dictionary is:

          ['pointlist'] a 2 column array describing points. 1st column x,
          2nd column y.
          ['attributelist'], a dictionary of 1D arrays, representing
          attribute values at the point.  The dictionary key is the attribute
          header.
          ['geo_reference'] a Geo_reference object. Use if the point
          information is relative. This is optional.
            eg
            dic['pointlist'] = [[1.0,2.0],[3.0,5.0]]
            dic['attributelist']['elevation'] = [[7.0,5.0]

        verbose: If True, progress/diagnostic messages are printed.
        """

        if isinstance(data_points, basestring):
            # assume data point is really a file name
            file_name = data_points

        self.set_verbose(verbose)
        self.geo_reference=None # create the attribute before any setter runs
        self.file_name = file_name

        # Blocking size for file iteration; fall back on the configured
        # project-wide default when the caller does not supply one.
        if max_read_lines is None:
            self.max_read_lines = MAX_READ_LINES
        else:
            self.max_read_lines = max_read_lines

        if file_name is None:
            # In-memory construction: optionally convert lat/long input to
            # UTM points first, then validate and store everything.
            if latitudes is not None or longitudes is not None or \
                   points_are_lats_longs:
                data_points, geo_reference =  \
                             _set_using_lat_long(latitudes=latitudes,
                                  longitudes=longitudes,
                                  geo_reference=geo_reference,
                                  data_points=data_points,
                                  points_are_lats_longs=points_are_lats_longs)
            self.check_data_points(data_points)
            self.set_attributes(attributes)
            self.set_geo_reference(geo_reference)
            self.set_default_attribute_name(default_attribute_name)

        elif load_file_now is True:
            # watch for case where file name and points,
            # attributes etc are provided!!
            # if file name then all provided info will be removed!

            if verbose is True:
                if file_name is not None:
                    print 'Loading Geospatial data from file: %s' %file_name

            self.import_points_file(file_name, verbose=verbose)

            # Re-run the setters on the freshly loaded data so the same
            # validation applies as for the in-memory path above.
            self.check_data_points(self.data_points)
            self.set_attributes(self.attributes) 
            self.set_geo_reference(self.geo_reference)
            self.set_default_attribute_name(default_attribute_name)

        if verbose is True:
            if file_name is not None:
                print 'Geospatial data created from file: %s' %file_name
                if load_file_now is False:
                    print 'Data will be loaded blockwise on demand'

                    if file_name.endswith('csv') or file_name.endswith('txt'):
                        print 'ASCII formats are not that great for '
                        print 'blockwise reading. Consider storing this'
                        print 'data as a pts NetCDF format'
178
179    def __len__(self):
180        return len(self.data_points)
181
182    def __repr__(self):
183        return str(self.get_data_points(absolute=True))
184   
185   
186    def check_data_points(self, data_points):
187        """Checks data points
188        """
189       
190        if data_points is None:
191            self.data_points = None
192            msg = 'There is no data or file provided!'
193            raise ValueError, msg
194           
195        else:
196            self.data_points = ensure_numeric(data_points)
197            #print "self.data_points.shape",self.data_points.shape
198            if not (0,) == self.data_points.shape:
199                assert len(self.data_points.shape) == 2
200                assert self.data_points.shape[1] == 2
201
202    def set_attributes(self, attributes):
203        """Check and assign attributes dictionary
204        """
205       
206        if attributes is None:
207            self.attributes = None
208            return
209       
210        if not isinstance(attributes, DictType):
211            #Convert single attribute into dictionary
212            attributes = {DEFAULT_ATTRIBUTE: attributes}
213
214        #Check input attributes   
215        for key in attributes.keys():
216            try:
217                attributes[key] = ensure_numeric(attributes[key])
218            except:
219                msg = 'Attribute %s could not be converted' %key
220                msg += 'to a numeric vector'
221                raise msg
222
223        self.attributes = attributes   
224
225
226    def set_geo_reference(self, geo_reference):
227        """
228        Set's the georeference of geospatial.
229        It can also be used to change the georeference
230        """
231        from anuga.coordinate_transforms.geo_reference import Geo_reference
232
233        if geo_reference is None:
234            geo_reference = Geo_reference() # Use default
235        geo_reference = ensure_geo_reference(geo_reference)
236        if not isinstance(geo_reference, Geo_reference):
237            msg = 'Argument geo_reference must be a valid Geo_reference \n'
238            msg += 'object or None.'
239            raise msg
240
241        # if a geo ref already exists, change the point data to
242        # represent the new geo-ref
243        if  self.geo_reference is not None:
244            #FIXME: Maybe put out a warning here...
245            self.data_points = self.get_data_points \
246                               (geo_reference=geo_reference)
247           
248        self.geo_reference = geo_reference
249
250
251    def set_default_attribute_name(self, default_attribute_name):
252        self.default_attribute_name = default_attribute_name
253
254    def set_verbose(self, verbose=False):
255        if verbose in [False, True]:
256            self.verbose = verbose
257        else:
258            msg = 'Illegal value: %s' %str(verbose)
259            raise Exception, msg
260
261    def clip(self, polygon, closed=True):
262        """Clip geospatial data by a polygon
263
264        Input
265          polygon - Either a list of points, an Nx2 array or
266                    a Geospatial data object.
267          closed - (optional) determine whether points on boundary should be
268          regarded as belonging to the polygon (closed = True)
269          or not (closed = False). Default is True.
270           
271        Output
272          New geospatial data object representing points inside
273          specified polygon.
274       
275        """
276
277        from anuga.utilities.polygon import inside_polygon
278
279        if isinstance(polygon, Geospatial_data):
280            # Polygon is an object - extract points
281            polygon = polygon.get_data_points()
282
283        points = self.get_data_points()   
284        inside_indices = inside_polygon(points, polygon, closed)
285
286        clipped_G = self.get_sample(inside_indices)
287#        clipped_points = take(points, inside_indices)
288
289        # Clip all attributes
290#        attributes = self.get_all_attributes()
291
292#        clipped_attributes = {}
293#        if attributes is not None:
294#            for key, att in attributes.items():
295#                clipped_attributes[key] = take(att, inside_indices)
296
297#        return Geospatial_data(clipped_points, clipped_attributes)
298        return clipped_G
299       
300       
301    def clip_outside(self, polygon, closed=True):
302        """Clip geospatial date by a polygon, keeping data OUTSIDE of polygon
303
304        Input
305          polygon - Either a list of points, an Nx2 array or
306                    a Geospatial data object.
307          closed - (optional) determine whether points on boundary should be
308          regarded as belonging to the polygon (closed = True)
309          or not (closed = False). Default is True.
310         
311        Output
312          Geospatial data object representing point OUTSIDE specified polygon
313       
314        """
315
316        from anuga.utilities.polygon import outside_polygon
317
318        if isinstance(polygon, Geospatial_data):
319            # Polygon is an object - extract points
320            polygon = polygon.get_data_points()
321
322        points = self.get_data_points()   
323        outside_indices = outside_polygon(points, polygon, closed)
324
325        clipped_G = self.get_sample(outside_indices)
326
327#        clipped_points = take(points, outside_indices)
328
329        # Clip all attributes
330#        attributes = self.get_all_attributes()
331
332#        clipped_attributes = {}
333#        if attributes is not None:
334#            for key, att in attributes.items():
335#                clipped_attributes[key] = take(att, outside_indices)
336
337#        return Geospatial_data(clipped_points, clipped_attributes)
338        return clipped_G
339
340   
341    def get_geo_reference(self):
342        return self.geo_reference
343       
344    def get_data_points(self, absolute=True, geo_reference=None,
345                        as_lat_long=False,isSouthHemisphere=True):
346        """Get coordinates for all data points as an Nx2 array
347
348        If absolute is False returned coordinates are relative to the
349        internal georeference's xll and yll corners, otherwise
350        absolute UTM coordinates are returned.
351
352        If a geo_reference is passed the points are returned relative
353        to that geo_reference.
354       
355        isSH (isSouthHemisphere) is only used when getting data
356        points "as_lat_long" is True and if FALSE will return lats and
357        longs valid for the Northern Hemisphere.
358
359        Default: absolute is True.
360        """
361        if as_lat_long is True:
362            msg = "Points need a zone to be converted into lats and longs"
363            assert self.geo_reference is not None, msg
364            zone = self.geo_reference.get_zone()
365            assert self.geo_reference.get_zone() is not DEFAULT_ZONE, msg
366            lats_longs = []
367            for point in self.get_data_points(True):
368                ### UTMtoLL(northing, easting, zone,
369                lat_calced, long_calced = UTMtoLL(point[1],point[0], 
370                                                  zone, isSouthHemisphere)
371                lats_longs.append((lat_calced, long_calced)) # to hash
372            return lats_longs
373           
374        if absolute is True and geo_reference is None:
375            return self.geo_reference.get_absolute(self.data_points)
376        elif geo_reference is not None:
377            return geo_reference.change_points_geo_ref \
378                               (self.data_points, 
379                                self.geo_reference)
380        else:
381            return self.data_points
382       
383   
384    def get_attributes(self, attribute_name=None):
385        """Return values for one named attribute.
386
387        If attribute_name is None, default_attribute_name is used
388        """
389
390        if attribute_name is None:
391            if self.default_attribute_name is not None:
392                attribute_name = self.default_attribute_name
393            else:
394                attribute_name = self.attributes.keys()[0] 
395                # above line takes the first one from keys
396       
397        if self.verbose is True:
398            print 'Using attribute %s' %attribute_name
399            print 'Available attributes: %s' %(self.attributes.keys())       
400
401        msg = 'Attribute name %s does not exist in data set' %attribute_name
402        assert self.attributes.has_key(attribute_name), msg
403
404        return self.attributes[attribute_name]
405
406    def get_all_attributes(self):
407        """
408        Return values for all attributes.
409        The return value is either None or a dictionary (possibly empty).
410        """
411
412        return self.attributes
413
414    def __add__(self, other):
415        """
416        Returns the addition of 2 geospatical objects,
417        objects are concatencated to the end of each other
418           
419        NOTE: doesn't add if objects contain different
420        attributes 
421       
422        Always return absolute points!
423        This alse means, that if you add None to the object,
424        it will be turned into absolute coordinates
425
426        other can be None in which case nothing is added to self.
427        """
428
429
430        # find objects zone and checks if the same
431        geo_ref1 = self.get_geo_reference()
432        zone1 = geo_ref1.get_zone()
433
434
435        if other is not None:
436
437            geo_ref2 = other.get_geo_reference()
438            zone2 = geo_ref2.get_zone()
439
440            geo_ref1.reconcile_zones(geo_ref2)
441
442       
443            new_points = concatenate((self.get_data_points(absolute=True),
444                                      other.get_data_points(absolute=True)),
445                                      axis = 0)       
446
447       
448     
449            # Concatenate attributes if any
450            if self.attributes is None:
451                if other.attributes is not None:
452                    msg = 'Geospatial data must have the same \n'
453                    msg += 'attributes to allow addition.'
454                    raise Exception, msg
455               
456                new_attributes = None
457            else:   
458                new_attributes = {}
459                for x in self.attributes.keys():
460                    if other.attributes.has_key(x):
461
462                        attrib1 = self.attributes[x]
463                        attrib2 = other.attributes[x]
464                        new_attributes[x] = concatenate((attrib1, attrib2))
465
466                    else:
467                        msg = 'Geospatial data must have the same \n'
468                        msg += 'attributes to allow addition.'
469                        raise Exception, msg
470
471
472        else:
473            #other is None:
474           
475            new_points = self.get_data_points(absolute=True)
476            new_attributes = self.attributes
477
478                   
479
480        # Instantiate new data object and return absolute coordinates
481        new_geo_ref = Geo_reference(geo_ref1.get_zone(), 0.0, 0.0)
482        return Geospatial_data(new_points,
483                               new_attributes,
484                               new_geo_ref)
485
486
487    def __radd__(self, other):
488        """Handle cases like None + Geospatial_data(...)
489        """
490
491        return self + other
492
493   
494   
495    ###
496    #  IMPORT/EXPORT POINTS FILES
497    ###
498
499    def import_points_file(self, file_name, delimiter=None, verbose=False):
500        """ load an .txt, .csv or .pts file
501        Note: will throw an IOError/SyntaxError if it can't load the file.
502        Catch these!
503
504        Post condition: self.attributes dictionary has been set
505        """
506       
507        if access(file_name, F_OK) == 0 :
508            msg = 'File %s does not exist or is not accessible' %file_name
509            raise IOError, msg
510       
511        attributes = {}
512        if file_name[-4:]== ".pts":
513            try:
514                data_points, attributes, geo_reference =\
515                             _read_pts_file(file_name, verbose)
516            except IOError, e:   
517                msg = 'Could not open file %s ' %file_name
518                raise IOError, msg 
519       
520        elif file_name[-4:]== ".txt" or file_name[-4:]== ".csv":
521            try:
522                data_points, attributes, geo_reference =\
523                             _read_csv_file(file_name, verbose)
524            except IOError, e:
525                # This should only be if a file is not found
526                msg = 'Could not open file %s. ' %file_name
527                msg += 'Check the file location.'
528                raise IOError, msg
529            except SyntaxError, e:
530                # This should only be if there is a format error
531                msg = 'Could not open file %s. \n' %file_name
532                msg += Error_message['IOError']
533                #print "msg", msg
534                raise SyntaxError, msg
535        else:     
536            msg = 'Extension %s is unknown' %file_name[-4:]
537            raise IOError, msg
538#        print'in import data_points', data_points
539#        print'in import attributes', attributes
540#        print'in import data_points', geo_reference
541        self.data_points = data_points
542        self.attributes = attributes
543        self.geo_reference = geo_reference
544   
545#        return all_data
546   
547    def export_points_file(self, file_name, absolute=True, 
548                           as_lat_long=False, isSouthHemisphere=True):
549       
550        """
551        write a points file, file_name, as a text (.csv) or binary (.pts) file
552        file_name is the file name, including the extension
553        The point_dict is defined at the top of this file.
554       
555        If absolute is True data the xll and yll are added to the points value
556        and the xll and yll of the geo_reference are set to 0.
557       
558        If absolute is False data points at returned as relative to the xll
559        and yll and geo_reference remains uneffected
560       
561        isSouthHemisphere: is only used when getting data
562        points "as_lat_long" is True and if FALSE will return lats and
563        longs valid for the Northern Hemisphere.
564       
565        """
566
567        if (file_name[-4:] == ".pts"):
568            if absolute is True:
569                geo_ref = deepcopy(self.geo_reference)
570                geo_ref.xllcorner = 0
571                geo_ref.yllcorner = 0
572                _write_pts_file(file_name,
573                                self.get_data_points(absolute), 
574                                self.get_all_attributes(),
575                                geo_ref)
576            else:
577                _write_pts_file(file_name,
578                                self.get_data_points(absolute), 
579                                self.get_all_attributes(),
580                                self.get_geo_reference())
581               
582        elif file_name[-4:] == ".txt" or file_name[-4:] == ".csv":
583            msg = "ERROR: trying to write a .txt file with relative data."
584            assert absolute, msg
585            _write_csv_file(file_name,
586                            self.get_data_points(absolute=True,
587                                                 as_lat_long=as_lat_long,
588                                  isSouthHemisphere=isSouthHemisphere), 
589                            self.get_all_attributes(),
590                            as_lat_long=as_lat_long)
591             
592        elif file_name[-4:] == ".urs" :
593            msg = "ERROR: Can not write a .urs file as a relative file."
594            assert absolute, msg
595            _write_urs_file(file_name,
596                            self.get_data_points(as_lat_long=True,
597                                  isSouthHemisphere=isSouthHemisphere))
598                                                         
599        else:
600            msg = 'Unknown file type %s ' %file_name
601            raise IOError, msg
602       
603    def get_sample(self, indices):
604        """ Returns a object which is a subset of the original
605        and the data points and attributes in this new object refer to
606        the indices provided
607       
608        Input
609            indices- a list of integers that represent the new object
610        Output
611            New geospatial data object representing points specified by
612            the indices
613            """
614        #FIXME: add the geo_reference to this
615       
616        points = self.get_data_points()
617        sampled_points = take(points, indices)
618
619        attributes = self.get_all_attributes()
620
621        sampled_attributes = {}
622        if attributes is not None:
623            for key, att in attributes.items():
624                sampled_attributes[key] = take(att, indices)
625
626        return Geospatial_data(sampled_points, sampled_attributes)
627   
628   
    def split(self, factor=0.5,seed_num=None, verbose=False):
        """Randomly split this object into two disjoint Geospatial_data
        objects.

        The first returned object holds 'factor' of the points (rounded:
        2.25 -> 2, 2.5 -> 3), the second holds the remainder.  Points are
        selected RANDOMLY, and the two objects are disjoint subsets of
        the original.

        This method creates two lists of indices which are passed into
        get_sample.  The lists are created using random numbers, and
        they are unique sets eg.  total_list(1,2,3,4,5,6,7,8,9)
        random_list(1,3,6,7,9) and remainder_list(0,2,4,5,8)

        Input - factor: the fraction to split off; if 0.1 then 10% of the
            points go into the first returned object.
                seed_num: optional seed for the random generator,
            mainly useful to make unit tests repeatable.

        Output - two Geospatial_data objects that are disjoint sets of
            the original
            """

        i=0
        self_size = len(self)
        random_list = []
        remainder_list = []
        new_size = round(factor*self_size)

        # Find unique random numbers
        if verbose: print "make unique random number list and get indices"

        total=array(range(self_size))
        total_list = total.tolist()
        if verbose: print "total list len",len(total_list)

        # There will be repeated random numbers however will not be a
        # problem as they are being 'pop'ed out of array so if there
        # are two numbers the same they will pop different indicies,
        # still basically random
        ## create list of non-unique random numbers
        if verbose: print "create random numbers list %s long" %new_size

        # Set seed if provided, mainly important for unit test!
        # plus recalculate seed when no seed provided.
        if seed_num != None:
            seed(seed_num,seed_num)
        else:
            seed()
        if verbose: print "seed:", get_seed()

        random_num = randint(0,self_size-1,(int(new_size),))
        random_num = random_num.tolist()

        # need to sort and reverse so the pop() works correctly:
        # popping from the highest index downwards keeps the lower
        # indices valid as the list shrinks
        random_num.sort()
        random_num.reverse()

        if verbose: print "make random number list and get indices"
        j=0
        k=1
        remainder_list = total_list[:]
        # pops array index (random_num) from remainder_list
        # (which starts as the
        # total_list) and appends to random_list
        random_num_len = len(random_num)
        for i in random_num:
            random_list.append(remainder_list.pop(i))
            j+=1
            # prints progress roughly every 10%
            if verbose and round(random_num_len/10*k)==j:
                print '(%s/%s)' %(j, random_num_len)
                k+=1

        # FIXME: move to tests, it might take a long time
        # then create an array of random length between 500 and 1000,
        # and use a random factor between 0 and 1
        # setup for assertion: the two halves must re-form the original
        test_total = random_list[:]
        test_total.extend(remainder_list)
        test_total.sort() 
        msg = 'The two random lists made from the original list when added '\
         'together DO NOT equal the original list'
        assert (total_list==test_total),msg

        # Get new samples
        if verbose: print "get values of indices for random list"
        G1 = self.get_sample(random_list)
        if verbose: print "get values of indices for opposite of random list"
        G2 = self.get_sample(remainder_list)

        return G1, G2
720
    def __iter__(self):
        """Prepare blockwise iteration over the underlying file.

        Reads the header and number_of_points, and saves the file
        pointer position so that next() can deliver one block of points
        per iteration.  Returns self.
        """

        from Scientific.IO.NetCDF import NetCDFFile

        # FIXME - what to do if the file isn't there

        # FIXME (Ole): Shouldn't this go into the constructor?
        # This method acts like the constructor when blocking.
        # ... and shouldn't it be called block_size?
        #
        if self.max_read_lines is None:
            self.max_read_lines = MAX_READ_LINES

        if self.file_name[-4:] == ".pts":

            # See if the file is there.  Throw a QUIET IO error if it isn't
            fd = open(self.file_name,'r')
            fd.close()

            # Throws prints to screen if file not present
            self.fid = NetCDFFile(self.file_name, 'r')

            self.blocking_georef, self.blocking_keys, self.number_of_points =\
                                  _read_pts_file_header(self.fid,
                                                        self.verbose)
            # Iteration state consumed by next()
            self.start_row = 0
            self.last_row = self.number_of_points           
            self.show_verbose = 0
            self.verbose_block_size = (self.last_row + 10)/10
            self.block_number = 0
            self.number_of_blocks = self.number_of_points/self.max_read_lines
            # This computes the number of full blocks. The last block may be
            # smaller and won't be included in this estimate.

            if self.verbose is True:
                print 'Reading %d points (in ~%d blocks) from file %s. '\
                      %(self.number_of_points,
                        self.number_of_blocks,
                        self.file_name),
                print 'Each block consists of %d data points'\
                      %self.max_read_lines

        else:
            # Assume the file is a csv file
            file_pointer = open(self.file_name)
            self.header, self.file_pointer = \
                         _read_csv_file_header(file_pointer)
            self.blocking_georef = None # Used for reconciling zones

        return self
774   
775   
    def next(self):
        """Read the next block from the opened points file and return
        it as a new Geospatial_data instance.

        For .pts files, rows [start_row, fin_row) are sliced out of the
        NetCDF variables; for csv files up to max_read_lines rows are
        parsed.  Raises StopIteration (after closing the file and
        deleting the blocking state attributes) once the file is
        exhausted.
        """
        if self.file_name[-4:] == ".pts":
            if self.start_row == self.last_row:
                # Read the end of the file last iteration
                # Remove blocking attributes
                self.fid.close()
                del self.max_read_lines
                del self.blocking_georef
                del self.last_row
                del self.start_row
                del self.blocking_keys
                del self.fid
                raise StopIteration
            # Clip the final block to the end of the file
            fin_row = self.start_row + self.max_read_lines
            if fin_row > self.last_row:
                fin_row = self.last_row

            if self.verbose is True:
                # Only report progress roughly every verbose_block_size
                # points, not for every block
                if self.show_verbose >= self.start_row and \
                       self.show_verbose < fin_row:

                    print 'Reading block %d (points %d to %d) out of %d'\
                          %(self.block_number,
                            self.start_row,
                            fin_row,
                            self.number_of_blocks)

                    self.show_verbose += max(self.max_read_lines,
                                             self.verbose_block_size)

            # Read next block
            pointlist, att_dict, = \
                       _read_pts_file_blocking(self.fid,
                                               self.start_row,
                                               fin_row,
                                               self.blocking_keys)

            geo = Geospatial_data(pointlist, att_dict, self.blocking_georef)
            self.start_row = fin_row

            self.block_number += 1

        else:
            # Assume the file is a csv file
            try:
                pointlist, att_dict, geo_ref, self.file_pointer = \
                   _read_csv_file_blocking( self.file_pointer,
                                            self.header[:],
                                            max_read_lines=self.max_read_lines,
                                            verbose=self.verbose)

                # Check that the zones haven't changed.
                if geo_ref is not None:
                    geo_ref.reconcile_zones(self.blocking_georef)
                    self.blocking_georef = geo_ref
                elif self.blocking_georef is not None:
                    msg = 'Geo reference given, then not given.'
                    msg += ' This should not happen.' 
                    raise ValueError, msg
                geo = Geospatial_data(pointlist, att_dict, geo_ref)
            except StopIteration:
                # End of file: close it and discard the blocking state
                self.file_pointer.close()
                del self.header
                del self.file_pointer
                raise StopIteration
            except ANUGAError:
                # Propagate after cleaning up the open file
                self.file_pointer.close()
                del self.header
                del self.file_pointer
                raise
            except SyntaxError:
                self.file_pointer.close()
                del self.header
                del self.file_pointer
                # This should only be if there is a format error
                msg = 'Could not open file %s. \n' %self.file_name
                msg += Error_message['IOError']
                raise SyntaxError, msg
        return geo
865
866   
##################### Error messages ###########

# Human-readable help text shown when a point file cannot be parsed.
# Em is kept as a short alias used elsewhere in this module.
Error_message = {}
Em = Error_message
Em['IOError'] = (
    "NOTE: The format for a comma separated .txt/.csv file is:\n"
    "        1st line:     [column names]\n"
    "        other lines:  [x value], [y value], [attributes]\n"
    "\n"
    "           for example:\n"
    "           x, y, elevation, friction\n"
    "           0.6, 0.7, 4.9, 0.3\n"
    "           1.9, 2.8, 5, 0.3\n"
    "           2.7, 2.4, 5.2, 0.3\n"
    "\n"
    "The first two columns are assumed to be x, y coordinates.\n"
    "The attribute values must be numeric.\n"
)
882
883def _set_using_lat_long(latitudes,
884                        longitudes,
885                        geo_reference,
886                        data_points,
887                        points_are_lats_longs):
888    """
889    if the points has lat long info, assume it is in (lat, long) order.
890    """
891   
892    if geo_reference is not None:
893        msg = """A georeference is specified yet latitude and longitude
894        are also specified!"""
895        raise ValueError, msg
896   
897    if data_points is not None and not points_are_lats_longs:
898        msg = """Data points are specified yet latitude and
899        longitude are also specified!"""
900        raise ValueError, msg
901   
902    if points_are_lats_longs:
903        if data_points is None:
904            msg = """Data points are not specified !"""
905            raise ValueError, msg
906        lats_longs = ensure_numeric(data_points)
907        latitudes = ravel(lats_longs[:,0:1])
908        longitudes = ravel(lats_longs[:,1:])
909       
910    if latitudes is None and longitudes is None:
911        msg = """Latitudes and Longitudes are not."""
912        raise ValueError, msg
913   
914    if latitudes is None:
915        msg = """Longitudes are specified yet latitudes aren't!"""
916        raise ValueError, msg
917   
918    if longitudes is None:
919        msg = """Latitudes are specified yet longitudes aren't!"""
920        raise ValueError, msg
921   
922    data_points, zone  = convert_from_latlon_to_utm(latitudes=latitudes,
923                                                    longitudes=longitudes)
924    return data_points, Geo_reference(zone=zone)
925
926   
def _read_pts_file(file_name, verbose=False):
    """Read a .pts NetCDF point file.

    Returns (pointlist, attributes, geo_reference) where pointlist is
    an Nx2 array of coordinates, attributes maps each attribute name
    to an array of N values and geo_reference is the Geo_reference
    stored in the file (None if absent).

    Raises IOError if the file is missing or has no 'points' variable.
    """

    from Scientific.IO.NetCDF import NetCDFFile

    if verbose: print('Reading %s' % file_name)

    # See if the file is there.  Throw a QUIET IO error if it isn't
    fd = open(file_name,'r')
    fd.close()

    # Throws prints to screen if file not present
    fid = NetCDFFile(file_name, 'r')

    pointlist = array(fid.variables['points'])
    keys = list(fid.variables.keys())
    if verbose: print('Got %d variables: %s' % (len(keys), keys))
    try:
        # Remaining keys are the attribute names
        keys.remove('points')
    except ValueError:
        # list.remove raises ValueError (not IOError) when the element
        # is absent - the original 'except IOError' was unreachable
        fid.close()
        msg = 'Expected keyword "points" but could not find it'
        raise IOError(msg)

    attributes = {}
    for key in keys:
        if verbose: print("reading attribute '%s'" % key)
        attributes[key] = array(fid.variables[key])

    try:
        geo_reference = Geo_reference(NetCDFObject=fid)
    except AttributeError:
        # File carries no georeference information
        geo_reference = None

    fid.close()

    return pointlist, attributes, geo_reference
973
974
def _read_csv_file(file_name, verbose=False):
    """Read a .csv point file in one go.

    Returns (pointlist, att_dict, geo_ref): the coordinate array, a
    dictionary of attribute arrays and a Geo_reference (or None).
    """

    fd = open(file_name)
    header, fd = _read_csv_file_header(fd)
    try:
        # Use a huge block size so the whole file is read as one block
        pointlist, att_dict, geo_ref, fd = \
                   _read_csv_file_blocking(fd,
                                           header,
                                           max_read_lines=1e30)
    except ANUGAError:
        fd.close()
        raise
    fd.close()
    return pointlist, att_dict, geo_ref
997
CSV_DELIMITER = ','

def _read_csv_file_header(file_pointer,
                          delimiter=CSV_DELIMITER,
                          verbose=False):
    """Read the header line of a .csv file.

    Returns (header, file_pointer) where header is the list of cleaned
    column names and file_pointer is left at the first data line.
    """
    first_line = file_pointer.readline()
    return clean_line(first_line, delimiter), file_pointer
1009
def _read_csv_file_blocking(file_pointer, header,
                            delimiter=CSV_DELIMITER,
                            max_read_lines=MAX_READ_LINES,
                            verbose=False):
    """Read up to max_read_lines data rows from an open .csv file.

    file_pointer: open file, positioned at the first unread data line.
    header: column names from _read_csv_file_header; the first two
        entries label the x and y columns, the rest name attributes.
    delimiter: field separator.
    max_read_lines: maximum number of data rows to consume this call.

    Returns (pointlist, att_dict, geo_ref, file_pointer) where
    pointlist is an Nx2 Float array, att_dict maps each attribute name
    to a Float array, and geo_ref is a Geo_reference if the first two
    columns were recognised as latitude/longitude (None otherwise).

    Raises SyntaxError on malformed rows and StopIteration when no
    data rows remain (callers use this to end block iteration).
    """
    points = []
    pointattributes = []  # NOTE(review): never used in this function
    att_dict = {}

    # This is to remove the x and y headers.
    # Copy first so the caller's header list is not mutated.
    header = header[:]
    try:
        x_header = header.pop(0)
        y_header = header.pop(0)
    except IndexError:
        # if there are not two columns this will occur.
        # eg if it is a space seperated file
        raise SyntaxError
   
    read_lines = 0
    while read_lines<max_read_lines:
        line = file_pointer.readline()
        numbers = clean_line(line,delimiter)
        # Fewer than two fields ends the block (EOF or blank line).
        # NOTE(review): a '#' line with <= 1 field also ends the loop
        # here, before the comment check below - confirm intended.
        if len(numbers) <= 1:
            break
        if line[0] == '#':
            # Skip comment lines
            continue
        read_lines += 1
        try:
            x = float(numbers[0])
            y = float(numbers[1])
            points.append([x,y])
            numbers.pop(0)
            numbers.pop(0)
            # Every remaining field must match an attribute column
            if len(header) != len(numbers):
                file_pointer.close()
                msg = "File load error.  \
                There might be a problem with the file header"
                raise SyntaxError, msg
            for i,num in enumerate(numbers):
                # NOTE(review): strip() result is discarded (no-op);
                # clean_line has already stripped each field
                num.strip()
                if num != '\n' and num != '':
                    att_dict.setdefault(header[i],[]).append(float(num))
        except ValueError:
            # Non-numeric coordinate or attribute value
            raise SyntaxError
    if points == []:
        # Nothing read: signal end of iteration to the caller
        raise StopIteration

    pointlist = array(points).astype(Float)
    for key in att_dict.keys():
        att_dict[key] = array(att_dict[key]).astype(Float)

    # If the two leading columns are named lat.../lon... (any case),
    # convert the coordinates to UTM and return the derived georeference
    geo_ref = None
    x_header = lower(x_header[:3])
    y_header = lower(y_header[:3])
    if (x_header == 'lon' or  x_header == 'lat') and \
       (y_header == 'lon' or  y_header == 'lat'):
        if x_header == 'lon':
            longitudes = ravel(pointlist[:,0:1])
            latitudes = ravel(pointlist[:,1:])
        else:
            latitudes = ravel(pointlist[:,0:1])
            longitudes = ravel(pointlist[:,1:])
       
        pointlist, geo_ref = _set_using_lat_long(latitudes,
                                                 longitudes,
                                                 geo_reference=None,
                                                 data_points=None,
                                                 points_are_lats_longs=False)
    return pointlist, att_dict, geo_ref, file_pointer
1090
def _read_pts_file_header(fid, verbose=False):
    """Read the geo_reference, attribute keys and number of points
    from an open .pts NetCDF file.

    Returns (geo_reference, keys, number_of_points) where keys lists
    the attribute variable names (all variables except 'points') and
    geo_reference is None if the file has no georeference.

    Raises IOError if the file has no 'points' variable.
    """

    keys = list(fid.variables.keys())
    try:
        keys.remove('points')
    except ValueError:
        # list.remove raises ValueError (not IOError) when the element
        # is absent - the original 'except IOError' was unreachable
        fid.close()
        msg = 'Expected keyword "points" but could not find it'
        raise IOError(msg)
    if verbose: print('Got %d variables: %s' % (len(keys), keys))

    try:
        geo_reference = Geo_reference(NetCDFObject=fid)
    except AttributeError:
        # File carries no georeference information
        geo_reference = None

    return geo_reference, keys, fid.dimensions['number_of_points']
1111
1112def _read_pts_file_blocking(fid, start_row, fin_row, keys):
1113                            #verbose=False):
1114   
1115
1116    """
1117    Read the body of a .csv file.
1118    header: The list header of the csv file, with the x and y labels.
1119    """
1120   
1121    pointlist = array(fid.variables['points'][start_row:fin_row])
1122   
1123    attributes = {}
1124    for key in keys:
1125        attributes[key] = array(fid.variables[key][start_row:fin_row])
1126
1127    return pointlist, attributes
1128   
def _write_pts_file(file_name,
                    write_data_points,
                    write_attributes=None, 
                    write_geo_reference=None):
    """Write a .pts NetCDF file.

    write_data_points: Nx2 array of point coordinates.
    write_attributes: optional dict mapping attribute name to a
        sequence of N values.  NOTE: an attribute may not be called
        'points' since that name is used for the coordinate variable.
    write_geo_reference: optional Geo_reference written to the file.
    """
    from Scientific.IO.NetCDF import NetCDFFile
    # NetCDF file definition
    outfile = NetCDFFile(file_name, 'w')

    # Create new file
    outfile.institution = 'Geoscience Australia'
    outfile.description = 'NetCDF format for compact and portable storage ' +\
                          'of spatial point data'

    # Dimension definitions
    # (local renamed from 'shape', which shadowed Numeric's shape
    # function imported at module level)
    number_of_points = write_data_points.shape[0]
    outfile.createDimension('number_of_points', number_of_points)
    outfile.createDimension('number_of_dimensions', 2) # This is 2d data

    # Variable definition
    outfile.createVariable('points', Float, ('number_of_points',
                                             'number_of_dimensions'))

    # Write the coordinates
    outfile.variables['points'][:] = write_data_points

    if write_attributes is not None:
        # One NetCDF variable per attribute, each of length N
        for key in write_attributes.keys():
            outfile.createVariable(key, Float, ('number_of_points',))
            outfile.variables[key][:] = write_attributes[key]

    if write_geo_reference is not None:
        write_NetCDF_georeference(write_geo_reference, outfile)

    outfile.close()
1180 
1181def _write_csv_file(file_name,
1182                    write_data_points,
1183                    write_attributes=None,
1184                    as_lat_long=False,
1185                    delimiter=','):
1186    """
1187    export a file, file_name, with the csv format
1188   
1189    """
1190    points = write_data_points
1191    pointattributes = write_attributes
1192   
1193    fd = open(file_name,'w')
1194    if as_lat_long:
1195        titlelist = "latitude" + delimiter + "longitude"  + delimiter
1196    else:
1197        titlelist = "x" + delimiter + "y"  + delimiter
1198    if pointattributes is not None:   
1199        for title in pointattributes.keys():
1200            titlelist = titlelist + title + delimiter
1201        titlelist = titlelist[0:-len(delimiter)] # remove the last delimiter
1202    fd.write(titlelist+"\n")
1203   
1204    # <x/lat> <y/long> [attributes]
1205    for i, vert in enumerate( points):
1206
1207
1208        if pointattributes is not None:           
1209            attlist = ","
1210            for att in pointattributes.keys():
1211                attlist = attlist + str(pointattributes[att][i])+ delimiter
1212            attlist = attlist[0:-len(delimiter)] # remove the last delimiter
1213            attlist.strip()
1214        else:
1215            attlist = ''
1216
1217        fd.write(str(vert[0]) + delimiter +
1218                 str(vert[1]) + attlist + "\n")
1219
1220    fd.close()
1221 
1222def _write_urs_file(file_name,
1223                    points,
1224                    delimiter=' '):
1225    """
1226    export a file, file_name, with the urs format
1227    the data points are in lats and longs
1228   
1229    """   
1230    fd = open(file_name,'w')   
1231    fd.write(str(len(points))+"\n")   
1232    # <lat> <long> <id#>
1233    for i, vert in enumerate( points):
1234        fd.write(str(round(vert[0],7)) + delimiter + \
1235                 str(round(vert[1],7)) + delimiter +str(i)+ "\n")
1236    fd.close()
1237   
1238def _point_atts2array(point_atts):
1239    point_atts['pointlist'] = array(point_atts['pointlist']).astype(Float)
1240   
1241    for key in point_atts['attributelist'].keys():
1242        point_atts['attributelist'][key]=\
1243                array(point_atts['attributelist'][key]).astype(Float)
1244    return point_atts
1245
1246   
1247
1248
def geospatial_data2points_dictionary(geospatial_data):
    """Convert a Geospatial_data instance into a points dictionary
    with keys 'pointlist', 'attributelist' and 'geo_reference'.
    """

    attributes = {}
    for name in geospatial_data.attributes.keys():
        attributes[name] = geospatial_data.attributes[name]

    return {'pointlist': geospatial_data.data_points,
            'attributelist': attributes,
            'geo_reference': geospatial_data.geo_reference}
1265
1266   
def points_dictionary2geospatial_data(points_dictionary):
    """Convert a points dictionary (keys 'pointlist', 'attributelist'
    and optionally 'geo_reference') to a Geospatial_data object.

    Raises AssertionError if a mandatory key is missing.
    """

    # dict.has_key is removed in Python 3; 'in' works in both
    msg = 'Points dictionary must have key pointlist' 
    assert 'pointlist' in points_dictionary, msg

    msg = 'Points dictionary must have key attributelist'     
    assert 'attributelist' in points_dictionary, msg       

    # geo_reference is optional
    geo = points_dictionary.get('geo_reference', None)

    return Geospatial_data(points_dictionary['pointlist'],
                           points_dictionary['attributelist'],
                           geo_reference = geo)
1285
def clean_line(line,delimiter):     
    """Split a line on delimiter, dropping empty fields and stripping
    whitespace from the rest.

    Note: only fields that are empty BEFORE stripping are dropped; a
    whitespace-only field survives as ''.
    """
    stripped = line.strip()
    return [field.strip()
            for field in stripped.split(delimiter)
            if field != '']
1304           
def ensure_absolute(points, geo_reference=None):
    """Return points as a numeric array of absolute coordinates.

    points: list/Numeric array of [x, y] pairs, a Geospatial_data
        object, or a points file name.
    geo_reference: a Geo_reference object or a (zone, easting,
        northing) 3-tuple; if given, points are taken as relative to
        that origin.  Must not be combined with a Geospatial_data
        object.
    """
    if isinstance(points,type('')):
        # It's a string - assume it is a point file
        points = Geospatial_data(file_name = points)
       
    if isinstance(points,Geospatial_data):
        points = points.get_data_points( \
                absolute = True)
        msg = "Use a Geospatial_data object or a mesh origin. Not both."
        # 'is None' - identity test is the correct check for None
        assert geo_reference is None, msg
           
    else:
        points = ensure_numeric(points, Float)
    if geo_reference is None:
        geo = None #Geo_reference()
    else:
        if isinstance(geo_reference, Geo_reference):
            geo = geo_reference
        else:
            geo = Geo_reference(geo_reference[0],
                                geo_reference[1],
                                geo_reference[2])
        points = geo.get_absolute(points)
    return points
1343     
1344
def ensure_geospatial(points, geo_reference=None):
    """Return points as a Geospatial_data instance.

    points: list/Numeric array of [x, y] pairs, or a Geospatial_data
        object (returned unchanged).
    geo_reference: a Geo_reference object or a (zone, easting,
        northing) 3-tuple giving the origin of relative coordinates.
        Must not be combined with a Geospatial_data object.
    """
    if isinstance(points,Geospatial_data):
        msg = "Use a Geospatial_data object or a mesh origin. Not both."
        # 'is None' - identity test is the correct check for None
        assert geo_reference is None, msg
        return points   
    else:
        points = ensure_numeric(points, Float)
    if geo_reference is None:
        geo = None
    else:
        if isinstance(geo_reference, Geo_reference):
            geo = geo_reference
        else:
            geo = Geo_reference(geo_reference[0],
                                geo_reference[1],
                                geo_reference[2])
    points = Geospatial_data(data_points=points, geo_reference=geo)       
    return points
1376
1377
1378
1379def find_optimal_smoothing_parameter(data_file, 
1380                                     alpha_list=None,
1381                                     mesh_file=None,
1382                                     boundary_poly=None,
1383                                     mesh_resolution=100000,
1384                                     north_boundary=None,
1385                                     south_boundary=None,
1386                                     east_boundary=None,
1387                                     west_boundary=None,
1388                                     plot_name='all_alphas',
1389                                     seed_num=None,
1390                                     cache=False,
1391                                     verbose=False
1392                                     ):
1393   
1394    """
1395    data_file: must not contain points outside the boundaries defined
1396           and it either a pts, txt or csv file.
1397   
1398    alpha_list: the alpha values to test in a single list
1399   
1400    mesh_file: name of the created mesh file or if passed in will read it.
1401            NOTE, if there is a mesh file mesh_resolution,
1402            north_boundary, south... etc will be ignored.
1403   
1404    mesh_resolution: the maximum area size for a triangle
1405   
1406    north_boundary... west_boundary: the value of the boundary
1407   
1408    plot_name: the name for the plot contain the results
1409   
1410    seed_num: the seed to the random number generator
1411   
1412    USAGE:
1413        value, alpha = find_optimal_smoothing_parameter(data_file=fileName,
1414                                             alpha_list=[0.0001, 0.01, 1],
1415                                             mesh_file=None,
1416                                             mesh_resolution=3,
1417                                             north_boundary=5,
1418                                             south_boundary=-5,
1419                                             east_boundary=5,
1420                                             west_boundary=-5,
1421                                             plot_name='all_alphas',
1422                                             seed_num=100000,
1423                                             verbose=False)
1424   
1425    OUTPUT: returns the minumum normalised covalance calculate AND the
1426           alpha that created it. PLUS writes a plot of the results
1427           
1428    NOTE: code will not work if the data_file extend is greater than the
1429    boundary_polygon or the north_boundary...west_boundary
1430       
1431    """
1432
1433    from anuga.shallow_water import Domain
1434    from anuga.geospatial_data.geospatial_data import Geospatial_data
1435    from anuga.pmesh.mesh_interface import create_mesh_from_regions
1436   
1437    from anuga.utilities.numerical_tools import cov
1438    from Numeric import array, resize,shape,Float,zeros,take,argsort,argmin
1439    from anuga.utilities.polygon import is_inside_polygon
1440
1441    attribute_smoothed='elevation'
1442
1443    if mesh_file is None:
1444        mesh_file='temp.msh'
1445
1446        if north_boundary is None or south_boundary is None or \
1447           east_boundary is None or west_boundary is None:
1448            no_boundary=True
1449        else:
1450            no_boundary=False
1451       
1452        if no_boundary is True:
1453            msg= 'All boundaries must be defined'
1454            raise msg
1455
1456        poly_topo = [[east_boundary,south_boundary],
1457                     [east_boundary,north_boundary],
1458                     [west_boundary,north_boundary],
1459                     [west_boundary,south_boundary]]
1460                     
1461        create_mesh_from_regions(poly_topo,
1462                                 boundary_tags={'back': [2],
1463                                                'side': [1,3],
1464                                                'ocean': [0]},
1465                             maximum_triangle_area=mesh_resolution,
1466                             filename=mesh_file,
1467                             use_cache=cache,
1468                             verbose=verbose)
1469
1470    else: # if mesh file provided
1471        #test mesh file exists?
1472        if access(mesh_file,F_OK) == 0:
1473            msg="file %s doesn't exist!" %mesh_file
1474            raise IOError, msg
1475
1476    #split topo data
1477    G = Geospatial_data(file_name = data_file)
1478    if verbose: print 'start split'
1479    G_small, G_other = G.split(0.1,seed_num, verbose=verbose)
1480    if verbose: print 'finish split'
1481    points=G_small.get_data_points()
1482
1483    #FIXME: Remove points outside boundary polygon
1484#    print 'new point',len(points)
1485#   
1486#    new_points=[]
1487#    new_points=array([],typecode=Float)
1488#    new_points=resize(new_points,(len(points),2))
1489#    print "BOUNDARY", boundary_poly
1490#    for i,point in enumerate(points):
1491#        if is_inside_polygon(point,boundary_poly, verbose=True):
1492#            new_points[i] = point
1493#            print"WOW",i,new_points[i]
1494#    points = new_points
1495
1496   
1497    if verbose: print "Number of points in sample to compare: ", len(points)
1498   
1499    if alpha_list==None:
1500        alphas = [0.001,0.01,100]
1501        #alphas = [0.000001, 0.00001, 0.0001, 0.001, 0.01,\
1502         #         0.1, 1.0, 10.0, 100.0,1000.0,10000.0]
1503       
1504    else:
1505        alphas=alpha_list
1506    domains = {}
1507
1508    if verbose: print 'Setup computational domains with different alphas'
1509    for alpha in alphas:
1510        #add G_other data to domains with different alphas
1511        if verbose:print '\n Calculating domain and mesh for Alpha = ',alpha,'\n'
1512        domain = Domain(mesh_file, use_cache=cache, verbose=verbose)
1513        if verbose:print domain.statistics()
1514        domain.set_quantity(attribute_smoothed, 
1515                    geospatial_data = G_other,
1516                    use_cache = cache,
1517                    verbose = verbose,
1518                    alpha = alpha)
1519        domains[alpha]=domain
1520
1521    #creates array with columns 1 and 2 are x, y. column 3 is elevation
1522    #4 onwards is the elevation_predicted using the alpha, which will
1523    #be compared later against the real removed data
1524    data=array([],typecode=Float)
1525
1526    data=resize(data,(len(points),3+len(alphas)))
1527
1528    #gets relative point from sample
1529    data[:,0]=points[:,0]
1530    data[:,1]=points[:,1]
1531    elevation_sample=G_small.get_attributes(attribute_name=attribute_smoothed)
1532    data[:,2]=elevation_sample
1533
1534    normal_cov=array(zeros([len(alphas),2]),typecode=Float)
1535
1536    if verbose: print 'Determine difference between predicted results and actual data'
1537    for i,alpha in enumerate(domains):
1538        if verbose: print'Alpha =',alpha
1539       
1540        points_geo=domains[alpha].geo_reference.change_points_geo_ref(points)
1541        #returns the predicted elevation of the points that were "split" out
1542        #of the original data set for one particular alpha
1543        elevation_predicted=domains[alpha].quantities[attribute_smoothed].\
1544                            get_values(interpolation_points=points_geo)
1545 
1546        #add predicted elevation to array that starts with x, y, z...
1547        data[:,i+3]=elevation_predicted
1548
1549        sample_cov= cov(elevation_sample)
1550        #print elevation_predicted
1551        ele_cov= cov(elevation_sample-elevation_predicted)
1552        normal_cov[i,:]= [alpha,ele_cov/sample_cov]
1553
1554        if verbose: print'cov',normal_cov[i][0],'= ',normal_cov[i][1]
1555
1556    normal_cov0=normal_cov[:,0]
1557    normal_cov_new=take(normal_cov,argsort(normal_cov0))
1558
1559    if plot_name is not None:
1560
1561        from pylab import savefig,semilogx,loglog
1562   
1563        semilogx(normal_cov_new[:,0],normal_cov_new[:,1])
1564        loglog(normal_cov_new[:,0],normal_cov_new[:,1])
1565        savefig(plot_name,dpi=300)
1566    if mesh_file == 'temp.msh':
1567        remove(mesh_file)
1568   
1569    return min(normal_cov_new[:,1]) , normal_cov_new[(argmin(normal_cov_new,axis=0))[1],0]
1570
1571         
if __name__ == "__main__":
    # Library module: no command-line behaviour is defined.
    pass
1574   
Note: See TracBrowser for help on using the repository browser.