source: trunk/anuga_core/source/anuga/fit_interpolate/fit.py @ 8757

Last change on this file since 8757 was 8757, checked in by steve, 12 years ago

Speed up the check_integrity procedure of neighbour_mesh

1"""Least squares fitting.
2
3   Implements a penalised least-squares fit.
4   putting point data onto the mesh.
5
6   The penalty term (or smoothing term) is controlled by the smoothing
7   parameter alpha.
8   With a value of alpha=0, the fit function will attempt
9   to interpolate as closely as possible in the least-squares sense.
10   With values alpha > 0, a certain amount of smoothing will be applied.
11   A positive alpha is essential in cases where there are too few
12   data points.
13   A negative alpha is not allowed.
14   A typical value of alpha is 1.0e-6
15
16
17   Ole Nielsen, Stephen Roberts, Duncan Gray, Christopher Zoppou
18   Geoscience Australia, 2004.
19
20   TO DO
21   * test geo_ref, geo_spatial
22
23   IDEAS
24   * (DSG-) Change the interface of fit, so a domain object can
25      be passed in. (I don't know if this is feasible). If could
26      save time/memory.
27"""

from anuga.abstract_2d_finite_volumes.neighbour_mesh import Mesh
from anuga.caching import cache
from anuga.geospatial_data.geospatial_data import Geospatial_data, \
     ensure_absolute
from anuga.fit_interpolate.general_fit_interpolate import FitInterpolate

from anuga.utilities.sparse import Sparse_CSR
from anuga.utilities.numerical_tools import ensure_numeric
from anuga.utilities.cg_solve import conjugate_gradient
from anuga.config import default_smoothing_parameter as DEFAULT_ALPHA
import anuga.utilities.log as log

import exceptions
class TooFewPointsError(exceptions.Exception): pass
class VertsWithNoTrianglesError(exceptions.Exception): pass

import numpy as num
import sys

# Setup for C fitting routines
#from anuga.utilities import compile
#if compile.can_use_C_extension('fitsmooth.c'):
import fitsmooth
#else:
#    msg = "C implementation of fitting algorithms (fitsmooth.c) not available"
#    raise Exception(msg)

# Setup for quad_tree extension
#from anuga.utilities import compile
#if compile.can_use_C_extension('quad_tree_ext.c'):
#from anuga.utilities import quad_tree_ext
#else:
#    msg = "C implementation of quad tree extension not available"
#    raise Exception(msg)

# Setup for sparse_matrix extension
#from anuga.utilities import compile
#if compile.can_use_C_extension('sparse_matrix_ext.c'):
#from anuga.utilities import sparse_matrix_ext
#else:
#    msg = "C implementation of sparse_matrix extension not available"
#    raise Exception(msg)


class Fit(FitInterpolate):

    def __init__(self,
                 vertex_coordinates=None,
                 triangles=None,
                 mesh=None,
                 mesh_origin=None,
                 alpha=None,
                 verbose=False,
                 cg_precon='Jacobi',
                 use_c_cg=True):

        """
        Padarn Note 05/12/12: This documentation should probably
        be updated to account for the fact that the fitting is now
        done in C. I wasn't sure what details were necessary though.

        Fit data at points to the vertices of a mesh.

        Inputs:

          vertex_coordinates: List of coordinate pairs [xi, eta] of
              points constituting a mesh (or an m x 2 numeric array or
              a geospatial object).
              Points may appear multiple times
              (e.g. if vertices have discontinuities).

          triangles: List of 3-tuples (or a numeric array) of
              integers representing indices of all vertices in the mesh.

          mesh_origin: A geo_reference object or a 3-tuple consisting of
              UTM zone, easting and northing.
              If specified, vertex coordinates are assumed to be
              relative to their respective origins.

          Note: Don't supply vertex coordinates as a geospatial object
              together with a mesh origin, since a geospatial object
              carries its own origin.


        Usage:
        To use this in a blocking way, call build_fit_subset with the
        point coordinates and z information, and then call fit with no
        point coordinates or z (see the commented example after this
        method).
        """
        # Initialise variables
        if alpha is None:
            self.alpha = DEFAULT_ALPHA
        else:
            self.alpha = alpha

        FitInterpolate.__init__(self,
                 vertex_coordinates,
                 triangles,
                 mesh,
                 mesh_origin=mesh_origin,
                 verbose=verbose)

        self.AtA = None
        self.Atz = None
        self.D = None
        self.point_count = 0

        # NOTE PADARN: NEEDS FIXING - currently need smoothing matrix
        # even if alpha is zero, due to the C function expecting it. This
        # could and should be removed.
        if True:
            if verbose:
                log.critical('Building smoothing matrix')
            self.D = self._build_smoothing_matrix_D()

        bd_poly = self.mesh.get_boundary_polygon()
        self.mesh_boundary_polygon = ensure_numeric(bd_poly)

        self.cg_precon = cg_precon
        self.use_c_cg = use_c_cg

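    # Example of the blocking usage described in __init__ (an illustrative
    # sketch only; the mesh arrays and point blocks are made up and the code
    # is not run on import):
    #
    #   fit_obj = Fit(vertex_coordinates=vertex_coordinates,
    #                 triangles=triangles,
    #                 alpha=1.0e-6)
    #
    #   # Feed the point data in blocks; AtA and Atz are accumulated.
    #   fit_obj.build_fit_subset(points_block_1, z=z_block_1)
    #   fit_obj.build_fit_subset(points_block_2, z=z_block_2)
    #
    #   # Solve for the vertex values using everything accumulated so far.
    #   vertex_values = fit_obj.fit()
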
    def _build_coefficient_matrix_B(self,
                                  verbose=False):
        """
        Build final coefficient matrix from AtA and D.
        """

        msize = self.mesh.number_of_nodes

        self.B = fitsmooth.build_matrix_B(self.D, \
                                          self.AtA, self.alpha)

        # Convert self.B matrix to CSR format
        self.B = Sparse_CSR(data=num.array(self.B[0]),\
                                  Colind=num.array(self.B[1]),\
                                  rowptr=num.array(self.B[2]), \
                                  m=msize, n=msize)
        # NOTE PADARN: The above step could potentially be removed
        # and the sparse matrix worked with directly in C. Not sure
        # if this would be worthwhile.

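    # Conceptually, the fit solves the penalised least-squares system
    #
    #     (AtA + alpha*D) w = Atz
    #
    # where w holds one fitted value per mesh vertex.  A dense NumPy sketch
    # of the same computation (illustration only; the real code keeps
    # everything sparse and does the work in the fitsmooth C extension):
    #
    #   import numpy as num
    #   B = AtA_dense + alpha * D_dense         # coefficient matrix
    #   w = num.linalg.solve(B, Atz_dense)      # fitted vertex values
    #
    # Here AtA_dense, D_dense and Atz_dense are hypothetical dense versions
    # of self.AtA, self.D and self.Atz.
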
    def _build_smoothing_matrix_D(self):
        """Build m x m smoothing matrix, where
        m is the number of basis functions phi_k (one per vertex)

        The smoothing matrix is defined as

        D = D1 + D2

        where

        [D1]_{k,l} = \int_\Omega
           \frac{\partial \phi_k}{\partial x}
           \frac{\partial \phi_l}{\partial x}\,
           dx dy

        [D2]_{k,l} = \int_\Omega
           \frac{\partial \phi_k}{\partial y}
           \frac{\partial \phi_l}{\partial y}\,
           dx dy


        The derivatives \frac{\partial \phi_k}{\partial x},
        \frac{\partial \phi_k}{\partial y} for a particular triangle
        are obtained by computing the gradient a_k, b_k for basis function k
        (see the commented sketch after this method).

        NOTE PADARN: All of this is now done in an external C function, and the
        result is stored in a Capsule object, meaning the entries cannot be directly
        accessed.
        """

        # NOTE PADARN: Should the input arguments here be checked - making
        # sure that they are floats? Not sure if this is done elsewhere.
        # NOTE PADARN: Should global coordinates be used for the smoothing
        # matrix, or is this not important?
        return fitsmooth.build_smoothing_matrix(self.mesh.triangles, \
          self.mesh.areas, self.mesh.vertex_coordinates)


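    # A pure-Python sketch of what one triangle contributes to D for linear
    # basis functions (illustration only; the actual assembly happens inside
    # fitsmooth.build_smoothing_matrix).  For a triangle with vertices
    # (x0, y0), (x1, y1), (x2, y2) and area A, the gradients (a_k, b_k) of
    # the three basis functions are constant over the triangle, so the local
    # contribution is A * (a_k*a_l + b_k*b_l):
    #
    #   def triangle_contribution_to_D(x0, y0, x1, y1, x2, y2):
    #       A = 0.5 * abs((x1 - x0)*(y2 - y0) - (x2 - x0)*(y1 - y0))
    #       a = [(y1 - y2)/(2*A), (y2 - y0)/(2*A), (y0 - y1)/(2*A)]  # d/dx
    #       b = [(x2 - x1)/(2*A), (x0 - x2)/(2*A), (x1 - x0)/(2*A)]  # d/dy
    #       return [[A*(a[k]*a[l] + b[k]*b[l]) for l in range(3)]
    #               for k in range(3)]
    #
    # These 3x3 blocks are added into D at the rows and columns given by the
    # triangle's three vertex indices.
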
    # NOTE PADARN: This function was added to emulate the behaviour of the
    # original class, which did not use external C functions. This method is
    # dangerous as D could be very large - it was added as it is used in a
    # unit test.
    def get_D(self):
        return fitsmooth.return_full_D(self.D, self.mesh.number_of_nodes)

    # NOTE PADARN: This function was added to emulate the behaviour of the
    # original class so as to pass a unit test. It is otherwise unneeded.
    def build_fit_subset(self, point_coordinates, z=None, attribute_name=None,
                              verbose=False, output='dot'):
        self._build_matrix_AtA_Atz(point_coordinates, z, attribute_name, verbose, output)

    def _build_matrix_AtA_Atz(self, point_coordinates, z=None, attribute_name=None,
                              verbose=False, output='dot'):
        """Build:
        AtA  m x m  interpolation matrix, and
        Atz  m x a  matrix, where
        m is the number of basis functions phi_k (one per vertex) and
        a is the number of data attributes.

        This algorithm uses a quad tree data structure for fast binning of
        data points.

        If AtA is None, the matrices AtA and Atz are created.

        This function can be called again and again, with sub-sets of
        the point coordinates.  Call fit to get the results.
        (See the commented sketch after this method for what AtA and Atz
        represent.)

        Preconditions:
        z and points are numeric.
        Point_coordinates and mesh vertices have the same origin.

        The number of attributes of the data points does not change.
        """

        if isinstance(point_coordinates, Geospatial_data):
            point_coordinates = point_coordinates.get_data_points( \
                absolute=True)

        # Convert input to numeric arrays
        if z is not None:
            z = ensure_numeric(z, num.float)
        else:
            msg = 'z not specified'
            assert isinstance(point_coordinates, Geospatial_data), msg
            z = point_coordinates.get_attributes(attribute_name)

        point_coordinates = ensure_numeric(point_coordinates, num.float)

        npts = len(z)
        z = num.array(z)
        # NOTE PADARN: This copy might be needed to make sure memory is
        # contiguous - would be better to read in C.
        z = z.copy()

        self.point_count += z.shape[0]

        zdim = 1
        if len(z.shape) != 1:
            zdim = z.shape[1]

        [AtA, Atz] = fitsmooth.build_matrix_AtA_Atz_points(self.root.root, \
               self.mesh.number_of_nodes, \
               self.mesh.triangles, \
               num.array(point_coordinates), z, zdim, npts)

        if verbose and output == 'dot':
            print '\b.',
            sys.stdout.flush()
        if zdim == 1:
            Atz = num.array(Atz[0])
        else:
            Atz = num.array(Atz).transpose()

        if self.AtA is None and self.Atz is None:
            self.AtA = AtA
            self.Atz = Atz
        else:
            fitsmooth.combine_partial_AtA_Atz(self.AtA, AtA,\
                    self.Atz, Atz, zdim, self.mesh.number_of_nodes)

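    # For reference, a dense NumPy sketch of what AtA and Atz represent
    # (illustration only; the real work is done in the fitsmooth C extension
    # using the quad tree and sparse storage).  If A is the hypothetical
    # n x m matrix of basis functions evaluated at the n data points,
    # A[i, k] = phi_k(p_i), then:
    #
    #   import numpy as num
    #   AtA_dense = num.dot(A.T, A)    # m x m
    #   Atz_dense = num.dot(A.T, z)    # m (or m x a for a attribute columns)
    #
    # Accumulating over blocks of points simply sums these contributions,
    # which is presumably what combine_partial_AtA_Atz does on the sparse
    # versions.
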
    def _build_matrix_AtA_Atz_file(self, filename, attribute_name=None, max_read_lines=500,\
                                   verbose=False):
        """Build:
        AtA  m x m  interpolation matrix, and
        Atz  m x a  matrix, where
        m is the number of basis functions phi_k (one per vertex) and
        a is the number of data attributes.

        This algorithm uses a quad tree data structure for fast binning of
        data points.

        If AtA is None, the matrices AtA and Atz are created.

        This function can be called again and again, with sub-sets of
        the point coordinates.  Call fit to get the results.

        Preconditions:
        z and points are numeric.
        Point_coordinates and mesh vertices have the same origin.

        The number of attributes of the data points does not change.
        """
        # PADARN NOTE: The following block of code is translated from
        # things that were being done in the Geospatial_data object,
        # which is no longer required since the data is read from file in C.
        #---------------------------------------------------------
        # Default attribute name here seems to only have possibility
        # of being 'None'.
        G_data = Geospatial_data(filename,
                                         max_read_lines=1,
                                         load_file_now=True,
                                         verbose=verbose)


        if attribute_name is None:
            if G_data.default_attribute_name is not None:
                attribute_name = G_data.default_attribute_name
            else:
                attribute_name = G_data.attributes.keys()[0]
                # above line takes the first one from keys

        if verbose is True:
            log.critical('Using attribute %s' % attribute_name)
            log.critical('Available attributes: %s' % (G_data.attributes.keys()))

        msg = 'Attribute name %s does not exist in data set' % attribute_name
        assert G_data.attributes.has_key(attribute_name), msg
        #---------------------------------------------------------

        # MAKE SURE READING ABSOLUTE POINT COORDINATES
        [AtA, Atz, npts] = fitsmooth.build_matrix_AtA_Atz_fileread(self.root.root, \
               self.mesh.number_of_nodes, \
               self.mesh.triangles, \
               filename, \
               attribute_name, \
               max_read_lines)
        self.point_count = npts
        self.AtA = AtA
        self.Atz = Atz
    def fit(self, point_coordinates_or_filename=None, z=None,
            verbose=False,
            point_origin=None,
            attribute_name=None,
            max_read_lines=500,
            use_blocking_option2=True):
        """Fit a smooth surface to the given 1d array of data points z.

        The smooth surface is computed at each vertex in the underlying
        mesh using the formula given in the module doc string.

        Inputs:
          point_coordinates: The coordinates of the data points.
              A list of coordinate pairs [x, y], an nx2 numeric array,
              a Geospatial_data object or a points file filename.
          z: Single 1d vector or array of data at the point_coordinates.

        """
        if isinstance(point_coordinates_or_filename, basestring):
            if point_coordinates_or_filename[-4:] != ".pts":
                use_blocking_option2 = False
        # NOTE PADARN 12/12/12: Currently trying to get all blocking to be done
        # in C, but blocking option 1, which does everything using the Python
        # interface, can be used in the meantime (much slower).
        #-----------------------BLOCKING OPTION 1----------------------------
        if use_blocking_option2 is False:
            if verbose:
                print 'Fit.fit: Initializing'

            # Use blocking to load in the point info
            if isinstance(point_coordinates_or_filename, basestring):
                msg = "Don't set a point origin when reading from a file"
                assert point_origin is None, msg
                filename = point_coordinates_or_filename

                G_data = Geospatial_data(filename,
                                         max_read_lines=max_read_lines,
                                         load_file_now=False,
                                         verbose=verbose)

                for i, geo_block in enumerate(G_data):

                    # Build the array
                    points = geo_block.get_data_points(absolute=True)
                    z = geo_block.get_attributes(attribute_name=attribute_name)

                    self._build_matrix_AtA_Atz(points, z, attribute_name, verbose)

                point_coordinates = None

                if verbose:
                    print ''
            else:
                point_coordinates = point_coordinates_or_filename
        #-----------------------BLOCKING OPTION 2----------------------------
        else:
            if verbose:
                print 'Fit.fit: Initializing'

            if isinstance(point_coordinates_or_filename, basestring):
                msg = "Don't set a point origin when reading from a file"
                assert point_origin is None, msg
                filename = point_coordinates_or_filename

                self._build_matrix_AtA_Atz_file(filename, attribute_name=attribute_name,\
                                                verbose=verbose)

                point_coordinates = None
            else:
                point_coordinates = point_coordinates_or_filename
        #--------------------------------------------------------------------

        if point_coordinates is None:
            if verbose:
                log.critical('Fit.fit: Warning: no data points in fit')
            msg = 'No interpolation matrix.'
            assert self.AtA is not None, msg
            assert self.Atz is not None

            # FIXME (DSG) - issue a message
        else:
            point_coordinates = ensure_absolute(point_coordinates,
                                                geo_reference=point_origin)
            # if isinstance(point_coordinates,Geospatial_data) and z is None:
            # z will come from the geo-ref

            self._build_matrix_AtA_Atz(point_coordinates, z, verbose=verbose, output='counter')

        # Check sanity
        m = self.mesh.number_of_nodes  # Nbr of basis functions (1/vertex)
        n = self.point_count
        if n < m and self.alpha == 0.0:
            msg = 'ERROR (least_squares): Too few data points\n'
            msg += 'There are only %d data points and alpha == 0. ' % n
            msg += 'Need at least %d\n' % m
            msg += 'Alternatively, set smoothing parameter alpha to a small '
            msg += 'positive value,\ne.g. 1.0e-3.'
            raise TooFewPointsError(msg)

        self._build_coefficient_matrix_B(verbose)
        loners = self.mesh.get_lone_vertices()
        # FIXME - make this an error message.
        # test with
        # Not_yet_test_smooth_att_to_mesh_with_excess_verts.
        if len(loners) > 0:
            msg = 'WARNING: (least_squares): \nVertices with no triangles\n'
            msg += 'All vertices should be part of a triangle.\n'
            msg += 'In the future this will be enforced.\n'
            msg += 'The following vertices are not part of a triangle:\n'
            msg += str(loners)
            log.critical(msg)

            #raise VertsWithNoTrianglesError(msg)
        return conjugate_gradient(self.B, self.Atz, self.Atz,
                                  imax=2 * len(self.Atz)+1000, use_c_cg=self.use_c_cg,
                                  precon=self.cg_precon)


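# Example use of Fit.fit directly (an illustrative sketch only; the variable
# names, file name and attribute name below are made up and the code is not
# run on import):
#
#   fit_obj = Fit(vertex_coordinates=vertex_coordinates,
#                 triangles=triangles, alpha=1.0e-6)
#
#   # Either pass point coordinates and values directly ...
#   vertex_values = fit_obj.fit(point_coordinates, z)
#
#   # ... or pass a points file name.  A .pts file is blocked through the
#   # C file reader; other formats fall back to the Python blocking path.
#   # vertex_values = Fit(mesh=mesh, alpha=1.0e-6).fit('data.pts',
#   #                                   attribute_name='elevation')
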
# point_coordinates can also be a points file name

def fit_to_mesh(point_coordinates,
                vertex_coordinates=None,
                triangles=None,
                mesh=None,
                point_attributes=None,
                alpha=DEFAULT_ALPHA,
                verbose=False,
                mesh_origin=None,
                data_origin=None,
                max_read_lines=None,
                attribute_name=None,
                use_cache=False,
                cg_precon='Jacobi',
                use_c_cg=True):
    """Wrapper around internal function _fit_to_mesh for use with caching.
    """

    args = (point_coordinates, )
    kwargs = {'vertex_coordinates': vertex_coordinates,
              'triangles': triangles,
              'mesh': mesh,
              'point_attributes': point_attributes,
              'alpha': alpha,
              'verbose': verbose,
              'mesh_origin': mesh_origin,
              'data_origin': data_origin,
              'max_read_lines': max_read_lines,
              'attribute_name': attribute_name,
              'cg_precon': cg_precon,
              'use_c_cg': use_c_cg
              }

    if use_cache is True:
        if isinstance(point_coordinates, basestring):
            # We assume that point_coordinates is the name of a .csv/.txt
            # file which must be passed on to caching as a dependency
            # (in case it has changed on disk)
            dep = [point_coordinates]
        else:
            dep = None

        return cache(_fit_to_mesh,
                     args, kwargs,
                     verbose=verbose,
                     compression=False,
                     dependencies=dep)
    else:
        res = apply(_fit_to_mesh,
                     args, kwargs)
        "print intep should go out of range"
        return res

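# Example of the caching wrapper above (an illustrative sketch only;
# 'data.csv' and the attribute name are made up and the code is not run
# on import):
#
#   vertex_values = fit_to_mesh('data.csv',
#                               vertex_coordinates=vertex_coordinates,
#                               triangles=triangles,
#                               attribute_name='elevation',
#                               alpha=1.0e-6,
#                               use_cache=True)
#
#   # With use_cache=True the result is memoised via anuga.caching.cache and
#   # the points file is registered as a dependency, so the fit is redone
#   # whenever the file changes on disk.
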

# point_coordinates can also be a points file name

def _fit_to_mesh(point_coordinates,
                 vertex_coordinates=None,
                 triangles=None,
                 mesh=None,
                 point_attributes=None,
                 alpha=DEFAULT_ALPHA,
                 verbose=False,
                 mesh_origin=None,
                 data_origin=None,
                 max_read_lines=None,
                 attribute_name=None,
                 cg_precon='Jacobi',
                 use_c_cg=True):
    """
    Fit a smooth surface to a triangulation,
    given data points with attributes.


    Inputs:
        vertex_coordinates: List of coordinate pairs [xi, eta] of
            points constituting a mesh (or an m x 2 numeric array or
            a geospatial object).
            Points may appear multiple times
            (e.g. if vertices have discontinuities).

        triangles: List of 3-tuples (or a numeric array) of
            integers representing indices of all vertices in the mesh.

        point_coordinates: List of coordinate pairs [x, y] of data points
            (or an nx2 numeric array). This can also be a .csv/.txt/.pts
            file name.

        alpha: Smoothing parameter.

        mesh_origin: A geo_reference object or a 3-tuple consisting of
            UTM zone, easting and northing.
            If specified, vertex coordinates are assumed to be
            relative to their respective origins.

        point_attributes: Vector or array of data at the
                          point_coordinates.

    """

    if mesh is None:
        # FIXME(DSG): Throw errors if triangles or vertex_coordinates
        # are None

        # Convert input to numeric arrays
        triangles = ensure_numeric(triangles, num.int)
        vertex_coordinates = ensure_absolute(vertex_coordinates,
                                             geo_reference=mesh_origin)

        if verbose:
            log.critical('_fit_to_mesh: Building mesh')
        mesh = Mesh(vertex_coordinates, triangles)

        # Don't need this as we have just created the mesh
        #mesh.check_integrity()

    interp = Fit(mesh=mesh,
                 verbose=verbose,
                 alpha=alpha,
                 cg_precon=cg_precon,
                 use_c_cg=use_c_cg)

    vertex_attributes = interp.fit(point_coordinates,
                                   point_attributes,
                                   point_origin=data_origin,
                                   max_read_lines=max_read_lines,
                                   attribute_name=attribute_name,
                                   verbose=verbose)

    # Add the value checking stuff that's in least squares.
    # Maybe this stuff should get pushed down into Fit,
    # or at least be a method of Fit.
    # Or integrate it into the fit method, saving the max and min
    # as attributes.

    return vertex_attributes


def fit_to_mesh_file(mesh_file, point_file, mesh_output_file,
                     alpha=DEFAULT_ALPHA, verbose=False,
                     expand_search=False,
                     precrop=False,
                     display_errors=True):
    """
    Given a mesh file (tsh) and a point attribute file, fit
    point attributes to the mesh and write a mesh file with the
    results.

    Note: the points file needs titles.  If you want anuga to use the tsh file,
    make sure the title is elevation.

    NOTE: Throws IOError for a variety of file problems.

    """

    from load_mesh.loadASCII import import_mesh_file, \
         export_mesh_file, concatinate_attributelist

    try:
        mesh_dict = import_mesh_file(mesh_file)
    except IOError, e:
        if display_errors:
            log.critical("Could not load bad file: %s" % str(e))
        raise IOError  # Could not load bad mesh file.

    vertex_coordinates = mesh_dict['vertices']
    triangles = mesh_dict['triangles']
    if isinstance(mesh_dict['vertex_attributes'], num.ndarray):
        old_point_attributes = mesh_dict['vertex_attributes'].tolist()
    else:
        old_point_attributes = mesh_dict['vertex_attributes']

    if isinstance(mesh_dict['vertex_attribute_titles'], num.ndarray):
        old_title_list = mesh_dict['vertex_attribute_titles'].tolist()
    else:
        old_title_list = mesh_dict['vertex_attribute_titles']

    if verbose:
        log.critical('tsh file %s loaded' % mesh_file)

    # load in the points file
    try:
        geo = Geospatial_data(point_file, verbose=verbose)
    except IOError, e:
        if display_errors:
            log.critical("Could not load bad file: %s" % str(e))
        raise IOError  # Re-raise exception

    point_coordinates = geo.get_data_points(absolute=True)
    title_list, point_attributes = concatinate_attributelist( \
        geo.get_all_attributes())

    if mesh_dict.has_key('geo_reference') and \
           not mesh_dict['geo_reference'] is None:
        mesh_origin = mesh_dict['geo_reference'].get_origin()
    else:
        mesh_origin = None

    if verbose:
        log.critical("points file loaded")
    if verbose:
        log.critical("fitting to mesh")
    f = fit_to_mesh(point_coordinates,
                    vertex_coordinates,
                    triangles,
                    None,
                    point_attributes,
                    alpha=alpha,
                    verbose=verbose,
                    data_origin=None,
                    mesh_origin=mesh_origin)
    if verbose:
        log.critical("finished fitting to mesh")

    # convert array to list of lists
    new_point_attributes = f.tolist()
    # FIXME: have this overwrite attributes with the same title - DSG
    # Put the newer attributes last
    if old_title_list != []:
        old_title_list.extend(title_list)
        # FIXME: can this be done a faster way? - DSG
        for i in range(len(old_point_attributes)):
            old_point_attributes[i].extend(new_point_attributes[i])
        mesh_dict['vertex_attributes'] = old_point_attributes
        mesh_dict['vertex_attribute_titles'] = old_title_list
    else:
        mesh_dict['vertex_attributes'] = new_point_attributes
        mesh_dict['vertex_attribute_titles'] = title_list

    if verbose:
        log.critical("exporting to file %s" % mesh_output_file)

    try:
        export_mesh_file(mesh_output_file, mesh_dict)
    except IOError, e:
        if display_errors:
            log.critical("Could not write file %s", str(e))
        raise IOError
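

# Example use of fit_to_mesh_file (an illustrative sketch only; the file
# names are made up and the code is not run on import):
#
#   fit_to_mesh_file('domain.tsh', 'bathymetry.pts', 'domain_fitted.tsh',
#                    alpha=1.0e-6, verbose=True)
#
#   # This reads the .tsh mesh and the points file, fits the point
#   # attributes to the mesh vertices and writes a new .tsh file with the
#   # fitted values appended as vertex attributes.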