source: trunk/anuga_core/source/anuga/fit_interpolate/fit.py @ 8879

Last change on this file since 8879 was 8808, checked in by wilsonp, 12 years ago

Removed netcdf dependence from fit code and compile. Also fixed fitsmooth to use OpenMP when building AtA directly from point coordinates. Changed the default blocking size for reading files in config.py

File size: 21.6 KB
Line 
1"""Least squares fitting.
2
3   Implements a penalised least-squares fit.
4   putting point data onto the mesh.
5
6   The penalty term (or smoothing term) is controlled by the smoothing
7   parameter alpha.
8   With a value of alpha=0, the fit function will attempt
9   to interpolate as closely as possible in the least-squares sense.
10   With values alpha > 0, a certain amount of smoothing will be applied.
11   A positive alpha is essential in cases where there are too few
12   data points.
13   A negative alpha is not allowed.
14   A typical value of alpha is 1.0e-6
15
16
17   Ole Nielsen, Stephen Roberts, Duncan Gray, Christopher Zoppou
18   Geoscience Australia, 2004.
19
20   TO DO
21   * test geo_ref, geo_spatial
22
23   IDEAS
24   * (DSG-) Change the interface of fit, so a domain object can
25      be passed in. (I don't know if this is feasible). If could
26      save time/memory.
27"""
28
29from anuga.abstract_2d_finite_volumes.neighbour_mesh import Mesh
30from anuga.caching import cache
31from anuga.geospatial_data.geospatial_data import Geospatial_data, \
32     ensure_absolute
33from anuga.fit_interpolate.general_fit_interpolate import FitInterpolate
34
35from anuga.utilities.sparse import Sparse_CSR
36from anuga.utilities.numerical_tools import ensure_numeric
37from anuga.utilities.cg_solve import conjugate_gradient
38from anuga.config import default_smoothing_parameter as DEFAULT_ALPHA
39import anuga.utilities.log as log
40
41import exceptions
42class TooFewPointsError(exceptions.Exception): pass
43class VertsWithNoTrianglesError(exceptions.Exception): pass
44
45import numpy as num
46import sys
47
48#----------------------------------------------
49# C code to build interpolation matrices
50#----------------------------------------------
51import fitsmooth
52
53
54class Fit(FitInterpolate):
55
56    def __init__(self,
57                 vertex_coordinates=None,
58                 triangles=None,
59                 mesh=None,
60                 mesh_origin=None,
61                 alpha=None,
62                 verbose=False,
63                 cg_precon='Jacobi',
64                 use_c_cg=True):
65
66        """
67        Padarn Note 05/12/12: This documentation should probably
68        be updated to account for the fact that the fitting is now
69        done in C. I wasn't sure what details were neccesary though.
70
71        Fit data at points to the vertices of a mesh.
72
73        Inputs:
74
75          vertex_coordinates: List of coordinate pairs [xi, eta] of
76          points constituting a mesh (or an m x 2 numeric array or
77              a geospatial object)
78              Points may appear multiple times
79              (e.g. if vertices have discontinuities)
80
81          triangles: List of 3-tuples (or a numeric array) of
82              integers representing indices of all vertices in the mesh.
83
84          mesh_origin: A geo_reference object or 3-tuples consisting of
85              UTM zone, easting and northing.
86              If specified vertex coordinates are assumed to be
87              relative to their respective origins.
88
89          Note: Don't supply a vertex coords as a geospatial object and
90              a mesh origin, since geospatial has its own mesh origin.
91
92
93        Usage,
94        To use this in a blocking way, call  build_fit_subset, with z info,
95        and then fit, with no point coord, z info.
96        """
97        # Initialise variabels
98        if alpha is None:
99            self.alpha = DEFAULT_ALPHA
100        else:
101            self.alpha = alpha
102
103        FitInterpolate.__init__(self,
104                 vertex_coordinates,
105                 triangles,
106                 mesh,
107                 mesh_origin=mesh_origin,
108                 verbose=verbose)
109
110        self.AtA = None
111        self.Atz = None
112        self.D = None
113        self.point_count = 0
114
115        # NOTE PADARN: NEEDS FIXING - currently need smoothing matrix
116        # even if alpha is zero, due to C function expecting it. This
117        # could and should be removed.
118        if True:
119            if verbose:
120                log.critical('Building smoothing matrix')
121            self.D = self._build_smoothing_matrix_D()
122
123        bd_poly = self.mesh.get_boundary_polygon()
124        self.mesh_boundary_polygon = ensure_numeric(bd_poly)
125
126        self.cg_precon=cg_precon
127        self.use_c_cg=use_c_cg
128
129    def _build_coefficient_matrix_B(self,
130                                  verbose=False):
131        """
132        Build final coefficient matrix from AtA and D
133        """
134
135        msize = self.mesh.number_of_nodes
136
137        self.B = fitsmooth.build_matrix_B(self.D, \
138                                          self.AtA, self.alpha)
139
140        # Convert self.B matrix to CSR format
141        self.B = Sparse_CSR(data=num.array(self.B[0]),\
142                                  Colind=num.array(self.B[1]),\
143                                  rowptr=num.array(self.B[2]), \
144                                  m=msize, n=msize)
145        # NOTE PADARN: The above step could potentially be removed
146        # and the sparse matrix worked with directly in C. Not sure
147        # if this would be worthwhile.
148
149    def _build_smoothing_matrix_D(self):
150        """Build m x m smoothing matrix, where
151        m is the number of basis functions phi_k (one per vertex)
152
153        The smoothing matrix is defined as
154
155        D = D1 + D2
156
157        where
158
159        [D1]_{k,l} = \int_\Omega
160           \frac{\partial \phi_k}{\partial x}
161           \frac{\partial \phi_l}{\partial x}\,
162           dx dy
163
164        [D2]_{k,l} = \int_\Omega
165           \frac{\partial \phi_k}{\partial y}
166           \frac{\partial \phi_l}{\partial y}\,
167           dx dy
168
169
170        The derivatives \frac{\partial \phi_k}{\partial x},
171        \frac{\partial \phi_k}{\partial x} for a particular triangle
172        are obtained by computing the gradient a_k, b_k for basis function k
173
174        NOTE PADARN: All of this is now done in an external C function, and the
175        result is stored in a Capsule object, meaning the entries cannot be directly
176        accessed.
177        """
178
179        # NOTE PADARN: Should the input arguments here be checked - making
180        # sure that they are floats? Not sure if this is done elsewhere.
181        # NOTE PADARN: Should global coordinates be used for the smoothing
182        # matrix, or is thids not important?
183        return fitsmooth.build_smoothing_matrix(self.mesh.triangles, \
184          self.mesh.areas, self.mesh.vertex_coordinates)
185
186
187    # NOTE PADARN: This function was added to emulate behavior of the original
188    # class not using external C functions. This method is dangerous as D could
189    # be very large - it was added as it is used in a unit test.
190    def get_D(self):
191        return fitsmooth.return_full_D(self.D, self.mesh.number_of_nodes)
192
193    # NOTE PADARN: This function was added to emulate behavior of the original
194    # class so as to pass a unit test. It is completely unneeded.
195    def build_fit_subset(self, point_coordinates, z=None, attribute_name=None,
196                              verbose=False, output='dot'):
197        self._build_matrix_AtA_Atz(point_coordinates, z, attribute_name, verbose, output)
198
199    def _build_matrix_AtA_Atz(self, point_coordinates, z=None, attribute_name=None,
200                              verbose=False, output='dot'):
201        """Build:
202        AtA  m x m  interpolation matrix, and,
203        Atz  m x a  interpolation matrix where,
204        m is the number of basis functions phi_k (one per vertex)
205        a is the number of data attributes
206
207        This algorithm uses a quad tree data structure for fast binning of
208        data points.
209
210        If Ata is None, the matrices AtA and Atz are created.
211
212        This function can be called again and again, with sub-sets of
213        the point coordinates.  Call fit to get the results.
214
215        Preconditions
216        z and points are numeric
217        Point_coordindates and mesh vertices have the same origin.
218
219        The number of attributes of the data points does not change
220        """
221
222        if isinstance(point_coordinates, Geospatial_data):
223            point_coordinates = point_coordinates.get_data_points( \
224                absolute=True)
225
226        # Convert input to numeric arrays
227        if z is not None:
228            z = ensure_numeric(z, num.float)
229        else:
230            msg = 'z not specified'
231            assert isinstance(point_coordinates, Geospatial_data), msg
232            z = point_coordinates.get_attributes(attribute_name)
233
234        point_coordinates = ensure_numeric(point_coordinates, num.float)
235
236        npts = len(z)
237        z = num.array(z)
238        # NOTE PADARN : This copy might be needed to
239        # make sure memory is contig - would be better to read in C..
240        z = z.copy()
241
242        self.point_count += z.shape[0]
243
244        zdim = 1
245        if len(z.shape) != 1:
246            zdim = z.shape[1]
247
248        [AtA, Atz] = fitsmooth.build_matrix_AtA_Atz_points(self.root.root, \
249               self.mesh.number_of_nodes, \
250               self.mesh.triangles, \
251               num.array(point_coordinates), z, zdim, npts)
252
253        if verbose and output == 'dot':
254            print '\b.',
255            sys.stdout.flush()
256        if zdim == 1:
257            Atz = num.array(Atz[0])
258        else:
259            Atz = num.array(Atz).transpose()
260
261        if self.AtA is None and self.Atz is None:
262            self.AtA = AtA
263            self.Atz = Atz
264        else:
265            fitsmooth.combine_partial_AtA_Atz(self.AtA, AtA, \
266                    self.Atz, Atz, zdim, self.mesh.number_of_nodes)
267
268    def fit(self, point_coordinates_or_filename=None, z=None,
269            verbose=False,
270            point_origin=None,
271            attribute_name=None,
272            max_read_lines=1e7):
273        """Fit a smooth surface to given 1d array of data points z.
274
275        The smooth surface is computed at each vertex in the underlying
276        mesh using the formula given in the module doc string.
277
278        Inputs:
279        point_coordinates_or_filename: The co-ordinates of the data points.
280              A filename of a .pts file or a
281              List of coordinate pairs [x, y] of
282              data points or an nx2 numeric array or a Geospatial_data object
283              or points file filename
284          z: Single 1d vector or array of data at the point_coordinates.
285
286        """
287        if isinstance(point_coordinates_or_filename, basestring):
288            if point_coordinates_or_filename[-4:] != ".pts":
289                use_blocking_option2 = False
290
291        # NOTE PADARN 29/03/13: File reading from C has been removed. Now
292        # the input is either a set of points, or a filename which is then
293        # handled by the Geospatial_data object
294
295        if verbose:
296            print 'Fit.fit: Initializing'
297
298        # Use blocking to load in the point info
299        if isinstance(point_coordinates_or_filename, basestring):
300            msg = "Don't set a point origin when reading from a file"
301            assert point_origin is None, msg
302            filename = point_coordinates_or_filename
303
304            G_data = Geospatial_data(filename,
305                                     max_read_lines=max_read_lines,
306                                     load_file_now=False,
307                                     verbose=verbose)
308
309            for i, geo_block in enumerate(G_data):
310
311               # Build the array
312                points = geo_block.get_data_points(absolute=True)
313                z = geo_block.get_attributes(attribute_name=attribute_name)
314
315                self._build_matrix_AtA_Atz(points, z, attribute_name, verbose)
316
317            point_coordinates = None
318
319            if verbose:
320                print ''
321        else:
322            point_coordinates = point_coordinates_or_filename
323
324
325        # This condition either means a filename was read or the function
326        # recieved a None as input
327        if point_coordinates is None:
328            if verbose:
329                log.critical('Fit.fit: Warning: no data points in fit')
330            msg = 'No interpolation matrix.'
331            assert self.AtA is not None, msg
332            assert self.Atz is not None
333
334
335        else:
336            point_coordinates = ensure_absolute(point_coordinates,
337                                                geo_reference=point_origin)
338            # if isinstance(point_coordinates,Geospatial_data) and z is None:
339            # z will come from the geo-ref
340
341            self._build_matrix_AtA_Atz(point_coordinates, z, verbose=verbose, output='counter')
342
343        # Check sanity
344        m = self.mesh.number_of_nodes  # Nbr of basis functions (1/vertex)
345        n = self.point_count
346        if n < m and self.alpha == 0.0:
347            msg = 'ERROR (least_squares): Too few data points\n'
348            msg += 'There are only %d data points and alpha == 0. ' % n
349            msg += 'Need at least %d\n' % m
350            msg += 'Alternatively, set smoothing parameter alpha to a small '
351            msg += 'positive value,\ne.g. 1.0e-3.'
352            raise TooFewPointsError(msg)
353
354        self._build_coefficient_matrix_B(verbose)
355        loners = self.mesh.get_lone_vertices()
356        # FIXME  - make this as error message.
357        # test with
358        # Not_yet_test_smooth_att_to_mesh_with_excess_verts.
359        if len(loners) > 0:
360            msg = 'WARNING: (least_squares): \nVertices with no triangles\n'
361            msg += 'All vertices should be part of a triangle.\n'
362            msg += 'In the future this will be inforced.\n'
363            msg += 'The following vertices are not part of a triangle;\n'
364            msg += str(loners)
365            log.critical(msg)
366
367            #raise VertsWithNoTrianglesError(msg)
368        return conjugate_gradient(self.B, self.Atz, self.Atz,
369                                  imax=2 * len(self.Atz)+1000, use_c_cg=self.use_c_cg,
370                                  precon=self.cg_precon)
371
372
373#poin_coordiantes can also be a points file name
374
375def fit_to_mesh(point_coordinates,
376                vertex_coordinates=None,
377                triangles=None,
378                mesh=None,
379                point_attributes=None,
380                alpha=DEFAULT_ALPHA,
381                verbose=False,
382                mesh_origin=None,
383                data_origin=None,
384                max_read_lines=None,
385                attribute_name=None,
386                use_cache=False,
387                cg_precon='Jacobi',
388                use_c_cg=True):
389    """Wrapper around internal function _fit_to_mesh for use with caching.
390    """
391
392    args = (point_coordinates, )
393    kwargs = {'vertex_coordinates': vertex_coordinates,
394              'triangles': triangles,
395              'mesh': mesh,
396              'point_attributes': point_attributes,
397              'alpha': alpha,
398              'verbose': verbose,
399              'mesh_origin': mesh_origin,
400              'data_origin': data_origin,
401              'max_read_lines': max_read_lines,
402              'attribute_name': attribute_name,
403              'cg_precon': cg_precon,
404              'use_c_cg': use_c_cg
405              }
406
407    if use_cache is True:
408        if isinstance(point_coordinates, basestring):
409            # We assume that point_coordinates is the name of a .csv/.txt
410            # file which must be passed onto caching as a dependency
411            # (in case it has changed on disk)
412            dep = [point_coordinates]
413        else:
414            dep = None
415
416        return cache(_fit_to_mesh,
417                     args, kwargs,
418                     verbose=verbose,
419                     compression=False,
420                     dependencies=dep)
421    else:
422        res = apply(_fit_to_mesh,
423                     args, kwargs)
424        "print intep should go out of range"
425        return res
426
427
428# point_coordinates can also be a points file name
429
430def _fit_to_mesh(point_coordinates,
431                 vertex_coordinates=None,
432                 triangles=None,
433                 mesh=None,
434                 point_attributes=None,
435                 alpha=DEFAULT_ALPHA,
436                 verbose=False,
437                 mesh_origin=None,
438                 data_origin=None,
439                 max_read_lines=None,
440                 attribute_name=None,
441                 cg_precon='Jacobi',
442                 use_c_cg=True):
443    """
444    Fit a smooth surface to a triangulation,
445    given data points with attributes.
446
447
448        Inputs:
449        vertex_coordinates: List of coordinate pairs [xi, eta] of
450        points constituting a mesh (or an m x 2 numeric array or
451              a geospatial object)
452              Points may appear multiple times
453              (e.g. if vertices have discontinuities)
454
455          triangles: List of 3-tuples (or a numeric array) of
456          integers representing indices of all vertices in the mesh.
457
458          point_coordinates: List of coordinate pairs [x, y] of data points
459          (or an nx2 numeric array). This can also be a .csv/.txt/.pts
460          file name.
461
462          alpha: Smoothing parameter.
463
464          mesh_origin: A geo_reference object or 3-tuples consisting of
465              UTM zone, easting and northing.
466              If specified vertex coordinates are assumed to be
467              relative to their respective origins.
468
469          point_attributes: Vector or array of data at the
470                            point_coordinates.
471
472    """
473
474    if mesh is None:
475        # FIXME(DSG): Throw errors if triangles or vertex_coordinates
476        # are None
477
478        #Convert input to numeric arrays
479        triangles = ensure_numeric(triangles, num.int)
480        vertex_coordinates = ensure_absolute(vertex_coordinates,
481                                             geo_reference = mesh_origin)
482
483        if verbose:
484            log.critical('_fit_to_mesh: Building mesh')
485        mesh = Mesh(vertex_coordinates, triangles)
486
487        # Don't need this as we have just created the mesh
488        #mesh.check_integrity()
489
490    interp = Fit(mesh=mesh,
491                 verbose=verbose,
492                 alpha=alpha,
493                 cg_precon=cg_precon,
494                 use_c_cg=use_c_cg)
495
496    vertex_attributes = interp.fit(point_coordinates,
497                                   point_attributes,
498                                   point_origin=data_origin,
499                                   max_read_lines=max_read_lines,
500                                   attribute_name=attribute_name,
501                                   verbose=verbose)
502
503    # Add the value checking stuff that's in least squares.
504    # Maybe this stuff should get pushed down into Fit.
505    # at least be a method of Fit.
506    # Or intigrate it into the fit method, saving teh max and min's
507    # as att's.
508
509    return vertex_attributes
510
511
512def fit_to_mesh_file(mesh_file, point_file, mesh_output_file,
513                     alpha=DEFAULT_ALPHA, verbose= False,
514                     expand_search = False,
515                     precrop = False,
516                     display_errors = True):
517    """
518    Given a mesh file (tsh) and a point attribute file, fit
519    point attributes to the mesh and write a mesh file with the
520    results.
521
522    Note: the points file needs titles.  If you want anuga to use the tsh file,
523    make sure the title is elevation.
524
525    NOTE: Throws IOErrors, for a variety of file problems.
526   
527    """
528
529    from load_mesh.loadASCII import import_mesh_file, \
530         export_mesh_file, concatinate_attributelist
531
532    try:
533        mesh_dict = import_mesh_file(mesh_file)
534    except IOError, e:
535        if display_errors:
536            log.critical("Could not load bad file: %s" % str(e))
537        raise IOError  #Could not load bad mesh file.
538
539    vertex_coordinates = mesh_dict['vertices']
540    triangles = mesh_dict['triangles']
541    if isinstance(mesh_dict['vertex_attributes'], num.ndarray):
542        old_point_attributes = mesh_dict['vertex_attributes'].tolist()
543    else:
544        old_point_attributes = mesh_dict['vertex_attributes']
545
546    if isinstance(mesh_dict['vertex_attribute_titles'], num.ndarray):
547        old_title_list = mesh_dict['vertex_attribute_titles'].tolist()
548    else:
549        old_title_list = mesh_dict['vertex_attribute_titles']
550
551    if verbose:
552        log.critical('tsh file %s loaded' % mesh_file)
553
554    # load in the points file
555    try:
556        geo = Geospatial_data(point_file, verbose=verbose)
557    except IOError, e:
558        if display_errors:
559            log.critical("Could not load bad file: %s" % str(e))
560        raise IOError  #Re-raise exception 
561
562    point_coordinates = geo.get_data_points(absolute=True)
563    title_list, point_attributes = concatinate_attributelist( \
564        geo.get_all_attributes())
565
566    if mesh_dict.has_key('geo_reference') and \
567           not mesh_dict['geo_reference'] is None:
568        mesh_origin = mesh_dict['geo_reference'].get_origin()
569    else:
570        mesh_origin = None
571
572    if verbose:
573        log.critical("points file loaded")
574    if verbose:
575        log.critical("fitting to mesh")
576    f = fit_to_mesh(point_coordinates,
577                    vertex_coordinates,
578                    triangles,
579                    None,
580                    point_attributes,
581                    alpha = alpha,
582                    verbose = verbose,
583                    data_origin = None,
584                    mesh_origin = mesh_origin)
585    if verbose:
586        log.critical("finished fitting to mesh")
587
588    # convert array to list of lists
589    new_point_attributes = f.tolist()
590    #FIXME have this overwrite attributes with the same title - DSG
591    #Put the newer attributes last
592    if old_title_list != []:
593        old_title_list.extend(title_list)
594        #FIXME can this be done a faster way? - DSG
595        for i in range(len(old_point_attributes)):
596            old_point_attributes[i].extend(new_point_attributes[i])
597        mesh_dict['vertex_attributes'] = old_point_attributes
598        mesh_dict['vertex_attribute_titles'] = old_title_list
599    else:
600        mesh_dict['vertex_attributes'] = new_point_attributes
601        mesh_dict['vertex_attribute_titles'] = title_list
602
603    if verbose:
604        log.critical("exporting to file %s" % mesh_output_file)
605
606    try:
607        export_mesh_file(mesh_output_file, mesh_dict)
608    except IOError, e:
609        if display_errors:
610            log.critical("Could not write file %s", str(e))
611        raise IOError
Note: See TracBrowser for help on using the repository browser.