1 | """Least squares fitting. |
---|
2 | |
---|
3 | Implements a penalised least-squares fit. |
---|
4 | |
---|
5 | The penalty term (or smoothing term) is controlled by the smoothing |
---|
6 | parameter alpha. |
---|
7 | With a value of alpha=0, the fit function will attempt |
---|
8 | to interpolate as closely as possible in the least-squares sense. |
---|
9 | With values alpha > 0, a certain amount of smoothing will be applied. |
---|
10 | A positive alpha is essential in cases where there are too few |
---|
11 | data points. |
---|
12 | A negative alpha is not allowed. |
---|
13 | A typical value of alpha is 1.0e-6 |
---|
14 | |
---|
15 | |
---|
16 | Ole Nielsen, Stephen Roberts, Duncan Gray, Christopher Zoppou |
---|
17 | Geoscience Australia, 2004. |
---|
18 | |
---|
19 | TO DO |
---|
20 | * test geo_ref, geo_spatial |
---|
21 | |
---|
22 | IDEAS |
---|
23 | * (DSG-) Change the interface of fit, so a domain object can |
---|
24 | be passed in. (I don't know if this is feasible). It could |
---|
25 | save time/memory. |
---|
26 | """ |
---|
27 | import types |
---|
28 | |
---|
29 | from Numeric import zeros, Float, ArrayType,take |
---|
30 | |
---|
31 | from anuga.caching import cache |
---|
32 | from anuga.geospatial_data.geospatial_data import Geospatial_data, \ |
---|
33 | ensure_absolute |
---|
34 | from anuga.fit_interpolate.general_fit_interpolate import FitInterpolate |
---|
35 | from anuga.utilities.sparse import Sparse, Sparse_CSR |
---|
36 | from anuga.utilities.polygon import in_and_outside_polygon |
---|
37 | from anuga.fit_interpolate.search_functions import search_tree_of_vertices |
---|
38 | from anuga.utilities.cg_solve import conjugate_gradient |
---|
39 | from anuga.utilities.numerical_tools import ensure_numeric, gradient |
---|
40 | |
---|
41 | import exceptions |
---|
class ToFewPointsError(Exception):
    """Raised by Fit.fit when there are fewer data points than mesh nodes
    and no smoothing (alpha == 0) is applied.

    Derived from the builtin Exception, which is the same class as
    exceptions.Exception in Python 2, so the class does not depend on the
    Python-2-only 'exceptions' module.
    """
    pass
---|
class VertsWithNoTrianglesError(Exception):
    """Error type for meshes containing vertices that belong to no triangle.

    Derived from the builtin Exception, which is the same class as
    exceptions.Exception in Python 2, so the class does not depend on the
    Python-2-only 'exceptions' module.
    """
    pass
---|
44 | |
---|
# Smoothing parameter used when the caller does not supply one:
# Fit.__init__ falls back to it when alpha is None and fit_to_mesh uses it
# as the default value of its alpha argument.
DEFAULT_ALPHA = 0.001
---|
46 | |
---|
47 | |
---|
class Fit(FitInterpolate):
    """Penalised least-squares fit of point data to the vertices of a mesh.

    The normal-equation matrices AtA and Atz are accumulated from the data
    points (possibly over several calls to build_fit_subset), the smoothing
    matrix D is added with weight alpha, and the resulting system is solved
    with conjugate gradients in fit().
    """

    def __init__(self,
                 vertex_coordinates,
                 triangles,
                 mesh_origin=None,
                 alpha=None,
                 verbose=False,
                 max_vertices_per_cell=30):
        """
        Fit data at points to the vertices of a mesh.

        Inputs:

          vertex_coordinates: List of coordinate pairs [xi, eta] of
              points constituting a mesh (or an m x 2 Numeric array or
              a geospatial object)
              Points may appear multiple times
              (e.g. if vertices have discontinuities)

          triangles: List of 3-tuples (or a Numeric array) of
              integers representing indices of all vertices in the mesh.

          mesh_origin: A geo_reference object or 3-tuples consisting of
              UTM zone, easting and northing.
              If specified vertex coordinates are assumed to be
              relative to their respective origins.

          alpha: Smoothing parameter. DEFAULT_ALPHA is used when None.
              With alpha == 0 no smoothing matrix is built.

          verbose: If true, progress messages are printed.

          max_vertices_per_cell: Number of vertices in a quad tree cell
              at which the cell is split into 4.

          Note: Don't supply a vertex coords as a geospatial object and
              a mesh origin, since geospatial has its own mesh origin.

        Usage,
          To use this in a blocking way, call build_fit_subset, with z info,
          and then fit, with no point coord, z info.
        """
        # Initialise variables
        if alpha is None:
            self.alpha = DEFAULT_ALPHA
        else:
            self.alpha = alpha

        FitInterpolate.__init__(self,
                                vertex_coordinates,
                                triangles,
                                mesh_origin,
                                verbose,
                                max_vertices_per_cell)

        # Normal-equation accumulators; created lazily by
        # _build_matrix_AtA_Atz on the first batch of data points.
        self.AtA = None
        self.Atz = None

        # Total number of data points handed to _build_matrix_AtA_Atz
        self.point_count = 0

        if self.alpha != 0:
            if verbose:
                print('Building smoothing matrix')
            self._build_smoothing_matrix_D()

    def _build_coefficient_matrix_B(self,
                                    verbose=False):
        """
        Build final coefficient matrix B = AtA + alpha*D (or AtA alone when
        alpha is zero) and store it in self.B in CSR format.

        Precon
        If alpha is not zero, matrix D has been built
        Matrix AtA has been built
        """
        if self.alpha != 0:
            self.B = self.AtA + self.alpha*self.D
        else:
            self.B = self.AtA

        # Convert self.B matrix to CSR format for faster matrix vector
        self.B = Sparse_CSR(self.B)

    def _build_smoothing_matrix_D(self):
        r"""Build m x m smoothing matrix, where
        m is the number of basis functions phi_k (one per vertex)

        The smoothing matrix is defined as

        D = D1 + D2

        where

        [D1]_{k,l} = \int_\Omega
           \frac{\partial \phi_k}{\partial x}
           \frac{\partial \phi_l}{\partial x}\,
           dx dy

        [D2]_{k,l} = \int_\Omega
           \frac{\partial \phi_k}{\partial y}
           \frac{\partial \phi_l}{\partial y}\,
           dx dy

        The derivatives \frac{\partial \phi_k}{\partial x},
        \frac{\partial \phi_k}{\partial y} for a particular triangle
        are obtained by computing the gradient a_k, b_k for basis function k

        The result is stored in self.D.
        """
        # FIXME: algorithm might be optimised by computing local 9x9
        # "element stiffness matrices"

        m = self.mesh.number_of_nodes  # Nbr of basis functions (1/vertex)

        self.D = Sparse(m, m)

        # For each triangle compute contributions to D = D1+D2
        for i in range(len(self.mesh)):

            # Get area
            area = self.mesh.areas[i]

            # Get global vertex indices
            v0 = self.mesh.triangles[i, 0]
            v1 = self.mesh.triangles[i, 1]
            v2 = self.mesh.triangles[i, 2]

            # Get the three vertex_points
            xi0 = self.mesh.get_vertex_coordinate(i, 0)
            xi1 = self.mesh.get_vertex_coordinate(i, 1)
            xi2 = self.mesh.get_vertex_coordinate(i, 2)

            # Compute gradients for each vertex: basis function k takes the
            # value 1 at vertex k and 0 at the other two vertices.
            a0, b0 = gradient(xi0[0], xi0[1], xi1[0], xi1[1], xi2[0], xi2[1],
                              1, 0, 0)

            a1, b1 = gradient(xi0[0], xi0[1], xi1[0], xi1[1], xi2[0], xi2[1],
                              0, 1, 0)

            a2, b2 = gradient(xi0[0], xi0[1], xi1[0], xi1[1], xi2[0], xi2[1],
                              0, 0, 1)

            # Compute diagonal contributions
            self.D[v0, v0] += (a0*a0 + b0*b0)*area
            self.D[v1, v1] += (a1*a1 + b1*b1)*area
            self.D[v2, v2] += (a2*a2 + b2*b2)*area

            # Compute contributions for basis functions sharing edges
            # (added symmetrically)
            e01 = (a0*a1 + b0*b1)*area
            self.D[v0, v1] += e01
            self.D[v1, v0] += e01

            e12 = (a1*a2 + b1*b2)*area
            self.D[v1, v2] += e12
            self.D[v2, v1] += e12

            e20 = (a2*a0 + b2*b0)*area
            self.D[v2, v0] += e20
            self.D[v0, v2] += e20

    def get_D(self):
        """Return the smoothing matrix D in dense form.

        self.D is only created when alpha is non-zero (see __init__).
        """
        return self.D.todense()

    def _build_matrix_AtA_Atz(self,
                              point_coordinates,
                              z,
                              verbose=False):
        """Build:
        AtA  m x m  interpolation matrix, and,
        Atz  m x a  interpolation matrix where,
        m is the number of basis functions phi_k (one per vertex)
        a is the number of data attributes

        This algorithm uses a quad tree data structure for fast binning of
        data points.

        If AtA is None, the matrices AtA and Atz are created.

        This function can be called again and again, with sub-sets of
        the point coordinates.  Call fit to get the results.

        Preconditions
        z and points are numeric
        Point_coordindates and mesh vertices have the same origin.

        The number of attributes of the data points does not change

        Raises:
          AssertionError if z and point_coordinates differ in length.
          Exception if no triangle is found for a point that lies inside
          the mesh boundary polygon.
        """
        # Every batch must supply one row of z per data point.
        msg = 'Number of attribute rows does not match number of points'
        assert z.shape[0] == point_coordinates.shape[0], msg

        # Identity test: a value comparison with None is not reliable for
        # matrix/array objects.
        if self.AtA is None:
            # AtA and Atz need to be initialised.
            m = self.mesh.number_of_nodes
            if len(z.shape) > 1:
                self.Atz = zeros((m, z.shape[1]), Float)
            else:
                self.Atz = zeros((m,), Float)

            self.AtA = Sparse(m, m)
            # The memory damage has been done by now.

        self.point_count += point_coordinates.shape[0]

        if verbose:
            print('Getting indices inside mesh boundary')

        inside_poly_indices, outside_poly_indices = \
            in_and_outside_polygon(point_coordinates,
                                   self.mesh.get_boundary_polygon(),
                                   closed=True, verbose=verbose)

        n = len(inside_poly_indices)
        if verbose:
            print('Building fitting matrix from %d points' % n)

        # Report progress roughly ten times; floor division keeps the
        # interval an integer on every Python version.
        progress_step = (n + 10)//10

        # Compute matrix elements for points inside the mesh
        for count, i in enumerate(inside_poly_indices):
            # For each data_coordinate point
            if verbose and count % progress_step == 0:
                print('Doing %d of %d' % (count, n))

            x = point_coordinates[i]
            element_found, sigma0, sigma1, sigma2, triangle_id = \
                search_tree_of_vertices(self.root, self.mesh, x)

            if element_found is True:
                # Global vertex ids for sigma0, sigma1 and sigma2
                j0 = self.mesh.triangles[triangle_id, 0]
                j1 = self.mesh.triangles[triangle_id, 1]
                j2 = self.mesh.triangles[triangle_id, 2]

                sigmas = {j0: sigma0, j1: sigma1, j2: sigma2}
                js = [j0, j1, j2]

                for j in js:
                    self.Atz[j] += sigmas[j]*z[i]
                    for l in js:
                        self.AtA[j, l] += sigmas[j]*sigmas[l]
            else:
                msg = 'Could not find triangle for point %s' % (x,)
                raise Exception(msg)

    def fit(self, point_coordinates_or_filename=None, z=None,
            verbose=False,
            point_origin=None,
            attribute_name=None,
            max_read_lines=500):
        """Fit a smooth surface to given 1d array of data points z.

        The smooth surface is computed at each vertex in the underlying
        mesh using the formula given in the module doc string.

        Inputs:
          point_coordinates_or_filename: The co-ordinates of the data
              points: a list of coordinate pairs [x, y], an nx2 Numeric
              array or a Geospatial_data object.  Alternatively the name of
              a points file, which is then read in blocks of max_read_lines.
              If None, the matrices built by earlier calls to
              build_fit_subset are used.
          z: Single 1d vector or array of data at the point_coordinates.
              If None and a Geospatial_data object is given, the attributes
              of that object are used.
          verbose: If true, progress messages are printed.
          point_origin: Geo reference of the point coordinates.  Must be
              None when reading from a file.
          attribute_name: Name of the attribute to fetch from geospatial
              data.
          max_read_lines: Block size used when reading from a file.

        Returns the result of the conjugate gradient solve of
        B x = Atz (the fitted values at the mesh vertices).

        Raises ToFewPointsError if alpha == 0 and there are fewer data
        points than mesh nodes.
        """
        # use blocking to load in the point info
        if isinstance(point_coordinates_or_filename, str):
            msg = "Don't set a point origin when reading from a file"
            assert point_origin is None, msg
            filename = point_coordinates_or_filename
            blocks = Geospatial_data(filename,
                                     max_read_lines=max_read_lines,
                                     load_file_now=False,
                                     verbose=verbose)
            for i, geo_block in enumerate(blocks):
                if verbose is True and 0 == i % 200:
                    print('Block %i' % i)
                # build the array
                points = geo_block.get_data_points(absolute=True)
                block_z = geo_block.get_attributes(
                    attribute_name=attribute_name)
                self.build_fit_subset(points, block_z)
            point_coordinates = None
        else:
            point_coordinates = point_coordinates_or_filename

        if point_coordinates is None:
            # Blocking mode: the matrices must already have been built.
            msg = 'No data points have been supplied; call '
            msg += 'build_fit_subset before fit'
            assert self.AtA is not None, msg
            assert self.Atz is not None, msg
        else:
            # Fetch attributes while the Geospatial_data object is still
            # available; only coordinates are passed on below.
            if z is None and isinstance(point_coordinates, Geospatial_data):
                z = point_coordinates.get_attributes(
                    attribute_name=attribute_name)

            point_coordinates = ensure_absolute(point_coordinates,
                                                geo_reference=point_origin)
            # Keywords are required here: the third positional parameter of
            # build_fit_subset is attribute_name, not verbose.
            self.build_fit_subset(point_coordinates, z,
                                  attribute_name=attribute_name,
                                  verbose=verbose)

        # Check sanity
        m = self.mesh.number_of_nodes  # Nbr of basis functions (1/vertex)
        n = self.point_count
        if n < m and self.alpha == 0.0:
            msg = 'ERROR (least_squares): Too few data points\n'
            msg += 'There are only %d data points and alpha == 0. ' % n
            msg += 'Need at least %d\n' % m
            msg += 'Alternatively, set smoothing parameter alpha to a small '
            msg += 'positive value,\ne.g. 1.0e-3.'
            raise ToFewPointsError(msg)

        self._build_coefficient_matrix_B(verbose)
        loners = self.mesh.get_lone_vertices()
        # FIXME  - make this as error message.
        # test with
        # Not_yet_test_smooth_att_to_mesh_with_excess_verts.
        if len(loners) > 0:
            msg = 'WARNING: (least_squares): \nVertices with no triangles\n'
            msg += 'All vertices should be part of a triangle.\n'
            msg += 'In the future this will be inforced.\n'
            msg += 'The following vertices are not part of a triangle;\n'
            msg += str(loners)
            print(msg)
            #raise VertsWithNoTrianglesError(msg)

        # Atz serves both as right hand side and as initial guess.
        return conjugate_gradient(self.B, self.Atz, self.Atz,
                                  imax=2*len(self.Atz))

    def build_fit_subset(self, point_coordinates, z=None, attribute_name=None,
                         verbose=False):
        """Add a sub-set of data points to the matrices AtA and Atz.

        May be called repeatedly with different sub-sets of the data;
        call fit (without point coordinates) afterwards to get the result.

        Inputs:
          point_coordinates: The co-ordinates of the data points.
              List of coordinate pairs [x, y] of
              data points or an nx2 Numeric array or a Geospatial_data object
          z: Single 1d vector or array of data at the point_coordinates.
              May only be omitted when point_coordinates is a
              Geospatial_data object.
          attribute_name: Used to get the z values from the
              geospatial object if no attribute_name is specified,
              it's a bit of a lucky dip as to what attributes you get.
              If there is only one attribute it will be that one.
          verbose: If true, progress messages are printed.

        Raises AssertionError if z is None and point_coordinates is not a
        Geospatial_data object.
        """
        # FIXME(DSG-DSG): Check that the vert and point coords
        # have the same zone.

        # The attributes must be read before the geospatial object is
        # replaced by its plain coordinate array below.
        if z is None:
            msg = 'z not specified'
            assert isinstance(point_coordinates, Geospatial_data), msg
            z = point_coordinates.get_attributes(attribute_name)

        if isinstance(point_coordinates, Geospatial_data):
            point_coordinates = point_coordinates.get_data_points(
                absolute=True)

        # Convert input to Numeric arrays
        z = ensure_numeric(z, Float)
        point_coordinates = ensure_numeric(point_coordinates, Float)

        self._build_matrix_AtA_Atz(point_coordinates, z, verbose)
---|
417 | |
---|
418 | |
---|
419 | ############################################################################ |
---|
420 | |
---|
def fit_to_mesh(vertex_coordinates,
                triangles,
                point_coordinates, # this can also be a .csv/.txt file name
                point_attributes=None,
                alpha=DEFAULT_ALPHA,
                verbose=False,
                acceptable_overshoot=1.01,
                mesh_origin=None,
                data_origin=None,
                max_read_lines=None,
                attribute_name=None,
                use_cache = False):
    """
    Convenience wrapper: build a Fit object for the given triangulation
    and fit a smooth surface to data points with attributes.

    Inputs:
      vertex_coordinates: List of coordinate pairs [xi, eta] of
          points constituting a mesh (or an m x 2 Numeric array or
          a geospatial object)
          Points may appear multiple times
          (e.g. if vertices have discontinuities)

      triangles: List of 3-tuples (or a Numeric array) of
          integers representing indices of all vertices in the mesh.

      point_coordinates: List of coordinate pairs [x, y] of data points
          (or an nx2 Numeric array), or the name of a points file.

      point_attributes: Vector or array of data at the
          point_coordinates.

      alpha: Smoothing parameter.

      verbose: Passed on to the Fit constructor, the cache and Fit.fit.

      acceptable_overshoot: Intended to control the allowed factor by
          which fitted values may exceed the value of input data
          (lower limit min(z) - acceptable_overshoot*delta z, upper limit
          max(z) + acceptable_overshoot*delta z).
          NOTE: this argument is accepted but not used by this function.

      mesh_origin: A geo_reference object or 3-tuples consisting of
          UTM zone, easting and northing.
          If specified vertex coordinates are assumed to be
          relative to their respective origins.

      data_origin: Passed to Fit.fit as point_origin.

      max_read_lines: Passed to Fit.fit.

      attribute_name: Passed to Fit.fit.

      use_cache: If True (exactly), the Fit object is obtained through
          the caching module via _fit instead of being built directly.

    Returns the value returned by Fit.fit.
    """
    # Options shared by the cached and the direct construction
    fit_options = {'verbose': verbose,
                   'mesh_origin': mesh_origin,
                   'alpha':alpha}

    if use_cache is not True:
        fitter = Fit(vertex_coordinates, triangles, **fit_options)
    else:
        fitter = cache(_fit,
                       (vertex_coordinates, triangles),
                       fit_options,
                       verbose = verbose)

    # Add the value checking stuff that's in least squares.
    # Maybe this stuff should get pushed down into Fit.
    # at least be a method of Fit.
    # Or integrate it into the fit method, saving the max and min's
    # as att's.

    return fitter.fit(point_coordinates,
                      point_attributes,
                      point_origin=data_origin,
                      max_read_lines=max_read_lines,
                      attribute_name=attribute_name,
                      verbose=verbose)
---|
500 | |
---|
def _fit(*args, **kwargs):
    """Construct a Fit object from the given arguments.

    Private helper handed to the caching module by fit_to_mesh: a plain
    function is cached instead of the class itself, because classes
    may change their byte code between runs which is annoying.
    """
    fit_object = Fit(*args, **kwargs)
    return fit_object
---|
507 | |
---|