source: trunk/anuga_core/source/anuga/utilities/cg_ext.c @ 9072

Last change on this file since 9072 was 8691, checked in by steve, 12 years ago

Adding in the new c files

File size: 15.2 KB
Line 
1// C extension for the cg_solve module. Implements a c routine of the
2// conjugate gradient algorithm to solve Ax=b, using sparse matrix A in
3// the csr format.
4//
5// See the module cg_solve.py
6//
7// Padarn Wilson 2012
8//
9// Note Padarn 26/11/12: Currently the input matrix for the CG solve must
10// be a Sparse_CSR matrix python object - defined in anuga/utilities/sparse.py
11//
12// Note Padarn 5/12/12: I have tried a few optimization modifications which
13// didn't seem to save any time:
14// -- conversion of the long arrays to int arrays (to save memory passing time)
15// -- taking advantage of the symmetric quality of the matrix A to reduce the zAx loop
16// -- specifying different 'chunk' sizes for the openmp loops
17// -- using blas instead of own openmp loops
18       
19#include "Python.h"
20#include "numpy/arrayobject.h"
21#include "math.h"
22#include "stdio.h"
23#include "omp.h"
24
25
26// Dot product of two double vectors: a.b
27// @input N: int length of vectors a and b
28//        a: first vector of doubles
29//        b: second vector of double
30// @return: double result of a.b
31double ddot( int N, double *a, double *b)
32{
33  double ret = 0;
34  int i;
35  #pragma omp parallel for private(i) reduction(+:ret)
36  for(i=0;i<N;i++)
37  {
38    ret+=a[i]*b[i];
39  }
40  return ret;
41
42}
43
44// In place multiplication of a double vector x by constant a: a*x
45// @input N: int length of vector x
46//        a: double scalar to multiply by
47//        x: double vector to scale
48void dscal(int N, double a, double *x)
49{
50  int i;
51  #pragma omp parallel for private(i)
52  for(i=0;i<N;i++)
53  {
54    x[i]=a*x[i];
55  }
56
57}
58
59// Copy of one vector to another - memory already allocated: y=x
60// @input N: int length of vectors x and y
61//        x: double vector to make copy of
62//        y: double vector to copy into
63void dcopy( int N, double *x, double *y)
64{
65  int i;
66  #pragma omp parallel for private(i)
67  for(i=0;i<N;i++)
68  {
69    y[i]=x[i];
70  }
71}
72
73// In place axpy operation: y = a*x + y
74// @input N: int length of vectors x and y
75//        a: double to multiply x by
76//        x: first double vector
77//        y: second double vector, stores result
78void daxpy(int N, double a, double *x, double *y)
79{
80  int i;
81  #pragma omp parallel for private(i)
82  for(i=0;i<N;i++)
83  {
84    y[i]=y[i]+a*x[i];
85  }
86}
87
88// Sparse CSR matrix-vector product: z = A*x
89// @input z: double vector to store the result
90//        data: double vector with non-zero entries of A
91//        colind: long vector of column indicies of non-zero entries of A
92//        row_ptr: long vector giving index of rows for non-zero entires of A
93//        x: double vector to be multiplied
94//        M: length of vector x
95void zAx(double * z, double * data, long * colind, long * row_ptr, double * x, int M){
96
97 
98 
99  long i, j, ckey;
100
101
102     
103    #pragma omp parallel for private(ckey,j,i)
104    for (i=0; i<M; i++){
105      z[i]=0;
106      for (ckey=row_ptr[i]; ckey<row_ptr[i+1]; ckey++) {
107        j = colind[ckey];
108        z[i] += data[ckey]*x[j];
109      }             
110    }
111 
112
113}
114
115// Diagonal matrix-vector product: z = D*x
116// @input z: double vector to store the result
117//        D: double vector of diagonal matrix
118//        x: double vector to be multiplied
119//        M: length of vector x
120void zDx(double * z, double * D, double * x, int M){
121
122 
123  long i, j, ckey;
124   
125    #pragma omp parallel for private(ckey,j,i)
126    for (i=0; i<M; i++){
127      z[i]=D[i]*x[i];             
128    }
129 
130
131}
132
133// Diagonal matrix-vector product: z = D*x
134// @input z: double vector to store the result
135//        D: double vector of diagonal matrix
136//        x: double vector to be multiplied
137//        M: length of vector x
138void zDinx(double * z, double * D, double * x, int M){
139
140 
141  long i, j, ckey;
142   
143    #pragma omp parallel for private(ckey,j,i)
144    for (i=0; i<M; i++){
145      z[i]=1.0/D[i]*x[i];             
146    }
147 
148
149}
150
151
152
153// Sparse CSR matrix-vector product and vector addition: z = a*A*x + y
154// @input z: double vector to store the result
155//        a: double to scale matrix-vector product by
156//        data: double vector with non-zero entries of A
157//        colind: long vector of column indicies of non-zero entries of A
158//        row_ptr: long vector giving index of rows for non-zero entires of A
159//        x: double vector to be multiplied
160//        y: double vector to add
161//        M: length of vector x
162void zaAxpy(double * z, double a, double * data, long * colind, long * row_ptr, double * x,
163      double * y,int M){
164  long i, j, ckey;
165  #pragma omp parallel for private(ckey,j,i)
166    for (i=0; i<M; i++ ){
167      z[i]=y[i];
168
169      for (ckey=row_ptr[i]; ckey<row_ptr[i+1]; ckey++) {
170        j = colind[ckey];
171        z[i] += a*data[ckey]*x[j];
172      }             
173
174  }
175
176}
177
178// Jacobi preconditioner for matrix, A, and right hand side, b. Mutiplies each row
179// by one divided by the diagonal element of the matrix. If the diagonal
180// element is zero, does nothing (should nnot occur)
181//        colind: long vector of column indicies of non-zero entries of A
182//        row_ptr: long vector giving index of rows for non-zero entires of A
183//        b: double vector specifying right hand side of equation to solve
184//        M: length of vector b
185
186int _jacobi_precon_c(double* data, 
187                long* colind,
188                long* row_ptr,
189                double * precon,
190                int M){
191
192
193  long i, j, k, ckey;
194  double diag;
195
196
197     
198  #pragma omp parallel for private(diag,ckey,j,i)
199  for (i=0; i<M; i++){
200    diag = 0;
201    for (ckey=row_ptr[i]; ckey<row_ptr[i+1]; ckey++) {
202      j = colind[ckey];
203      if (i==j){
204        diag = data[ckey];
205      }
206    }
207    if (diag == 0){
208      diag =1;
209    }
210    precon[i]=diag;
211  }
212 
213  return 0;
214
215}
216
217// Conjugate gradient solve Ax = b for x, A given in Sparse CSR format
218// @input data: double vector with non-zero entries of A
219//        colind: long vector of column indicies of non-zero entries of A
220//        row_ptr: long vector giving index of rows for non-zero entires of A
221//        b: double vector specifying right hand side of equation to solve
222//        x: double vector with initial guess and to store result
223//        imax: maximum number of iterations
224//        tol: error tollerance for stopping criteria
225//        M: length of vectors x and b
226// @return: 0 on success 
227int _cg_solve_c(double* data, 
228                long* colind,
229                long* row_ptr,
230                double * b,
231                double * x,
232                int imax,
233                double tol,
234                double a_tol,
235                int M){
236
237  int i = 1;
238  double alpha,rTr,rTrOld,bt,rTr0;
239
240  double * d = malloc(sizeof(double)*M);
241  double * r = malloc(sizeof(double)*M);
242  double * q = malloc(sizeof(double)*M);
243  double * xold = malloc(sizeof(double)*M);
244
245  zaAxpy(r,-1.0,data,colind,row_ptr,x,b,M);
246  dcopy(M,r,d);
247
248  rTr=ddot(M,r,r);
249  rTr0 = rTr;
250 
251  while((i<imax) && (rTr>pow(tol,2)*rTr0) && (rTr > pow(a_tol,2))){
252
253    zAx(q,data,colind,row_ptr,d,M);
254    alpha = rTr/ddot(M,d,q);
255    dcopy(M,x,xold);
256    daxpy(M,alpha,d,x);
257
258    daxpy(M,-alpha,q,r);
259    rTrOld = rTr;
260    rTr = ddot(M,r,r);
261
262    bt= rTr/rTrOld;
263
264    dscal(M,bt,d);
265    daxpy(M,1.0,r,d);
266
267    i=i+1;
268
269  }
270 
271  free(d);
272  free(r);
273  free(q);
274  free(xold);
275
276  if (i>=imax){
277    return -1;
278  }
279  else{
280    return 0;
281  }
282 
283
284}         
285
286// Conjugate gradient solve Ax = b for x, A given in Sparse CSR format,
287// using a diagonal preconditioner M.
288// @input data: double vector with non-zero entries of A
289//        colind: long vector of column indicies of non-zero entries of A
290//        row_ptr: long vector giving index of rows for non-zero entires of A
291//        b: double vector specifying right hand side of equation to solve
292//        x: double vector with initial guess and to store result
293//        imax: maximum number of iterations
294//        tol: error tollerance for stopping criteria
295//        M: length of vectors x and b
296//        precon: diagonal preconditioner given as vector
297// @return: 0 on success 
298int _cg_solve_c_precon(double* data, 
299                long* colind,
300                long* row_ptr,
301                double * b,
302                double * x,
303                int imax,
304                double tol,
305                double a_tol,
306                int M,
307                double * precon){
308
309  int i = 1;
310  double alpha,rTr,rTrOld,bt,rTr0;
311
312  double * d = malloc(sizeof(double)*M);
313  double * r = malloc(sizeof(double)*M);
314  double * q = malloc(sizeof(double)*M);
315  double * xold = malloc(sizeof(double)*M);
316  double * rhat = malloc(sizeof(double)*M);
317  double * temp = malloc(sizeof(double)*M);
318
319  zaAxpy(r,-1.0,data,colind,row_ptr,x,b,M);
320  zDinx(rhat,precon,r,M);
321  dcopy(M,rhat,d);
322
323  rTr=ddot(M,r,rhat);
324  rTr0 = rTr;
325 
326  while((i<imax) && (rTr>pow(tol,2)*rTr0) && (rTr > pow(a_tol,2))){
327
328    zAx(q,data,colind,row_ptr,d,M);
329    alpha = rTr/ddot(M,d,q);
330    dcopy(M,x,xold);
331    daxpy(M,alpha,d,x);
332
333    daxpy(M,-alpha,q,r);
334    zDinx(rhat,precon,r,M);
335    rTrOld = rTr;
336    rTr = ddot(M,r,rhat);
337
338    bt= rTr/rTrOld;
339
340    dscal(M,bt,d);
341    daxpy(M,1.0,rhat,d);
342
343    i=i+1;
344
345  }
346  free(temp);
347  free(rhat);
348  free(d);
349  free(r);
350  free(q);
351  free(xold);
352
353  if (i>=imax){
354    return -1;
355  }
356  else{
357    return 0;
358  }
359 
360
361}       
362
363                     
364/////////////////////////////////////////////////
365// Gateways to Python
366
367PyObject *jacobi_precon_c(PyObject *self, PyObject *args){
368
369  int M,err,bcols;
370 
371  PyObject *csr_sparse; // input sparse matrix (must be CSR format)
372 
373  PyArrayObject
374    *data,            //Non Zeros Data array
375    *colind,          //Column indices array
376    *row_ptr,         //Row pointers array
377    *precon;                //Right hand side
378
379 
380  // Convert Python arguments to C 
381  if (!PyArg_ParseTuple(args, "OO", &csr_sparse, &precon)) {
382    PyErr_SetString(PyExc_RuntimeError, "jacobi_precon_c could not parse input"); 
383    return NULL;
384  }
385
386  // Extract three subarrays making up the sparse matrix in CSR format.
387  data = (PyArrayObject*) 
388    PyObject_GetAttrString(csr_sparse, "data");     
389  if (!data) {
390    PyErr_SetString(PyExc_RuntimeError, 
391        "Data array could not be allocated in cg_solve_c");     
392    return NULL;
393  } 
394
395  colind = (PyArrayObject*)
396    PyObject_GetAttrString(csr_sparse, "colind"); 
397  if (!colind) {
398    PyErr_SetString(PyExc_RuntimeError, 
399        "Column index array could not be allocated in cg_solve_c");     
400    return NULL;
401  } 
402
403  row_ptr = (PyArrayObject*) 
404    PyObject_GetAttrString(csr_sparse, "row_ptr");   
405  if (!row_ptr) {
406    PyErr_SetString(PyExc_RuntimeError, 
407        "Row pointer array could not be allocated in cg_solve_c"); 
408  }
409 
410  M = (row_ptr -> dimensions[0])-1;
411
412  // Precondition the matrix and right hand side
413  err = _jacobi_precon_c((double*) data->data, 
414                (long*) colind->data,
415                (long*) row_ptr->data,
416                (double *) precon->data,
417                M);
418
419  // Free extra references to sparse matrix parts
420  Py_DECREF(data);   
421  Py_DECREF(colind);   
422  Py_DECREF(row_ptr);     
423
424  return Py_BuildValue("");
425
426
427}
428
429PyObject *cg_solve_c(PyObject *self, PyObject *args) {
430 
431 
432  PyObject *csr_sparse; // input sparse matrix (must be CSR format)
433 
434  int imax,M,err,bcols;
435  double tol,a_tol;
436
437  PyArrayObject
438    *data,            //Non Zeros Data array
439    *colind,          //Column indices array
440    *row_ptr,         //Row pointers array
441    *x0,               //Initial guess - and sotrage of result.
442    *b;                //Right hand side
443
444 
445  // Convert Python arguments to C 
446  if (!PyArg_ParseTuple(args, "OOOiddi", &csr_sparse, &x0, &b, &imax, &tol, &a_tol, &bcols)) {
447    PyErr_SetString(PyExc_RuntimeError, "cg_solve_c could not parse input"); 
448    return NULL;
449  }
450
451  // Extract three subarrays making up the sparse matrix in CSR format.
452  data = (PyArrayObject*) 
453    PyObject_GetAttrString(csr_sparse, "data");     
454  if (!data) {
455    PyErr_SetString(PyExc_RuntimeError, 
456                    "Data array could not be allocated in cg_solve_c");     
457    return NULL;
458  } 
459
460  colind = (PyArrayObject*)
461    PyObject_GetAttrString(csr_sparse, "colind"); 
462  if (!colind) {
463    PyErr_SetString(PyExc_RuntimeError, 
464                    "Column index array could not be allocated in cg_solve_c");     
465    return NULL;
466  } 
467
468  row_ptr = (PyArrayObject*) 
469    PyObject_GetAttrString(csr_sparse, "row_ptr");   
470  if (!row_ptr) {
471    PyErr_SetString(PyExc_RuntimeError, 
472                    "Row pointer array could not be allocated in cg_solve_c"); 
473  }
474 
475  M = (row_ptr -> dimensions[0])-1;
476 
477  // Solve system uisng conjugate gradient
478  err = _cg_solve_c((double*) data->data, 
479                (long*) colind->data,
480                (long*) row_ptr->data,
481                (double *) b->data,
482                (double *) x0->data,
483                imax,
484                tol,
485                a_tol,
486                M);
487 
488  // Free extra references to sparse matrix parts
489  Py_DECREF(data);   
490  Py_DECREF(colind);   
491  Py_DECREF(row_ptr);     
492
493  return Py_BuildValue("i",err);
494}
495
496PyObject *cg_solve_c_precon(PyObject *self, PyObject *args) {
497 
498 
499  PyObject *csr_sparse; // input sparse matrix (must be CSR format)
500 
501  int imax,M,err,bcols;
502  double tol,a_tol;
503
504  PyArrayObject
505    *data,            //Non Zeros Data array
506    *colind,          //Column indices array
507    *row_ptr,         //Row pointers array
508    *x0,               //Initial guess - and sotrage of result.
509    *b,                //Right hand side
510    *precon;           //diagonal preconditioner
511
512 
513  // Convert Python arguments to C 
514  if (!PyArg_ParseTuple(args, "OOOiddiO", &csr_sparse, &x0, &b, &imax, &tol, &a_tol, &bcols, &precon)) {
515    PyErr_SetString(PyExc_RuntimeError, "cg_solve_c_precon could not parse input"); 
516    return NULL;
517  }
518
519  // Extract three subarrays making up the sparse matrix in CSR format.
520  data = (PyArrayObject*) 
521    PyObject_GetAttrString(csr_sparse, "data");     
522  if (!data) {
523    PyErr_SetString(PyExc_RuntimeError, 
524        "Data array could not be allocated in cg_solve_c");     
525    return NULL;
526  } 
527
528  colind = (PyArrayObject*)
529    PyObject_GetAttrString(csr_sparse, "colind"); 
530  if (!colind) {
531    PyErr_SetString(PyExc_RuntimeError, 
532        "Column index array could not be allocated in cg_solve_c");     
533    return NULL;
534  } 
535
536  row_ptr = (PyArrayObject*) 
537    PyObject_GetAttrString(csr_sparse, "row_ptr");   
538  if (!row_ptr) {
539    PyErr_SetString(PyExc_RuntimeError, 
540        "Row pointer array could not be allocated in cg_solve_c"); 
541  }
542 
543  M = (row_ptr -> dimensions[0])-1;
544 
545  // Solve system uisng conjugate gradient
546  err = _cg_solve_c_precon((double*) data->data, 
547                (long*) colind->data,
548                (long*) row_ptr->data,
549                (double *) b->data,
550                (double *) x0->data,
551                imax,
552                tol,
553                a_tol,
554                M,
555                (double *) precon->data);
556 
557  // Free extra references to sparse matrix parts
558  Py_DECREF(data);   
559  Py_DECREF(colind);   
560  Py_DECREF(row_ptr);     
561
562  return Py_BuildValue("i",err);
563}
564
565
566
567// Method table for python module
568static struct PyMethodDef MethodTable[] = {
569  {"cg_solve_c", cg_solve_c, METH_VARARGS, "Print out"},
570  {"cg_solve_c_precon", cg_solve_c_precon, METH_VARARGS, "Print out"},
571  {"jacobi_precon_c", jacobi_precon_c, METH_VARARGS, "Print out"},   
572  {NULL, NULL, 0, NULL}   /* sentinel */
573};
574
575// Module initialisation   
576void initcg_ext(void){
577  Py_InitModule("cg_ext", MethodTable);
578 
579  import_array();     //Necessary for handling of NumPY structures 
580}
581
Note: See TracBrowser for help on using the repository browser.