Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

cg_ext.c @ 9072

Last change on this file since 9072 was 8691, checked in by steve, 12 years ago
Adding in the new c files
File size: 15.2 KB

Line
1	// C extension for the cg_solve module. Implements a c routine of the
2	// conjugate gradient algorithm to solve Ax=b, using sparse matrix A in
3	// the csr format.
4	//
5	// See the module cg_solve.py
6	//
7	// Padarn Wilson 2012
8	//
9	// Note Padarn 26/11/12: Currently the input matrix for the CG solve must
10	// be a Sparse_CSR matrix python object - defined in anuga/utilities/sparse.py
11	//
12	// Note Padarn 5/12/12: I have tried a few optimization modifications which
13	// didn't seem to save any time:
14	// -- conversion of the long arrays to int arrays (to save memory passing time)
15	// -- taking advantage of the symmetric quality of the matrix A to reduce the zAx loop
16	// -- specifying different 'chunk' sizes for the openmp loops
17	// -- using blas instead of own openmp loops
18
19	#include "Python.h"
20	#include "numpy/arrayobject.h"
21	#include "math.h"
22	#include "stdio.h"
23	#include "omp.h"
24
25
26	// Dot product of two double vectors: a.b
27	// @input N: int length of vectors a and b
28	// a: first vector of doubles
29	// b: second vector of double
30	// @return: double result of a.b
31	double ddot( int N, double a, double b)
32	{
33	double ret = 0;
34	int i;
35	#pragma omp parallel for private(i) reduction(+:ret)
36	for(i=0;i<N;i++)
37	{
38	ret+=a[i]*b[i];
39	}
40	return ret;
41
42	}
43
44	// In place multiplication of a double vector x by constant a: a*x
45	// @input N: int length of vector x
46	// a: double scalar to multiply by
47	// x: double vector to scale
48	void dscal(int N, double a, double *x)
49	{
50	int i;
51	#pragma omp parallel for private(i)
52	for(i=0;i<N;i++)
53	{
54	x[i]=a*x[i];
55	}
56
57	}
58
59	// Copy of one vector to another - memory already allocated: y=x
60	// @input N: int length of vectors x and y
61	// x: double vector to make copy of
62	// y: double vector to copy into
63	void dcopy( int N, double x, double y)
64	{
65	int i;
66	#pragma omp parallel for private(i)
67	for(i=0;i<N;i++)
68	{
69	y[i]=x[i];
70	}
71	}
72
73	// In place axpy operation: y = a*x + y
74	// @input N: int length of vectors x and y
75	// a: double to multiply x by
76	// x: first double vector
77	// y: second double vector, stores result
78	void daxpy(int N, double a, double x, double y)
79	{
80	int i;
81	#pragma omp parallel for private(i)
82	for(i=0;i<N;i++)
83	{
84	y[i]=y[i]+a*x[i];
85	}
86	}
87
88	// Sparse CSR matrix-vector product: z = A*x
89	// @input z: double vector to store the result
90	// data: double vector with non-zero entries of A
91	// colind: long vector of column indicies of non-zero entries of A
92	// row_ptr: long vector giving index of rows for non-zero entires of A
93	// x: double vector to be multiplied
94	// M: length of vector x
95	void zAx(double * z, double * data, long * colind, long * row_ptr, double * x, int M){
96
97
98
99	long i, j, ckey;
100
101
102
103	#pragma omp parallel for private(ckey,j,i)
104	for (i=0; i<M; i++){
105	z[i]=0;
106	for (ckey=row_ptr[i]; ckey<row_ptr[i+1]; ckey++) {
107	j = colind[ckey];
108	z[i] += data[ckey]*x[j];
109	}
110	}
111
112
113	}
114
115	// Diagonal matrix-vector product: z = D*x
116	// @input z: double vector to store the result
117	// D: double vector of diagonal matrix
118	// x: double vector to be multiplied
119	// M: length of vector x
120	void zDx(double * z, double * D, double * x, int M){
121
122
123	long i, j, ckey;
124
125	#pragma omp parallel for private(ckey,j,i)
126	for (i=0; i<M; i++){
127	z[i]=D[i]*x[i];
128	}
129
130
131	}
132
133	// Diagonal matrix-vector product: z = D*x
134	// @input z: double vector to store the result
135	// D: double vector of diagonal matrix
136	// x: double vector to be multiplied
137	// M: length of vector x
138	void zDinx(double * z, double * D, double * x, int M){
139
140
141	long i, j, ckey;
142
143	#pragma omp parallel for private(ckey,j,i)
144	for (i=0; i<M; i++){
145	z[i]=1.0/D[i]*x[i];
146	}
147
148
149	}
150
151
152
153	// Sparse CSR matrix-vector product and vector addition: z = aAx + y
154	// @input z: double vector to store the result
155	// a: double to scale matrix-vector product by
156	// data: double vector with non-zero entries of A
157	// colind: long vector of column indicies of non-zero entries of A
158	// row_ptr: long vector giving index of rows for non-zero entires of A
159	// x: double vector to be multiplied
160	// y: double vector to add
161	// M: length of vector x
162	void zaAxpy(double * z, double a, double * data, long * colind, long * row_ptr, double * x,
163	double * y,int M){
164	long i, j, ckey;
165	#pragma omp parallel for private(ckey,j,i)
166	for (i=0; i<M; i++ ){
167	z[i]=y[i];
168
169	for (ckey=row_ptr[i]; ckey<row_ptr[i+1]; ckey++) {
170	j = colind[ckey];
171	z[i] += adata[ckey]x[j];
172	}
173
174	}
175
176	}
177
178	// Jacobi preconditioner for matrix, A, and right hand side, b. Mutiplies each row
179	// by one divided by the diagonal element of the matrix. If the diagonal
180	// element is zero, does nothing (should nnot occur)
181	// colind: long vector of column indicies of non-zero entries of A
182	// row_ptr: long vector giving index of rows for non-zero entires of A
183	// b: double vector specifying right hand side of equation to solve
184	// M: length of vector b
185
186	int _jacobi_precon_c(double* data,
187	long* colind,
188	long* row_ptr,
189	double * precon,
190	int M){
191
192
193	long i, j, k, ckey;
194	double diag;
195
196
197
198	#pragma omp parallel for private(diag,ckey,j,i)
199	for (i=0; i<M; i++){
200	diag = 0;
201	for (ckey=row_ptr[i]; ckey<row_ptr[i+1]; ckey++) {
202	j = colind[ckey];
203	if (i==j){
204	diag = data[ckey];
205	}
206	}
207	if (diag == 0){
208	diag =1;
209	}
210	precon[i]=diag;
211	}
212
213	return 0;
214
215	}
216
217	// Conjugate gradient solve Ax = b for x, A given in Sparse CSR format
218	// @input data: double vector with non-zero entries of A
219	// colind: long vector of column indicies of non-zero entries of A
220	// row_ptr: long vector giving index of rows for non-zero entires of A
221	// b: double vector specifying right hand side of equation to solve
222	// x: double vector with initial guess and to store result
223	// imax: maximum number of iterations
224	// tol: error tollerance for stopping criteria
225	// M: length of vectors x and b
226	// @return: 0 on success
227	int _cg_solve_c(double* data,
228	long* colind,
229	long* row_ptr,
230	double * b,
231	double * x,
232	int imax,
233	double tol,
234	double a_tol,
235	int M){
236
237	int i = 1;
238	double alpha,rTr,rTrOld,bt,rTr0;
239
240	double * d = malloc(sizeof(double)*M);
241	double * r = malloc(sizeof(double)*M);
242	double * q = malloc(sizeof(double)*M);
243	double * xold = malloc(sizeof(double)*M);
244
245	zaAxpy(r,-1.0,data,colind,row_ptr,x,b,M);
246	dcopy(M,r,d);
247
248	rTr=ddot(M,r,r);
249	rTr0 = rTr;
250
251	while((i<imax) && (rTr>pow(tol,2)*rTr0) && (rTr > pow(a_tol,2))){
252
253	zAx(q,data,colind,row_ptr,d,M);
254	alpha = rTr/ddot(M,d,q);
255	dcopy(M,x,xold);
256	daxpy(M,alpha,d,x);
257
258	daxpy(M,-alpha,q,r);
259	rTrOld = rTr;
260	rTr = ddot(M,r,r);
261
262	bt= rTr/rTrOld;
263
264	dscal(M,bt,d);
265	daxpy(M,1.0,r,d);
266
267	i=i+1;
268
269	}
270
271	free(d);
272	free(r);
273	free(q);
274	free(xold);
275
276	if (i>=imax){
277	return -1;
278	}
279	else{
280	return 0;
281	}
282
283
284	}
285
286	// Conjugate gradient solve Ax = b for x, A given in Sparse CSR format,
287	// using a diagonal preconditioner M.
288	// @input data: double vector with non-zero entries of A
289	// colind: long vector of column indicies of non-zero entries of A
290	// row_ptr: long vector giving index of rows for non-zero entires of A
291	// b: double vector specifying right hand side of equation to solve
292	// x: double vector with initial guess and to store result
293	// imax: maximum number of iterations
294	// tol: error tollerance for stopping criteria
295	// M: length of vectors x and b
296	// precon: diagonal preconditioner given as vector
297	// @return: 0 on success
298	int _cg_solve_c_precon(double* data,
299	long* colind,
300	long* row_ptr,
301	double * b,
302	double * x,
303	int imax,
304	double tol,
305	double a_tol,
306	int M,
307	double * precon){
308
309	int i = 1;
310	double alpha,rTr,rTrOld,bt,rTr0;
311
312	double * d = malloc(sizeof(double)*M);
313	double * r = malloc(sizeof(double)*M);
314	double * q = malloc(sizeof(double)*M);
315	double * xold = malloc(sizeof(double)*M);
316	double * rhat = malloc(sizeof(double)*M);
317	double * temp = malloc(sizeof(double)*M);
318
319	zaAxpy(r,-1.0,data,colind,row_ptr,x,b,M);
320	zDinx(rhat,precon,r,M);
321	dcopy(M,rhat,d);
322
323	rTr=ddot(M,r,rhat);
324	rTr0 = rTr;
325
326	while((i<imax) && (rTr>pow(tol,2)*rTr0) && (rTr > pow(a_tol,2))){
327
328	zAx(q,data,colind,row_ptr,d,M);
329	alpha = rTr/ddot(M,d,q);
330	dcopy(M,x,xold);
331	daxpy(M,alpha,d,x);
332
333	daxpy(M,-alpha,q,r);
334	zDinx(rhat,precon,r,M);
335	rTrOld = rTr;
336	rTr = ddot(M,r,rhat);
337
338	bt= rTr/rTrOld;
339
340	dscal(M,bt,d);
341	daxpy(M,1.0,rhat,d);
342
343	i=i+1;
344
345	}
346	free(temp);
347	free(rhat);
348	free(d);
349	free(r);
350	free(q);
351	free(xold);
352
353	if (i>=imax){
354	return -1;
355	}
356	else{
357	return 0;
358	}
359
360
361	}
362
363
364	/////////////////////////////////////////////////
365	// Gateways to Python
366
367	PyObject jacobi_precon_c(PyObject self, PyObject *args){
368
369	int M,err,bcols;
370
371	PyObject *csr_sparse; // input sparse matrix (must be CSR format)
372
373	PyArrayObject
374	*data, //Non Zeros Data array
375	*colind, //Column indices array
376	*row_ptr, //Row pointers array
377	*precon; //Right hand side
378
379
380	// Convert Python arguments to C
381	if (!PyArg_ParseTuple(args, "OO", &csr_sparse, &precon)) {
382	PyErr_SetString(PyExc_RuntimeError, "jacobi_precon_c could not parse input");
383	return NULL;
384	}
385
386	// Extract three subarrays making up the sparse matrix in CSR format.
387	data = (PyArrayObject*)
388	PyObject_GetAttrString(csr_sparse, "data");
389	if (!data) {
390	PyErr_SetString(PyExc_RuntimeError,
391	"Data array could not be allocated in cg_solve_c");
392	return NULL;
393	}
394
395	colind = (PyArrayObject*)
396	PyObject_GetAttrString(csr_sparse, "colind");
397	if (!colind) {
398	PyErr_SetString(PyExc_RuntimeError,
399	"Column index array could not be allocated in cg_solve_c");
400	return NULL;
401	}
402
403	row_ptr = (PyArrayObject*)
404	PyObject_GetAttrString(csr_sparse, "row_ptr");
405	if (!row_ptr) {
406	PyErr_SetString(PyExc_RuntimeError,
407	"Row pointer array could not be allocated in cg_solve_c");
408	}
409
410	M = (row_ptr -> dimensions[0])-1;
411
412	// Precondition the matrix and right hand side
413	err = _jacobi_precon_c((double*) data->data,
414	(long*) colind->data,
415	(long*) row_ptr->data,
416	(double *) precon->data,
417	M);
418
419	// Free extra references to sparse matrix parts
420	Py_DECREF(data);
421	Py_DECREF(colind);
422	Py_DECREF(row_ptr);
423
424	return Py_BuildValue("");
425
426
427	}
428
429	PyObject cg_solve_c(PyObject self, PyObject *args) {
430
431
432	PyObject *csr_sparse; // input sparse matrix (must be CSR format)
433
434	int imax,M,err,bcols;
435	double tol,a_tol;
436
437	PyArrayObject
438	*data, //Non Zeros Data array
439	*colind, //Column indices array
440	*row_ptr, //Row pointers array
441	*x0, //Initial guess - and sotrage of result.
442	*b; //Right hand side
443
444
445	// Convert Python arguments to C
446	if (!PyArg_ParseTuple(args, "OOOiddi", &csr_sparse, &x0, &b, &imax, &tol, &a_tol, &bcols)) {
447	PyErr_SetString(PyExc_RuntimeError, "cg_solve_c could not parse input");
448	return NULL;
449	}
450
451	// Extract three subarrays making up the sparse matrix in CSR format.
452	data = (PyArrayObject*)
453	PyObject_GetAttrString(csr_sparse, "data");
454	if (!data) {
455	PyErr_SetString(PyExc_RuntimeError,
456	"Data array could not be allocated in cg_solve_c");
457	return NULL;
458	}
459
460	colind = (PyArrayObject*)
461	PyObject_GetAttrString(csr_sparse, "colind");
462	if (!colind) {
463	PyErr_SetString(PyExc_RuntimeError,
464	"Column index array could not be allocated in cg_solve_c");
465	return NULL;
466	}
467
468	row_ptr = (PyArrayObject*)
469	PyObject_GetAttrString(csr_sparse, "row_ptr");
470	if (!row_ptr) {
471	PyErr_SetString(PyExc_RuntimeError,
472	"Row pointer array could not be allocated in cg_solve_c");
473	}
474
475	M = (row_ptr -> dimensions[0])-1;
476
477	// Solve system uisng conjugate gradient
478	err = _cg_solve_c((double*) data->data,
479	(long*) colind->data,
480	(long*) row_ptr->data,
481	(double *) b->data,
482	(double *) x0->data,
483	imax,
484	tol,
485	a_tol,
486	M);
487
488	// Free extra references to sparse matrix parts
489	Py_DECREF(data);
490	Py_DECREF(colind);
491	Py_DECREF(row_ptr);
492
493	return Py_BuildValue("i",err);
494	}
495
496	PyObject cg_solve_c_precon(PyObject self, PyObject *args) {
497
498
499	PyObject *csr_sparse; // input sparse matrix (must be CSR format)
500
501	int imax,M,err,bcols;
502	double tol,a_tol;
503
504	PyArrayObject
505	*data, //Non Zeros Data array
506	*colind, //Column indices array
507	*row_ptr, //Row pointers array
508	*x0, //Initial guess - and sotrage of result.
509	*b, //Right hand side
510	*precon; //diagonal preconditioner
511
512
513	// Convert Python arguments to C
514	if (!PyArg_ParseTuple(args, "OOOiddiO", &csr_sparse, &x0, &b, &imax, &tol, &a_tol, &bcols, &precon)) {
515	PyErr_SetString(PyExc_RuntimeError, "cg_solve_c_precon could not parse input");
516	return NULL;
517	}
518
519	// Extract three subarrays making up the sparse matrix in CSR format.
520	data = (PyArrayObject*)
521	PyObject_GetAttrString(csr_sparse, "data");
522	if (!data) {
523	PyErr_SetString(PyExc_RuntimeError,
524	"Data array could not be allocated in cg_solve_c");
525	return NULL;
526	}
527
528	colind = (PyArrayObject*)
529	PyObject_GetAttrString(csr_sparse, "colind");
530	if (!colind) {
531	PyErr_SetString(PyExc_RuntimeError,
532	"Column index array could not be allocated in cg_solve_c");
533	return NULL;
534	}
535
536	row_ptr = (PyArrayObject*)
537	PyObject_GetAttrString(csr_sparse, "row_ptr");
538	if (!row_ptr) {
539	PyErr_SetString(PyExc_RuntimeError,
540	"Row pointer array could not be allocated in cg_solve_c");
541	}
542
543	M = (row_ptr -> dimensions[0])-1;
544
545	// Solve system uisng conjugate gradient
546	err = _cg_solve_c_precon((double*) data->data,
547	(long*) colind->data,
548	(long*) row_ptr->data,
549	(double *) b->data,
550	(double *) x0->data,
551	imax,
552	tol,
553	a_tol,
554	M,
555	(double *) precon->data);
556
557	// Free extra references to sparse matrix parts
558	Py_DECREF(data);
559	Py_DECREF(colind);
560	Py_DECREF(row_ptr);
561
562	return Py_BuildValue("i",err);
563	}
564
565
566
567	// Method table for python module
568	static struct PyMethodDef MethodTable[] = {
569	{"cg_solve_c", cg_solve_c, METH_VARARGS, "Print out"},
570	{"cg_solve_c_precon", cg_solve_c_precon, METH_VARARGS, "Print out"},
571	{"jacobi_precon_c", jacobi_precon_c, METH_VARARGS, "Print out"},
572	{NULL, NULL, 0, NULL} /* sentinel */
573	};
574
575	// Module initialisation
576	void initcg_ext(void){
577	Py_InitModule("cg_ext", MethodTable);
578
579	import_array(); //Necessary for handling of NumPY structures
580	}
581

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: trunk/anuga_core/source/anuga/utilities/cg_ext.c @ 9072

Download in other formats: