2 // /home/couturie/work/petsc-3.5.1/arch-linux2-c-debug/bin/mpirun -np 4 ./ex15 -m 400 -n 400 -ksp_type fgmres
6 static char help[] = "Solves a linear system in parallel with KSP. Also\n\
7 illustrates setting a user-defined shell preconditioner and using the\n\
8 macro __FUNCT__ to define routine names for use in error handling.\n\
9 Input parameters include:\n\
10 -user_defined_pc : Activate a user-defined preconditioner\n\n";
13 Concepts: KSP^basic parallel example
14 Concepts: PC^setting a user-defined shell preconditioner
15 Concepts: error handling^Using the macro __FUNCT__ to define routine names;
20 Include "petscksp.h" so that we can use KSP solvers. Note that this file
21 automatically includes:
22 petscsys.h - base PETSc routines petscvec.h - vectors
24 petscis.h - index sets petscksp.h - Krylov subspace methods
25 petscviewer.h - viewers petscpc.h - preconditioners
29 /* Define context for user-provided preconditioner */
37 User-defined routines. Note that immediately before each routine below,
38 we define the macro __FUNCT__ to be a string containing the routine name.
39 If defined, this macro is used in the PETSc error handlers to provide a
40 complete traceback of routine names. All PETSc library routines use this
41 macro, and users can optionally employ it as well in their application
42 codes. Note that users can get a traceback of PETSc errors regardless of
43 whether they define __FUNCT__ in application codes; this macro merely
44 provides the added traceback detail of the application routine names.
47 #define __FUNCT__ "main"
53 int KrylovMinimize(Mat A, Vec b, Vec x) {
58 PetscScalar gamma, alpha, oldgamma, beta;
59 PetscReal norm=20, Eprecision=1e-8, cgprec=1e-40;
60 PetscInt giter=0, ColS=12, col=0, Emaxiter=50000000, iter=0, iterations=15, Iiter=0;
72 Vec Alpha, p, ss, vect, r, q, Ax;
77 ierr = KSPCreate(PETSC_COMM_WORLD,&ksp);CHKERRQ(ierr);
78 ierr = KSPSetOperators(ksp,A,A);CHKERRQ(ierr);
79 ierr = KSPSetFromOptions(ksp);CHKERRQ(ierr);
86 ierr = PetscPrintf(PETSC_COMM_WORLD, "Size of vector %D\n", size); CHKERRQ(ierr);
89 MatGetOwnershipRange(A,&aa,&bb);
91 // ierr = PetscSynchronizedPrintf(PETSC_COMM_WORLD, "%D %D\n", aa,bb); CHKERRQ(ierr);
92 //PetscSynchronizedFlush(PETSC_COMM_WORLD);
95 ierr = MatCreate(PETSC_COMM_WORLD, &S);CHKERRQ(ierr);
96 ierr = MatSetSizes(S, bb-aa, PETSC_DECIDE, size, ColS); CHKERRQ(ierr);
97 ierr = MatSetType(S, MATMPIDENSE); CHKERRQ(ierr);
98 ierr = MatSetUp(S); CHKERRQ(ierr);
100 ierr = MatGetOwnershipRange(S, &Istart, &Iend); CHKERRQ(ierr);
102 ierr = VecCreate(PETSC_COMM_WORLD, &Alpha); CHKERRQ(ierr);
103 ierr = VecSetSizes(Alpha, PETSC_DECIDE, ColS); CHKERRQ(ierr);
104 ierr = VecSetFromOptions(Alpha); CHKERRQ(ierr);
105 ierr = VecDuplicate(Alpha, &vect); CHKERRQ(ierr);
106 ierr = VecDuplicate(Alpha, &p); CHKERRQ(ierr);
107 ierr = VecDuplicate(Alpha, &ss); CHKERRQ(ierr);
108 ierr = VecDuplicate(b, &r); CHKERRQ(ierr);
109 ierr = VecDuplicate(b, &q); CHKERRQ(ierr);
110 ierr = VecDuplicate(b, &Ax); CHKERRQ(ierr);
112 ierr = VecDuplicate(b,&x_old);CHKERRQ(ierr);
113 ierr = VecDuplicate(b,&residu);CHKERRQ(ierr);
117 //indexes of row (these indexes are global)
118 ind_row = (PetscInt*)malloc(sizeof(PetscInt)*(Iend-Istart));
119 for(i=0; i<Iend-Istart; i++) ind_row[i] = i + Istart;
122 // ierr = KSPGMRESSetRestart(ksp, 16); CHKERRQ(ierr);
123 ierr = KSPSetTolerances(ksp, 1e-10, 1e-10, PETSC_DEFAULT, 16); CHKERRQ(ierr);
124 ierr = KSPSetInitialGuessNonzero(ksp, PETSC_TRUE); CHKERRQ(ierr);
128 //GMRES WITH MINIMIZATION
130 while(giter<Emaxiter && norm>Eprecision ){
131 for(col=0; col<ColS && norm>Eprecision; col++){
135 ierr = KSPSolve(ksp, b, x); CHKERRQ(ierr);
137 ierr = KSPGetIterationNumber(ksp, &its); CHKERRQ(ierr);
138 total += its; Iiter ++;
143 ierr = VecGetArray(x, &array);
144 ierr = MatSetValues(S, Iend-Istart, ind_row, 1, &col, array, INSERT_VALUES);
145 VecRestoreArray(x, &array);
150 ierr = VecCopy(x, residu); CHKERRQ(ierr);
151 ierr = VecAXPY(residu, -1, x_old); CHKERRQ(ierr);
152 ierr = VecNorm(residu, NORM_INFINITY, &norm); CHKERRQ(ierr);
156 ierr = PetscPrintf(PETSC_COMM_WORLD, "Norm of error %g, outer iteration %D\n", norm, giter); CHKERRQ(ierr);
157 ierr = VecCopy(x, x_old); CHKERRQ(ierr);
164 if( norm>Eprecision) {
166 MatAssemblyBegin(S, MAT_FINAL_ASSEMBLY);
167 MatAssemblyEnd(S, MAT_FINAL_ASSEMBLY);
172 MatMatMult(A,S, MAT_INITIAL_MATRIX,PETSC_DEFAULT,&AS);
176 MatMatMult(A,S, MAT_REUSE_MATRIX,PETSC_DEFAULT,&AS);
181 //Minimization with CGLS
182 MatMult(AS, Alpha, r); VecAYPX(r, -1, b); //r_0 = b-AS*x_0
185 MatMultTranspose(AS, r, p); //p_0 = AS'*r_0
190 ierr = VecCopy(p, ss); CHKERRQ(ierr); //p_0 = s_0
191 VecNorm(ss, NORM_2, &gamma); gamma = gamma * gamma; //gamma = norm2(s)^2
192 while(gamma>cgprec && iter<iterations){
193 MatMult(AS, p, q); //q = AS*p
194 VecNorm(q, NORM_2, &alpha); alpha = alpha * alpha; //alpha = norm2(q)^2
195 alpha = gamma / alpha;
196 VecAXPY(Alpha, alpha, p); //Alpha += alpha*p
197 VecAXPY(r, -alpha, q); //r -= alpha*q
198 MatMultTranspose(AS, r, ss); // ss = AS'*r
200 VecNorm(ss, NORM_2, &gamma); gamma = gamma * gamma; //gamma = norm2(s)^2
201 beta = gamma / oldgamma;
202 VecAYPX(p, beta, ss); //p = s+beta*p;
208 MatMult(S, Alpha, x); //x = S*Alpha;
213 ierr = PetscPrintf(PETSC_COMM_WORLD, "\t\t\t -- Execution time : %g (s)\n", T2-T1); CHKERRQ(ierr);
214 ierr = PetscPrintf(PETSC_COMM_WORLD, "\t\t\t -- Total number of iterations : %D\n", total); CHKERRQ(ierr);
228 int KrylovMinimizeLSQR(Mat A, Vec b, Vec x) {
233 PetscScalar alpha, beta;
234 PetscReal norm=20, Eprecision=1e-8, tol=1e-40;
235 PetscInt giter=0, ColS=12, col=0, Emaxiter=50000000, iter=0, iterations=20, Iiter=0;
241 PetscInt Istart,Iend;
248 PetscScalar norm_ksp;
249 Vec u,v,d,uu,vt,zero_long,zero_short,x_lsqr;
253 ierr = KSPCreate(PETSC_COMM_WORLD,&ksp);CHKERRQ(ierr);
254 ierr = KSPSetOperators(ksp,A,A);CHKERRQ(ierr);
255 ierr = KSPSetFromOptions(ksp);CHKERRQ(ierr);
262 ierr = PetscPrintf(PETSC_COMM_WORLD, "Size of vector %D\n", size); CHKERRQ(ierr);
265 MatGetOwnershipRange(A,&aa,&bb);
267 // ierr = PetscSynchronizedPrintf(PETSC_COMM_WORLD, "%D %D\n", aa,bb); CHKERRQ(ierr);
268 //PetscSynchronizedFlush(PETSC_COMM_WORLD);
271 ierr = MatCreate(PETSC_COMM_WORLD, &S);CHKERRQ(ierr);
272 ierr = MatSetSizes(S, bb-aa, PETSC_DECIDE, size, ColS); CHKERRQ(ierr);
273 ierr = MatSetType(S, MATMPIDENSE); CHKERRQ(ierr);
274 ierr = MatSetUp(S); CHKERRQ(ierr);
276 ierr = MatGetOwnershipRange(S, &Istart, &Iend); CHKERRQ(ierr);
280 ierr = VecDuplicate(b, &Ax); CHKERRQ(ierr);
282 ierr = VecDuplicate(b,&x_old);CHKERRQ(ierr);
283 ierr = VecDuplicate(b,&residu);CHKERRQ(ierr);
287 ierr = VecDuplicate(b,&u);CHKERRQ(ierr);
290 ierr = VecDuplicate(b,&uu);CHKERRQ(ierr);
291 ierr = VecDuplicate(b,&zero_long);CHKERRQ(ierr);
292 ierr = VecSet(zero_long,0);CHKERRQ(ierr);
295 ierr = VecCreate(PETSC_COMM_WORLD, &v); CHKERRQ(ierr);
296 ierr = VecSetSizes(v, PETSC_DECIDE, ColS); CHKERRQ(ierr);
297 ierr = VecSetFromOptions(v); CHKERRQ(ierr);
298 ierr = VecDuplicate(v,&zero_short);CHKERRQ(ierr);
299 ierr = VecSet(zero_short,0);CHKERRQ(ierr);
300 ierr = VecDuplicate(v,&d);CHKERRQ(ierr);
301 ierr = VecDuplicate(v,&vt);CHKERRQ(ierr);
302 ierr = VecDuplicate(v,&x_lsqr);CHKERRQ(ierr);
305 //indexes of row (these indexes are global)
306 ind_row = (PetscInt*)malloc(sizeof(PetscInt)*(Iend-Istart));
307 for(i=0; i<Iend-Istart; i++) ind_row[i] = i + Istart;
310 // ierr = KSPGMRESSetRestart(ksp, 16); CHKERRQ(ierr);
311 ierr = KSPSetTolerances(ksp, 1e-10, 1e-10, PETSC_DEFAULT, 16); CHKERRQ(ierr);
312 ierr = KSPSetInitialGuessNonzero(ksp, PETSC_TRUE); CHKERRQ(ierr);
317 //GMRES WITH MINIMIZATION
319 while(giter<Emaxiter && norm>Eprecision ){
320 for(col=0; col<ColS && norm>Eprecision; col++){
324 ierr = KSPSolve(ksp, b, x); CHKERRQ(ierr);
326 ierr = KSPGetIterationNumber(ksp, &its); CHKERRQ(ierr);
327 total += its; Iiter ++;
332 ierr = VecGetArray(x, &array);
333 ierr = MatSetValues(S, Iend-Istart, ind_row, 1, &col, array, INSERT_VALUES);
334 VecRestoreArray(x, &array);
339 ierr = VecCopy(x, residu); CHKERRQ(ierr);
340 ierr = VecAXPY(residu, -1, x_old); CHKERRQ(ierr);
341 ierr = VecNorm(residu, NORM_INFINITY, &norm); CHKERRQ(ierr);
345 ierr = PetscPrintf(PETSC_COMM_WORLD, "Norm of error %g, outer iteration %D\n", norm, giter); CHKERRQ(ierr);
346 ierr = VecCopy(x, x_old); CHKERRQ(ierr);
353 if( norm>Eprecision) {
355 MatAssemblyBegin(S, MAT_FINAL_ASSEMBLY);
356 MatAssemblyEnd(S, MAT_FINAL_ASSEMBLY);
363 MatMatMult(A,S, MAT_INITIAL_MATRIX,PETSC_DEFAULT,&AS);
368 MatMatMult(A,S, MAT_REUSE_MATRIX,PETSC_DEFAULT,&AS);
380 PetscScalar n2b,tolb,normr,c,s,phibar,normar,norma,thet,rhot,rho,phi;
383 VecNorm(b, NORM_2, &n2b); //n2b = norm(b);
384 ierr = VecCopy(b, u); CHKERRQ(ierr); //u=b
385 VecNorm(u, NORM_2, &beta); // beta=norm(u)
388 VecAYPX(u,1/beta,zero_long); // u = u / beta;
393 MatMultTranspose(AS, u, v); //v=A'*u
394 ierr = VecSet(x_lsqr,0);CHKERRQ(ierr);
395 VecNorm(v, NORM_2, &alpha); // alpha=norm(v)
397 VecAYPX(v,1/alpha,zero_short); // v = v / alpha;
399 ierr = VecSet(d,0);CHKERRQ(ierr);
400 normar = alpha * beta;
403 for(i=0;i<iterations;i++) {
404 MatMult(AS, v, uu); //uu=A*v
405 VecAYPX(u, -alpha, uu); //u = uu-alpha*u;
406 VecNorm(u, NORM_2, &beta); // beta=norm(u)
407 VecAYPX(u,1/beta,zero_long); // u = u / beta;
408 norma=sqrt(norma*norma+alpha*alpha+beta*beta); // norma = norm([norma alpha beta]);
411 rho = sqrt(rhot*rhot + beta*beta);
415 if (phi == 0) { // stagnation of the method
419 VecAYPX(d,-thet,v); //d = (v - thet * d);
420 VecAYPX(d,1/rho,zero_short); //d=d/ rho;
423 if (normar/(norma*normr) <= tol) { // check for convergence in min{|b-A*x|}
426 if (normr <= tolb) { // check for convergence in A*x=b
431 VecAXPY(x_lsqr,phi,d); // x_lsqr=x_lsqr+phi*d
432 normr = abs(s) * normr;
433 MatMultTranspose(AS, u, vt); //vt=A'*u;
434 VecAYPX(v,-beta,vt); // v = vt - beta * v;
435 VecNorm(v, NORM_2, &alpha); // alpha=norm(v)
436 VecAYPX(v,1/alpha,zero_short); // v = v / alpha;
437 normar = alpha * abs( s * phi);
444 MatMult(S, x_lsqr, x); //x = S*x_small;
449 ierr = PetscPrintf(PETSC_COMM_WORLD, "\t\t\t -- Execution time LSQR : %g (s)\n", T2-T1); CHKERRQ(ierr);
450 ierr = PetscPrintf(PETSC_COMM_WORLD, "\t\t\t -- Total number of iterations LSQR : %D\n", total); CHKERRQ(ierr);
463 int main(int argc,char **args)
465 Vec x,b,u; /* approx solution, RHS, exact solution */
466 Mat A; /* linear system matrix */
467 KSP ksp; /* linear solver context */
468 PC pc; /* preconditioner context */
469 PetscReal norm; /* norm of solution error */
470 PetscScalar v,one = 1.0;
471 PetscInt i,j,Ii,J,Istart,Iend,m = 8,n = 7,its;
474 PetscInitialize(&argc,&args,(char*)0,help);
475 ierr = PetscOptionsGetInt(NULL,"-m",&m,NULL);CHKERRQ(ierr);
476 ierr = PetscOptionsGetInt(NULL,"-n",&n,NULL);CHKERRQ(ierr);
478 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
479 Compute the matrix and right-hand-side vector that define
480 the linear system, Ax = b.
481 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
483 Create parallel matrix, specifying only its global dimensions.
484 When using MatCreate(), the matrix format can be specified at
485 runtime. Also, the parallel partioning of the matrix is
486 determined by PETSc at runtime.
488 ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
489 ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,m*n,m*n);CHKERRQ(ierr);
490 ierr = MatSetFromOptions(A);CHKERRQ(ierr);
491 ierr = MatSetUp(A);CHKERRQ(ierr);
494 Currently, all PETSc parallel matrix formats are partitioned by
495 contiguous chunks of rows across the processors. Determine which
496 rows of the matrix are locally owned.
498 ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr);
501 Set matrix elements for the 2-D, five-point stencil in parallel.
502 - Each processor needs to insert only elements that it owns
503 locally (but any non-local elements will be sent to the
504 appropriate processor during matrix assembly).
505 - Always specify global rows and columns of matrix entries.
507 for (Ii=Istart; Ii<Iend; Ii++) {
508 v = -1.0; i = Ii/n; j = Ii - i*n;
509 if (i>0) {J = Ii - n; ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
510 if (i<m-1) {J = Ii + n; ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
511 if (j>0) {J = Ii - 1; ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
512 if (j<n-1) {J = Ii + 1; ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
513 v = 4.0; ierr = MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);CHKERRQ(ierr);
517 Assemble matrix, using the 2-step process:
518 MatAssemblyBegin(), MatAssemblyEnd()
519 Computations can be done while messages are in transition
520 by placing code between these two statements.
522 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
523 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
526 Create parallel vectors.
527 - When using VecCreate() VecSetSizes() and VecSetFromOptions(),
528 we specify only the vector's global
529 dimension; the parallel partitioning is determined at runtime.
530 - Note: We form 1 vector from scratch and then duplicate as needed.
532 ierr = VecCreate(PETSC_COMM_WORLD,&u);CHKERRQ(ierr);
533 ierr = VecSetSizes(u,PETSC_DECIDE,m*n);CHKERRQ(ierr);
534 ierr = VecSetFromOptions(u);CHKERRQ(ierr);
535 ierr = VecDuplicate(u,&b);CHKERRQ(ierr);
536 ierr = VecDuplicate(b,&x);CHKERRQ(ierr);
539 Set exact solution; then compute right-hand-side vector.
541 ierr = VecSet(u,one);CHKERRQ(ierr);
542 ierr = MatMult(A,u,b);CHKERRQ(ierr);
544 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
545 Create the linear solver and set various options
546 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
549 Create linear solver context
551 ierr = KSPCreate(PETSC_COMM_WORLD,&ksp);CHKERRQ(ierr);
554 Set operators. Here the matrix that defines the linear system
555 also serves as the preconditioning matrix.
557 ierr = KSPSetOperators(ksp,A,A);CHKERRQ(ierr);
560 Set linear solver defaults for this problem (optional).
561 - By extracting the KSP and PC contexts from the KSP context,
562 we can then directly call any KSP and PC routines
563 to set various options.
565 ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
566 ierr = KSPSetTolerances(ksp,1e-9,1e-9,PETSC_DEFAULT,5000000);CHKERRQ(ierr);
569 Set runtime options, e.g.,
570 -ksp_type <type> -pc_type <type> -ksp_monitor -ksp_rtol <rtol>
571 These options will override those specified above as long as
572 KSPSetFromOptions() is called _after_ any other customization
575 ierr = KSPSetFromOptions(ksp);CHKERRQ(ierr);
577 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
578 Solve the linear system
579 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
582 ierr = KSPSolve(ksp,b,x);CHKERRQ(ierr);
584 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
585 Check solution and clean up
586 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
592 VecDuplicate(b,&sol);
595 VecNorm(sol, NORM_2, &norm);
596 ierr = KSPGetIterationNumber(ksp,&its);CHKERRQ(ierr);
597 ierr = PetscPrintf(PETSC_COMM_WORLD,"Norm of error %g iterations %D\n",(double)norm,its);CHKERRQ(ierr);
598 ierr = PetscPrintf(PETSC_COMM_WORLD, "\t\t\t -- Execution time : %g (s)\n\n\n", T2-T1); CHKERRQ(ierr);
612 VecDuplicate(b,&sol);
614 KrylovMinimize(A, b, x2);
620 VecNorm(sol, NORM_2, &norm);
621 ierr = PetscPrintf(PETSC_COMM_WORLD, "\t\t\t -- Error Krylov Minimization %g\n",norm);
631 VecDuplicate(b,&sol);
633 KrylovMinimizeLSQR(A, b, x2);
639 VecNorm(sol, NORM_2, &norm);
640 ierr = PetscPrintf(PETSC_COMM_WORLD, "\t\t\t -- Error Krylov Minimization LSQR %g\n",norm);
647 Free work space. All PETSc objects should be destroyed when they
648 are no longer needed.
650 ierr = KSPDestroy(&ksp);CHKERRQ(ierr);
651 ierr = VecDestroy(&u);CHKERRQ(ierr); ierr = VecDestroy(&x);CHKERRQ(ierr);
652 ierr = VecDestroy(&b);CHKERRQ(ierr); ierr = MatDestroy(&A);CHKERRQ(ierr);
654 ierr = PetscFinalize();