2 // /home/couturie/work/petsc-3.5.1/arch-linux2-c-debug/bin/mpirun -np 4 ./ex15 -m 400 -n 400 -ksp_type fgmres
6 static char help[] = "Solves a linear system in parallel with KSP. Also\n\
7 illustrates setting a user-defined shell preconditioner and using the\n\
8 macro __FUNCT__ to define routine names for use in error handling.\n\
9 Input parameters include:\n\
10 -user_defined_pc : Activate a user-defined preconditioner\n\n";
13 Concepts: KSP^basic parallel example
14 Concepts: PC^setting a user-defined shell preconditioner
15 Concepts: error handling^Using the macro __FUNCT__ to define routine names;
20 Include "petscksp.h" so that we can use KSP solvers. Note that this file
21 automatically includes:
22 petscsys.h - base PETSc routines petscvec.h - vectors
24 petscis.h - index sets petscksp.h - Krylov subspace methods
25 petscviewer.h - viewers petscpc.h - preconditioners
29 /* Define context for user-provided preconditioner */
37 User-defined routines. Note that immediately before each routine below,
38 we define the macro __FUNCT__ to be a string containing the routine name.
39 If defined, this macro is used in the PETSc error handlers to provide a
40 complete traceback of routine names. All PETSc library routines use this
41 macro, and users can optionally employ it as well in their application
42 codes. Note that users can get a traceback of PETSc errors regardless of
43 whether they define __FUNCT__ in application codes; this macro merely
44 provides the added traceback detail of the application routine names.
47 #define __FUNCT__ "main"
53 int KrylovMinimize(Mat A, Vec b, Vec x) {
58 PetscScalar gamma, alpha, oldgamma, beta;
59 PetscReal norm=20, Eprecision=1e-6, cgprec=1e-40;
60 PetscInt giter=0, ColS=12, col=0, Emaxiter=50000000, iter=0, iterations=15, Iiter=0;
72 Vec Alpha, p, ss, vect, r, q, Ax;
77 ierr = KSPCreate(PETSC_COMM_WORLD,&ksp);CHKERRQ(ierr);
78 ierr = KSPSetOperators(ksp,A,A);CHKERRQ(ierr);
79 ierr = KSPSetFromOptions(ksp);CHKERRQ(ierr);
86 ierr = PetscPrintf(PETSC_COMM_WORLD, "Size of vector %D\n", size); CHKERRQ(ierr);
89 MatGetOwnershipRange(A,&aa,&bb);
91 // ierr = PetscSynchronizedPrintf(PETSC_COMM_WORLD, "%D %D\n", aa,bb); CHKERRQ(ierr);
92 //PetscSynchronizedFlush(PETSC_COMM_WORLD);
95 ierr = MatCreate(PETSC_COMM_WORLD, &S);CHKERRQ(ierr);
96 ierr = MatSetSizes(S, bb-aa, PETSC_DECIDE, size, ColS); CHKERRQ(ierr);
97 ierr = MatSetType(S, MATMPIDENSE); CHKERRQ(ierr);
98 ierr = MatSetUp(S); CHKERRQ(ierr);
100 ierr = MatGetOwnershipRange(S, &Istart, &Iend); CHKERRQ(ierr);
102 ierr = VecCreate(PETSC_COMM_WORLD, &Alpha); CHKERRQ(ierr);
103 ierr = VecSetSizes(Alpha, PETSC_DECIDE, ColS); CHKERRQ(ierr);
104 ierr = VecSetFromOptions(Alpha); CHKERRQ(ierr);
105 ierr = VecDuplicate(Alpha, &vect); CHKERRQ(ierr);
106 ierr = VecDuplicate(Alpha, &p); CHKERRQ(ierr);
107 ierr = VecDuplicate(Alpha, &ss); CHKERRQ(ierr);
108 ierr = VecDuplicate(b, &r); CHKERRQ(ierr);
109 ierr = VecDuplicate(b, &q); CHKERRQ(ierr);
110 ierr = VecDuplicate(b, &Ax); CHKERRQ(ierr);
112 ierr = VecDuplicate(b,&x_old);CHKERRQ(ierr);
113 ierr = VecDuplicate(b,&residu);CHKERRQ(ierr);
117 //indexes of row (these indexes are global)
118 ind_row = (PetscInt*)malloc(sizeof(PetscInt)*(Iend-Istart));
119 for(i=0; i<Iend-Istart; i++) ind_row[i] = i + Istart;
122 // ierr = KSPGMRESSetRestart(ksp, 16); CHKERRQ(ierr);
123 ierr = KSPSetTolerances(ksp, 1e-13, 1e-13, PETSC_DEFAULT, 30); CHKERRQ(ierr);
124 ierr = KSPSetInitialGuessNonzero(ksp, PETSC_TRUE); CHKERRQ(ierr);
128 //GMRES WITH MINIMIZATION
130 ierr = KSPSetUp(ksp); CHKERRQ(ierr);
131 while(giter<Emaxiter && norm>Eprecision ){
132 for(col=0; col<ColS && norm>Eprecision; col++){
136 ierr = KSPSolve(ksp, b, x); CHKERRQ(ierr);
138 ierr = KSPGetIterationNumber(ksp, &its); CHKERRQ(ierr);
139 total += its; Iiter ++;
144 ierr = VecGetArray(x, &array);
145 ierr = MatSetValues(S, Iend-Istart, ind_row, 1, &col, array, INSERT_VALUES);
146 VecRestoreArray(x, &array);
150 KSPGetResidualNorm(ksp,&norm);
153 ierr = VecCopy(x, residu); CHKERRQ(ierr);
154 ierr = VecAXPY(residu, -1, x_old); CHKERRQ(ierr);
155 ierr = VecNorm(residu, NORM_INFINITY, &norm); CHKERRQ(ierr);
159 ierr = PetscPrintf(PETSC_COMM_WORLD, "Norm of error %g, outer iteration %D\n", norm, giter); CHKERRQ(ierr);
160 ierr = VecCopy(x, x_old); CHKERRQ(ierr);
167 if( norm>Eprecision) {
169 MatAssemblyBegin(S, MAT_FINAL_ASSEMBLY);
170 MatAssemblyEnd(S, MAT_FINAL_ASSEMBLY);
175 MatMatMult(A,S, MAT_INITIAL_MATRIX,PETSC_DEFAULT,&AS);
179 MatMatMult(A,S, MAT_REUSE_MATRIX,PETSC_DEFAULT,&AS);
184 //Minimization with CGLS
185 MatMult(AS, Alpha, r); VecAYPX(r, -1, b); //r_0 = b-AS*x_0
188 MatMultTranspose(AS, r, p); //p_0 = AS'*r_0
193 ierr = VecCopy(p, ss); CHKERRQ(ierr); //p_0 = s_0
194 VecNorm(ss, NORM_2, &gamma); gamma = gamma * gamma; //gamma = norm2(s)^2
195 while(gamma>cgprec && iter<iterations){
196 MatMult(AS, p, q); //q = AS*p
197 VecNorm(q, NORM_2, &alpha); alpha = alpha * alpha; //alpha = norm2(q)^2
198 alpha = gamma / alpha;
199 VecAXPY(Alpha, alpha, p); //Alpha += alpha*p
200 VecAXPY(r, -alpha, q); //r -= alpha*q
201 MatMultTranspose(AS, r, ss); // ss = AS'*r
203 VecNorm(ss, NORM_2, &gamma); gamma = gamma * gamma; //gamma = norm2(s)^2
204 beta = gamma / oldgamma;
205 VecAYPX(p, beta, ss); //p = s+beta*p;
211 MatMult(S, Alpha, x); //x = S*Alpha;
216 ierr = PetscPrintf(PETSC_COMM_WORLD, "\t\t\t -- Execution time : %g (s)\n", T2-T1); CHKERRQ(ierr);
217 ierr = PetscPrintf(PETSC_COMM_WORLD, "\t\t\t -- Total number of iterations : %D\n", total); CHKERRQ(ierr);
231 int KrylovMinimizeLSQR(Mat A, Vec b, Vec x) {
236 PetscScalar alpha, beta;
237 PetscReal norm=20, Eprecision=1e-6, tol=1e-40;
238 PetscInt giter=0, ColS=12, col=0, Emaxiter=50000000, iter=0, iterations=20, Iiter=0;
244 PetscInt Istart,Iend;
251 PetscScalar norm_ksp;
252 Vec u,v,d,uu,vt,zero_long,zero_short,x_lsqr;
256 ierr = KSPCreate(PETSC_COMM_WORLD,&ksp);CHKERRQ(ierr);
257 ierr = KSPSetOperators(ksp,A,A);CHKERRQ(ierr);
258 ierr = KSPSetFromOptions(ksp);CHKERRQ(ierr);
265 ierr = PetscPrintf(PETSC_COMM_WORLD, "Size of vector %D\n", size); CHKERRQ(ierr);
268 MatGetOwnershipRange(A,&aa,&bb);
270 // ierr = PetscSynchronizedPrintf(PETSC_COMM_WORLD, "%D %D\n", aa,bb); CHKERRQ(ierr);
271 //PetscSynchronizedFlush(PETSC_COMM_WORLD);
274 ierr = MatCreate(PETSC_COMM_WORLD, &S);CHKERRQ(ierr);
275 ierr = MatSetSizes(S, bb-aa, PETSC_DECIDE, size, ColS); CHKERRQ(ierr);
276 ierr = MatSetType(S, MATMPIDENSE); CHKERRQ(ierr);
277 ierr = MatSetUp(S); CHKERRQ(ierr);
279 ierr = MatGetOwnershipRange(S, &Istart, &Iend); CHKERRQ(ierr);
283 ierr = VecDuplicate(b, &Ax); CHKERRQ(ierr);
285 ierr = VecDuplicate(b,&x_old);CHKERRQ(ierr);
286 ierr = VecDuplicate(b,&residu);CHKERRQ(ierr);
290 ierr = VecDuplicate(b,&u);CHKERRQ(ierr);
293 ierr = VecDuplicate(b,&uu);CHKERRQ(ierr);
294 ierr = VecDuplicate(b,&zero_long);CHKERRQ(ierr);
295 ierr = VecSet(zero_long,0);CHKERRQ(ierr);
298 ierr = VecCreate(PETSC_COMM_WORLD, &v); CHKERRQ(ierr);
299 ierr = VecSetSizes(v, PETSC_DECIDE, ColS); CHKERRQ(ierr);
300 ierr = VecSetFromOptions(v); CHKERRQ(ierr);
301 ierr = VecDuplicate(v,&zero_short);CHKERRQ(ierr);
302 ierr = VecSet(zero_short,0);CHKERRQ(ierr);
303 ierr = VecDuplicate(v,&d);CHKERRQ(ierr);
304 ierr = VecDuplicate(v,&vt);CHKERRQ(ierr);
305 ierr = VecDuplicate(v,&x_lsqr);CHKERRQ(ierr);
308 //indexes of row (these indexes are global)
309 ind_row = (PetscInt*)malloc(sizeof(PetscInt)*(Iend-Istart));
310 for(i=0; i<Iend-Istart; i++) ind_row[i] = i + Istart;
313 // ierr = KSPGMRESSetRestart(ksp, 16); CHKERRQ(ierr);
314 ierr = KSPSetTolerances(ksp, 1e-13, 1e-13, PETSC_DEFAULT, 30); CHKERRQ(ierr);
315 ierr = KSPSetInitialGuessNonzero(ksp, PETSC_TRUE); CHKERRQ(ierr);
320 //GMRES WITH MINIMIZATION
322 ierr = KSPSetUp(ksp); CHKERRQ(ierr);
323 while(giter<Emaxiter && norm>Eprecision ){
324 for(col=0; col<ColS && norm>Eprecision; col++){
328 ierr = KSPSolve(ksp, b, x); CHKERRQ(ierr);
330 ierr = KSPGetIterationNumber(ksp, &its); CHKERRQ(ierr);
331 total += its; Iiter ++;
336 ierr = VecGetArray(x, &array);
337 ierr = MatSetValues(S, Iend-Istart, ind_row, 1, &col, array, INSERT_VALUES);
338 VecRestoreArray(x, &array);
342 KSPGetResidualNorm(ksp,&norm);
346 ierr = VecCopy(x, residu); CHKERRQ(ierr);
347 ierr = VecAXPY(residu, -1, x_old); CHKERRQ(ierr);
348 ierr = VecNorm(residu, NORM_INFINITY, &norm); CHKERRQ(ierr);
352 ierr = PetscPrintf(PETSC_COMM_WORLD, "Norm of error %g, outer iteration %D\n", norm, giter); CHKERRQ(ierr);
353 ierr = VecCopy(x, x_old); CHKERRQ(ierr);
360 if( norm>Eprecision) {
362 MatAssemblyBegin(S, MAT_FINAL_ASSEMBLY);
363 MatAssemblyEnd(S, MAT_FINAL_ASSEMBLY);
370 MatMatMult(A,S, MAT_INITIAL_MATRIX,PETSC_DEFAULT,&AS);
375 MatMatMult(A,S, MAT_REUSE_MATRIX,PETSC_DEFAULT,&AS);
387 PetscScalar n2b,tolb,normr,c,s,phibar,normar,norma,thet,rhot,rho,phi;
390 VecNorm(b, NORM_2, &n2b); //n2b = norm(b);
391 ierr = VecCopy(b, u); CHKERRQ(ierr); //u=b
392 VecNorm(u, NORM_2, &beta); // beta=norm(u)
395 VecAYPX(u,1/beta,zero_long); // u = u / beta;
400 MatMultTranspose(AS, u, v); //v=A'*u
401 ierr = VecSet(x_lsqr,0);CHKERRQ(ierr);
402 VecNorm(v, NORM_2, &alpha); // alpha=norm(v)
404 VecAYPX(v,1/alpha,zero_short); // v = v / alpha;
406 ierr = VecSet(d,0);CHKERRQ(ierr);
407 normar = alpha * beta;
410 for(i=0;i<iterations;i++) {
411 MatMult(AS, v, uu); //uu=A*v
412 VecAYPX(u, -alpha, uu); //u = uu-alpha*u;
413 VecNorm(u, NORM_2, &beta); // beta=norm(u)
414 VecAYPX(u,1/beta,zero_long); // u = u / beta;
415 norma=sqrt(norma*norma+alpha*alpha+beta*beta); // norma = norm([norma alpha beta]);
418 rho = sqrt(rhot*rhot + beta*beta);
422 if (phi == 0) { // stagnation of the method
426 VecAYPX(d,-thet,v); //d = (v - thet * d);
427 VecAYPX(d,1/rho,zero_short); //d=d/ rho;
430 if (normar/(norma*normr) <= tol) { // check for convergence in min{|b-A*x|}
433 if (normr <= tolb) { // check for convergence in A*x=b
438 VecAXPY(x_lsqr,phi,d); // x_lsqr=x_lsqr+phi*d
439 normr = abs(s) * normr;
440 MatMultTranspose(AS, u, vt); //vt=A'*u;
441 VecAYPX(v,-beta,vt); // v = vt - beta * v;
442 VecNorm(v, NORM_2, &alpha); // alpha=norm(v)
443 VecAYPX(v,1/alpha,zero_short); // v = v / alpha;
444 normar = alpha * abs( s * phi);
451 MatMult(S, x_lsqr, x); //x = S*x_small;
456 ierr = PetscPrintf(PETSC_COMM_WORLD, "\t\t\t -- Execution time LSQR : %g (s)\n", T2-T1); CHKERRQ(ierr);
457 ierr = PetscPrintf(PETSC_COMM_WORLD, "\t\t\t -- Total number of iterations LSQR : %D\n", total); CHKERRQ(ierr);
470 int main(int argc,char **args)
472 Vec x,b,u; /* approx solution, RHS, exact solution */
473 Mat A; /* linear system matrix */
474 KSP ksp; /* linear solver context */
475 PC pc; /* preconditioner context */
476 PetscReal norm; /* norm of solution error */
477 PetscScalar v,one = 1.0;
478 PetscInt i,j,Ii,J,Istart,Iend,m = 8,n = 7,its;
481 PetscInitialize(&argc,&args,(char*)0,help);
482 ierr = PetscOptionsGetInt(NULL,"-m",&m,NULL);CHKERRQ(ierr);
483 ierr = PetscOptionsGetInt(NULL,"-n",&n,NULL);CHKERRQ(ierr);
486 MPI_Comm_size(PETSC_COMM_WORLD,&size);
487 PetscPrintf(PETSC_COMM_WORLD,"Number of processors = %d\n",size);
489 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
490 Compute the matrix and right-hand-side vector that define
491 the linear system, Ax = b.
492 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
494 Create parallel matrix, specifying only its global dimensions.
495 When using MatCreate(), the matrix format can be specified at
496 runtime. Also, the parallel partioning of the matrix is
497 determined by PETSc at runtime.
499 ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
500 ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,m*n,m*n);CHKERRQ(ierr);
501 ierr = MatSetFromOptions(A);CHKERRQ(ierr);
502 ierr = MatSetUp(A);CHKERRQ(ierr);
505 Currently, all PETSc parallel matrix formats are partitioned by
506 contiguous chunks of rows across the processors. Determine which
507 rows of the matrix are locally owned.
509 ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr);
512 Set matrix elements for the 2-D, five-point stencil in parallel.
513 - Each processor needs to insert only elements that it owns
514 locally (but any non-local elements will be sent to the
515 appropriate processor during matrix assembly).
516 - Always specify global rows and columns of matrix entries.
518 for (Ii=Istart; Ii<Iend; Ii++) {
519 v = -1.0; i = Ii/n; j = Ii - i*n;
521 if (i>0) {J = Ii - n; ierr = MatSetValues(A,1,&Ii,1,&J,&v2,INSERT_VALUES);CHKERRQ(ierr);}
522 if (i<m-1) {J = Ii + n; ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
523 if (j>0) {J = Ii - 1; ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
524 if (j<n-1) {J = Ii + 1; ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
525 v = 4.0; ierr = MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);CHKERRQ(ierr);
529 Assemble matrix, using the 2-step process:
530 MatAssemblyBegin(), MatAssemblyEnd()
531 Computations can be done while messages are in transition
532 by placing code between these two statements.
534 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
535 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
538 Create parallel vectors.
539 - When using VecCreate() VecSetSizes() and VecSetFromOptions(),
540 we specify only the vector's global
541 dimension; the parallel partitioning is determined at runtime.
542 - Note: We form 1 vector from scratch and then duplicate as needed.
544 ierr = VecCreate(PETSC_COMM_WORLD,&u);CHKERRQ(ierr);
545 ierr = VecSetSizes(u,PETSC_DECIDE,m*n);CHKERRQ(ierr);
546 ierr = VecSetFromOptions(u);CHKERRQ(ierr);
547 ierr = VecDuplicate(u,&b);CHKERRQ(ierr);
548 ierr = VecDuplicate(b,&x);CHKERRQ(ierr);
551 Set exact solution; then compute right-hand-side vector.
553 ierr = VecSet(u,one);CHKERRQ(ierr);
554 ierr = MatMult(A,u,b);CHKERRQ(ierr);
556 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
557 Create the linear solver and set various options
558 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
561 Create linear solver context
563 ierr = KSPCreate(PETSC_COMM_WORLD,&ksp);CHKERRQ(ierr);
566 Set operators. Here the matrix that defines the linear system
567 also serves as the preconditioning matrix.
569 ierr = KSPSetOperators(ksp,A,A);CHKERRQ(ierr);
572 Set linear solver defaults for this problem (optional).
573 - By extracting the KSP and PC contexts from the KSP context,
574 we can then directly call any KSP and PC routines
575 to set various options.
577 ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
578 ierr = KSPSetTolerances(ksp,1e-7,1e-7,PETSC_DEFAULT,5000000);CHKERRQ(ierr);
582 ierr = KSPSetFromOptions(ksp);CHKERRQ(ierr);
585 PCGetType(pc, &type);
587 PetscPrintf(PETSC_COMM_WORLD, "PC TYPE %s \n", type);
591 Set runtime options, e.g.,
592 -ksp_type <type> -pc_type <type> -ksp_monitor -ksp_rtol <rtol>
593 These options will override those specified above as long as
594 KSPSetFromOptions() is called _after_ any other customization
597 ierr = KSPSetFromOptions(ksp);CHKERRQ(ierr);
599 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
600 Solve the linear system
601 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
604 ierr = KSPSolve(ksp,b,x);CHKERRQ(ierr);
606 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
607 Check solution and clean up
608 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
614 VecDuplicate(b,&sol);
617 VecNorm(sol, NORM_2, &norm);
618 ierr = KSPGetIterationNumber(ksp,&its);CHKERRQ(ierr);
619 ierr = PetscPrintf(PETSC_COMM_WORLD,"Norm of error %g iterations %D\n",(double)norm,its);CHKERRQ(ierr);
620 ierr = PetscPrintf(PETSC_COMM_WORLD, "\t\t\t -- Execution time : %g (s)\n\n\n", T2-T1); CHKERRQ(ierr);
634 VecDuplicate(b,&sol);
636 KrylovMinimize(A, b, x2);
642 VecNorm(sol, NORM_2, &norm);
643 ierr = PetscPrintf(PETSC_COMM_WORLD, "\t\t\t -- Error Krylov Minimization %g\n",norm);
653 VecDuplicate(b,&sol);
655 KrylovMinimizeLSQR(A, b, x2);
661 VecNorm(sol, NORM_2, &norm);
662 ierr = PetscPrintf(PETSC_COMM_WORLD, "\t\t\t -- Error Krylov Minimization LSQR %g\n",norm);
669 Free work space. All PETSc objects should be destroyed when they
670 are no longer needed.
672 ierr = KSPDestroy(&ksp);CHKERRQ(ierr);
673 ierr = VecDestroy(&u);CHKERRQ(ierr); ierr = VecDestroy(&x);CHKERRQ(ierr);
674 ierr = VecDestroy(&b);CHKERRQ(ierr); ierr = MatDestroy(&A);CHKERRQ(ierr);
676 ierr = PetscFinalize();