2 // /home/couturie/work/petsc-3.5.1/arch-linux2-c-debug/bin/mpirun -np 4 ./ex15 -m 400 -n 400 -ksp_type fgmres
6 static char help[] = "Solves a linear system in parallel with KSP. Also\n\
7 illustrates setting a user-defined shell preconditioner and using the\n\
8 macro __FUNCT__ to define routine names for use in error handling.\n\
9 Input parameters include:\n\
10 -user_defined_pc : Activate a user-defined preconditioner\n\n";
13 Concepts: KSP^basic parallel example
14 Concepts: PC^setting a user-defined shell preconditioner
15 Concepts: error handling^Using the macro __FUNCT__ to define routine names;
20 Include "petscksp.h" so that we can use KSP solvers. Note that this file
21 automatically includes:
22 petscsys.h - base PETSc routines petscvec.h - vectors
24 petscis.h - index sets petscksp.h - Krylov subspace methods
25 petscviewer.h - viewers petscpc.h - preconditioners
29 /* Define context for user-provided preconditioner */
37 User-defined routines. Note that immediately before each routine below,
38 we define the macro __FUNCT__ to be a string containing the routine name.
39 If defined, this macro is used in the PETSc error handlers to provide a
40 complete traceback of routine names. All PETSc library routines use this
41 macro, and users can optionally employ it as well in their application
42 codes. Note that users can get a traceback of PETSc errors regardless of
43 whether they define __FUNCT__ in application codes; this macro merely
44 provides the added traceback detail of the application routine names.
47 #define __FUNCT__ "main"
53 int KrylovMinimize(Mat A, Vec b, Vec x) {
58 PetscScalar gamma, alpha, oldgamma, beta;
59 PetscReal norm=20, Eprecision=1e-6, cgprec=1e-40;
60 PetscInt giter=0, ColS=12, col=0, Emaxiter=50000000, iter=0, iterations=15, Iiter=0;
72 Vec Alpha, p, ss, vect, r, q, Ax;
77 ierr = KSPCreate(PETSC_COMM_WORLD,&ksp);CHKERRQ(ierr);
78 ierr = KSPSetOperators(ksp,A,A);CHKERRQ(ierr);
79 ierr = KSPSetFromOptions(ksp);CHKERRQ(ierr);
86 ierr = PetscPrintf(PETSC_COMM_WORLD, "Size of vector %D\n", size); CHKERRQ(ierr);
89 MatGetOwnershipRange(A,&aa,&bb);
91 // ierr = PetscSynchronizedPrintf(PETSC_COMM_WORLD, "%D %D\n", aa,bb); CHKERRQ(ierr);
92 //PetscSynchronizedFlush(PETSC_COMM_WORLD);
95 ierr = MatCreate(PETSC_COMM_WORLD, &S);CHKERRQ(ierr);
96 ierr = MatSetSizes(S, bb-aa, PETSC_DECIDE, size, ColS); CHKERRQ(ierr);
97 ierr = MatSetType(S, MATMPIDENSE); CHKERRQ(ierr);
98 ierr = MatSetUp(S); CHKERRQ(ierr);
100 ierr = MatGetOwnershipRange(S, &Istart, &Iend); CHKERRQ(ierr);
102 ierr = VecCreate(PETSC_COMM_WORLD, &Alpha); CHKERRQ(ierr);
103 ierr = VecSetSizes(Alpha, PETSC_DECIDE, ColS); CHKERRQ(ierr);
104 ierr = VecSetFromOptions(Alpha); CHKERRQ(ierr);
105 ierr = VecDuplicate(Alpha, &vect); CHKERRQ(ierr);
106 ierr = VecDuplicate(Alpha, &p); CHKERRQ(ierr);
107 ierr = VecDuplicate(Alpha, &ss); CHKERRQ(ierr);
108 ierr = VecDuplicate(b, &r); CHKERRQ(ierr);
109 ierr = VecDuplicate(b, &q); CHKERRQ(ierr);
110 ierr = VecDuplicate(b, &Ax); CHKERRQ(ierr);
112 ierr = VecDuplicate(b,&x_old);CHKERRQ(ierr);
113 ierr = VecDuplicate(b,&residu);CHKERRQ(ierr);
117 //indexes of row (these indexes are global)
118 ind_row = (PetscInt*)malloc(sizeof(PetscInt)*(Iend-Istart));
119 for(i=0; i<Iend-Istart; i++) ind_row[i] = i + Istart;
122 // ierr = KSPGMRESSetRestart(ksp, 16); CHKERRQ(ierr);
123 ierr = KSPSetTolerances(ksp, 1e-13, 1e-13, PETSC_DEFAULT, 16); CHKERRQ(ierr);
124 ierr = KSPSetInitialGuessNonzero(ksp, PETSC_TRUE); CHKERRQ(ierr);
128 //GMRES WITH MINIMIZATION
130 while(giter<Emaxiter && norm>Eprecision ){
131 for(col=0; col<ColS && norm>Eprecision; col++){
135 ierr = KSPSolve(ksp, b, x); CHKERRQ(ierr);
137 ierr = KSPGetIterationNumber(ksp, &its); CHKERRQ(ierr);
138 total += its; Iiter ++;
143 ierr = VecGetArray(x, &array);
144 ierr = MatSetValues(S, Iend-Istart, ind_row, 1, &col, array, INSERT_VALUES);
145 VecRestoreArray(x, &array);
149 KSPGetResidualNorm(ksp,&norm);
152 ierr = VecCopy(x, residu); CHKERRQ(ierr);
153 ierr = VecAXPY(residu, -1, x_old); CHKERRQ(ierr);
154 ierr = VecNorm(residu, NORM_INFINITY, &norm); CHKERRQ(ierr);
158 ierr = PetscPrintf(PETSC_COMM_WORLD, "Norm of error %g, outer iteration %D\n", norm, giter); CHKERRQ(ierr);
159 ierr = VecCopy(x, x_old); CHKERRQ(ierr);
166 if( norm>Eprecision) {
168 MatAssemblyBegin(S, MAT_FINAL_ASSEMBLY);
169 MatAssemblyEnd(S, MAT_FINAL_ASSEMBLY);
174 MatMatMult(A,S, MAT_INITIAL_MATRIX,PETSC_DEFAULT,&AS);
178 MatMatMult(A,S, MAT_REUSE_MATRIX,PETSC_DEFAULT,&AS);
183 //Minimization with CGLS
184 MatMult(AS, Alpha, r); VecAYPX(r, -1, b); //r_0 = b-AS*x_0
187 MatMultTranspose(AS, r, p); //p_0 = AS'*r_0
192 ierr = VecCopy(p, ss); CHKERRQ(ierr); //p_0 = s_0
193 VecNorm(ss, NORM_2, &gamma); gamma = gamma * gamma; //gamma = norm2(s)^2
194 while(gamma>cgprec && iter<iterations){
195 MatMult(AS, p, q); //q = AS*p
196 VecNorm(q, NORM_2, &alpha); alpha = alpha * alpha; //alpha = norm2(q)^2
197 alpha = gamma / alpha;
198 VecAXPY(Alpha, alpha, p); //Alpha += alpha*p
199 VecAXPY(r, -alpha, q); //r -= alpha*q
200 MatMultTranspose(AS, r, ss); // ss = AS'*r
202 VecNorm(ss, NORM_2, &gamma); gamma = gamma * gamma; //gamma = norm2(s)^2
203 beta = gamma / oldgamma;
204 VecAYPX(p, beta, ss); //p = s+beta*p;
210 MatMult(S, Alpha, x); //x = S*Alpha;
215 ierr = PetscPrintf(PETSC_COMM_WORLD, "\t\t\t -- Execution time : %g (s)\n", T2-T1); CHKERRQ(ierr);
216 ierr = PetscPrintf(PETSC_COMM_WORLD, "\t\t\t -- Total number of iterations : %D\n", total); CHKERRQ(ierr);
230 int KrylovMinimizeLSQR(Mat A, Vec b, Vec x) {
235 PetscScalar alpha, beta;
236 PetscReal norm=20, Eprecision=1e-6, tol=1e-40;
237 PetscInt giter=0, ColS=12, col=0, Emaxiter=50000000, iter=0, iterations=20, Iiter=0;
243 PetscInt Istart,Iend;
250 PetscScalar norm_ksp;
251 Vec u,v,d,uu,vt,zero_long,zero_short,x_lsqr;
255 ierr = KSPCreate(PETSC_COMM_WORLD,&ksp);CHKERRQ(ierr);
256 ierr = KSPSetOperators(ksp,A,A);CHKERRQ(ierr);
257 ierr = KSPSetFromOptions(ksp);CHKERRQ(ierr);
264 ierr = PetscPrintf(PETSC_COMM_WORLD, "Size of vector %D\n", size); CHKERRQ(ierr);
267 MatGetOwnershipRange(A,&aa,&bb);
269 // ierr = PetscSynchronizedPrintf(PETSC_COMM_WORLD, "%D %D\n", aa,bb); CHKERRQ(ierr);
270 //PetscSynchronizedFlush(PETSC_COMM_WORLD);
273 ierr = MatCreate(PETSC_COMM_WORLD, &S);CHKERRQ(ierr);
274 ierr = MatSetSizes(S, bb-aa, PETSC_DECIDE, size, ColS); CHKERRQ(ierr);
275 ierr = MatSetType(S, MATMPIDENSE); CHKERRQ(ierr);
276 ierr = MatSetUp(S); CHKERRQ(ierr);
278 ierr = MatGetOwnershipRange(S, &Istart, &Iend); CHKERRQ(ierr);
282 ierr = VecDuplicate(b, &Ax); CHKERRQ(ierr);
284 ierr = VecDuplicate(b,&x_old);CHKERRQ(ierr);
285 ierr = VecDuplicate(b,&residu);CHKERRQ(ierr);
289 ierr = VecDuplicate(b,&u);CHKERRQ(ierr);
292 ierr = VecDuplicate(b,&uu);CHKERRQ(ierr);
293 ierr = VecDuplicate(b,&zero_long);CHKERRQ(ierr);
294 ierr = VecSet(zero_long,0);CHKERRQ(ierr);
297 ierr = VecCreate(PETSC_COMM_WORLD, &v); CHKERRQ(ierr);
298 ierr = VecSetSizes(v, PETSC_DECIDE, ColS); CHKERRQ(ierr);
299 ierr = VecSetFromOptions(v); CHKERRQ(ierr);
300 ierr = VecDuplicate(v,&zero_short);CHKERRQ(ierr);
301 ierr = VecSet(zero_short,0);CHKERRQ(ierr);
302 ierr = VecDuplicate(v,&d);CHKERRQ(ierr);
303 ierr = VecDuplicate(v,&vt);CHKERRQ(ierr);
304 ierr = VecDuplicate(v,&x_lsqr);CHKERRQ(ierr);
307 //indexes of row (these indexes are global)
308 ind_row = (PetscInt*)malloc(sizeof(PetscInt)*(Iend-Istart));
309 for(i=0; i<Iend-Istart; i++) ind_row[i] = i + Istart;
312 // ierr = KSPGMRESSetRestart(ksp, 16); CHKERRQ(ierr);
313 ierr = KSPSetTolerances(ksp, 1e-13, 1e-13, PETSC_DEFAULT, 16); CHKERRQ(ierr);
314 ierr = KSPSetInitialGuessNonzero(ksp, PETSC_TRUE); CHKERRQ(ierr);
319 //GMRES WITH MINIMIZATION
321 while(giter<Emaxiter && norm>Eprecision ){
322 for(col=0; col<ColS && norm>Eprecision; col++){
326 ierr = KSPSolve(ksp, b, x); CHKERRQ(ierr);
328 ierr = KSPGetIterationNumber(ksp, &its); CHKERRQ(ierr);
329 total += its; Iiter ++;
334 ierr = VecGetArray(x, &array);
335 ierr = MatSetValues(S, Iend-Istart, ind_row, 1, &col, array, INSERT_VALUES);
336 VecRestoreArray(x, &array);
340 KSPGetResidualNorm(ksp,&norm);
344 ierr = VecCopy(x, residu); CHKERRQ(ierr);
345 ierr = VecAXPY(residu, -1, x_old); CHKERRQ(ierr);
346 ierr = VecNorm(residu, NORM_INFINITY, &norm); CHKERRQ(ierr);
350 ierr = PetscPrintf(PETSC_COMM_WORLD, "Norm of error %g, outer iteration %D\n", norm, giter); CHKERRQ(ierr);
351 ierr = VecCopy(x, x_old); CHKERRQ(ierr);
358 if( norm>Eprecision) {
360 MatAssemblyBegin(S, MAT_FINAL_ASSEMBLY);
361 MatAssemblyEnd(S, MAT_FINAL_ASSEMBLY);
368 MatMatMult(A,S, MAT_INITIAL_MATRIX,PETSC_DEFAULT,&AS);
373 MatMatMult(A,S, MAT_REUSE_MATRIX,PETSC_DEFAULT,&AS);
385 PetscScalar n2b,tolb,normr,c,s,phibar,normar,norma,thet,rhot,rho,phi;
388 VecNorm(b, NORM_2, &n2b); //n2b = norm(b);
389 ierr = VecCopy(b, u); CHKERRQ(ierr); //u=b
390 VecNorm(u, NORM_2, &beta); // beta=norm(u)
393 VecAYPX(u,1/beta,zero_long); // u = u / beta;
398 MatMultTranspose(AS, u, v); //v=A'*u
399 ierr = VecSet(x_lsqr,0);CHKERRQ(ierr);
400 VecNorm(v, NORM_2, &alpha); // alpha=norm(v)
402 VecAYPX(v,1/alpha,zero_short); // v = v / alpha;
404 ierr = VecSet(d,0);CHKERRQ(ierr);
405 normar = alpha * beta;
408 for(i=0;i<iterations;i++) {
409 MatMult(AS, v, uu); //uu=A*v
410 VecAYPX(u, -alpha, uu); //u = uu-alpha*u;
411 VecNorm(u, NORM_2, &beta); // beta=norm(u)
412 VecAYPX(u,1/beta,zero_long); // u = u / beta;
413 norma=sqrt(norma*norma+alpha*alpha+beta*beta); // norma = norm([norma alpha beta]);
416 rho = sqrt(rhot*rhot + beta*beta);
420 if (phi == 0) { // stagnation of the method
424 VecAYPX(d,-thet,v); //d = (v - thet * d);
425 VecAYPX(d,1/rho,zero_short); //d=d/ rho;
428 if (normar/(norma*normr) <= tol) { // check for convergence in min{|b-A*x|}
431 if (normr <= tolb) { // check for convergence in A*x=b
436 VecAXPY(x_lsqr,phi,d); // x_lsqr=x_lsqr+phi*d
437 normr = abs(s) * normr;
438 MatMultTranspose(AS, u, vt); //vt=A'*u;
439 VecAYPX(v,-beta,vt); // v = vt - beta * v;
440 VecNorm(v, NORM_2, &alpha); // alpha=norm(v)
441 VecAYPX(v,1/alpha,zero_short); // v = v / alpha;
442 normar = alpha * abs( s * phi);
449 MatMult(S, x_lsqr, x); //x = S*x_small;
454 ierr = PetscPrintf(PETSC_COMM_WORLD, "\t\t\t -- Execution time LSQR : %g (s)\n", T2-T1); CHKERRQ(ierr);
455 ierr = PetscPrintf(PETSC_COMM_WORLD, "\t\t\t -- Total number of iterations LSQR : %D\n", total); CHKERRQ(ierr);
468 int main(int argc,char **args)
470 Vec x,b,u; /* approx solution, RHS, exact solution */
471 Mat A; /* linear system matrix */
472 KSP ksp; /* linear solver context */
473 PC pc; /* preconditioner context */
474 PetscReal norm; /* norm of solution error */
475 PetscScalar v,one = 1.0;
476 PetscInt i,j,Ii,J,Istart,Iend,m = 8,n = 7,its;
479 PetscInitialize(&argc,&args,(char*)0,help);
480 ierr = PetscOptionsGetInt(NULL,"-m",&m,NULL);CHKERRQ(ierr);
481 ierr = PetscOptionsGetInt(NULL,"-n",&n,NULL);CHKERRQ(ierr);
484 MPI_Comm_size(PETSC_COMM_WORLD,&size);
485 PetscPrintf(PETSC_COMM_WORLD,"Number of processors = %d\n",size);
487 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
488 Compute the matrix and right-hand-side vector that define
489 the linear system, Ax = b.
490 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
492 Create parallel matrix, specifying only its global dimensions.
493 When using MatCreate(), the matrix format can be specified at
494 runtime. Also, the parallel partioning of the matrix is
495 determined by PETSc at runtime.
497 ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
498 ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,m*n,m*n);CHKERRQ(ierr);
499 ierr = MatSetFromOptions(A);CHKERRQ(ierr);
500 ierr = MatSetUp(A);CHKERRQ(ierr);
503 Currently, all PETSc parallel matrix formats are partitioned by
504 contiguous chunks of rows across the processors. Determine which
505 rows of the matrix are locally owned.
507 ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr);
510 Set matrix elements for the 2-D, five-point stencil in parallel.
511 - Each processor needs to insert only elements that it owns
512 locally (but any non-local elements will be sent to the
513 appropriate processor during matrix assembly).
514 - Always specify global rows and columns of matrix entries.
516 for (Ii=Istart; Ii<Iend; Ii++) {
517 v = -1.0; i = Ii/n; j = Ii - i*n;
518 if (i>0) {J = Ii - n; ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
519 if (i<m-1) {J = Ii + n; ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
520 if (j>0) {J = Ii - 1; ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
521 if (j<n-1) {J = Ii + 1; ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
522 v = 4.0; ierr = MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);CHKERRQ(ierr);
526 Assemble matrix, using the 2-step process:
527 MatAssemblyBegin(), MatAssemblyEnd()
528 Computations can be done while messages are in transition
529 by placing code between these two statements.
531 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
532 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
535 Create parallel vectors.
536 - When using VecCreate() VecSetSizes() and VecSetFromOptions(),
537 we specify only the vector's global
538 dimension; the parallel partitioning is determined at runtime.
539 - Note: We form 1 vector from scratch and then duplicate as needed.
541 ierr = VecCreate(PETSC_COMM_WORLD,&u);CHKERRQ(ierr);
542 ierr = VecSetSizes(u,PETSC_DECIDE,m*n);CHKERRQ(ierr);
543 ierr = VecSetFromOptions(u);CHKERRQ(ierr);
544 ierr = VecDuplicate(u,&b);CHKERRQ(ierr);
545 ierr = VecDuplicate(b,&x);CHKERRQ(ierr);
548 Set exact solution; then compute right-hand-side vector.
550 ierr = VecSet(u,one);CHKERRQ(ierr);
551 ierr = MatMult(A,u,b);CHKERRQ(ierr);
553 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
554 Create the linear solver and set various options
555 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
558 Create linear solver context
560 ierr = KSPCreate(PETSC_COMM_WORLD,&ksp);CHKERRQ(ierr);
563 Set operators. Here the matrix that defines the linear system
564 also serves as the preconditioning matrix.
566 ierr = KSPSetOperators(ksp,A,A);CHKERRQ(ierr);
569 Set linear solver defaults for this problem (optional).
570 - By extracting the KSP and PC contexts from the KSP context,
571 we can then directly call any KSP and PC routines
572 to set various options.
574 ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
575 ierr = KSPSetTolerances(ksp,1e-7,1e-7,PETSC_DEFAULT,5000000);CHKERRQ(ierr);
578 Set runtime options, e.g.,
579 -ksp_type <type> -pc_type <type> -ksp_monitor -ksp_rtol <rtol>
580 These options will override those specified above as long as
581 KSPSetFromOptions() is called _after_ any other customization
584 ierr = KSPSetFromOptions(ksp);CHKERRQ(ierr);
586 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
587 Solve the linear system
588 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
591 ierr = KSPSolve(ksp,b,x);CHKERRQ(ierr);
593 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
594 Check solution and clean up
595 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
601 VecDuplicate(b,&sol);
604 VecNorm(sol, NORM_2, &norm);
605 ierr = KSPGetIterationNumber(ksp,&its);CHKERRQ(ierr);
606 ierr = PetscPrintf(PETSC_COMM_WORLD,"Norm of error %g iterations %D\n",(double)norm,its);CHKERRQ(ierr);
607 ierr = PetscPrintf(PETSC_COMM_WORLD, "\t\t\t -- Execution time : %g (s)\n\n\n", T2-T1); CHKERRQ(ierr);
621 VecDuplicate(b,&sol);
623 KrylovMinimize(A, b, x2);
629 VecNorm(sol, NORM_2, &norm);
630 ierr = PetscPrintf(PETSC_COMM_WORLD, "\t\t\t -- Error Krylov Minimization %g\n",norm);
640 VecDuplicate(b,&sol);
642 KrylovMinimizeLSQR(A, b, x2);
648 VecNorm(sol, NORM_2, &norm);
649 ierr = PetscPrintf(PETSC_COMM_WORLD, "\t\t\t -- Error Krylov Minimization LSQR %g\n",norm);
656 Free work space. All PETSc objects should be destroyed when they
657 are no longer needed.
659 ierr = KSPDestroy(&ksp);CHKERRQ(ierr);
660 ierr = VecDestroy(&u);CHKERRQ(ierr); ierr = VecDestroy(&x);CHKERRQ(ierr);
661 ierr = VecDestroy(&b);CHKERRQ(ierr); ierr = MatDestroy(&A);CHKERRQ(ierr);
663 ierr = PetscFinalize();