// /home/couturie/work/petsc-3.5.1/arch-linux2-c-debug/bin/mpirun -np 4 ./ex15 -m 400 -n 400 -ksp_type fgmres

static char help[] = "Solves a linear system in parallel with KSP. Also\n\
illustrates setting a user-defined shell preconditioner and using the\n\
macro __FUNCT__ to define routine names for use in error handling.\n\
Input parameters include:\n\
  -user_defined_pc : Activate a user-defined preconditioner\n\n";

/*
   Concepts: KSP^basic parallel example
   Concepts: PC^setting a user-defined shell preconditioner
   Concepts: error handling^Using the macro __FUNCT__ to define routine names;
*/

/*
   Include "petscksp.h" so that we can use KSP solvers.  Note that this file
   automatically includes:
     petscsys.h    - base PETSc routines   petscvec.h - vectors
     petscis.h     - index sets            petscksp.h - Krylov subspace methods
     petscviewer.h - viewers               petscpc.h  - preconditioners
*/
#include <petscksp.h>

/* Define context for user-provided preconditioner */
/*
   User-defined routines.  Note that immediately before each routine below,
   we define the macro __FUNCT__ to be a string containing the routine name.
   If defined, this macro is used in the PETSc error handlers to provide a
   complete traceback of routine names.  All PETSc library routines use this
   macro, and users can optionally employ it as well in their application
   codes.  Note that users can get a traceback of PETSc errors regardless of
   whether they define __FUNCT__ in application codes; this macro merely
   provides the added traceback detail of the application routine names.
*/
#define __FUNCT__ "main"
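/*
   Illustration only (not part of this example): a user routine would typically
   redefine __FUNCT__ to its own name immediately before its definition, so that
   CHKERRQ()/SETERRQ() tracebacks report that name.  The routine and its name
   below are hypothetical:

      #undef  __FUNCT__
      #define __FUNCT__ "MyShellPCApply"
      PetscErrorCode MyShellPCApply(PC pc, Vec xin, Vec xout)
      {
        PetscErrorCode ierr;
        PetscFunctionBeginUser;
        ierr = VecCopy(xin, xout);CHKERRQ(ierr);  // identity preconditioner; errors trace as "MyShellPCApply"
        PetscFunctionReturn(0);
      }
*/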
int KrylovMinimize(Mat A, Vec b, Vec x) {

  PetscScalar  gamma, alpha, oldgamma, beta;
  PetscReal    norm=20, Eprecision=1e-8, cgprec=1e-40;
  PetscInt     giter=0, ColS=12, col=0, Emaxiter=50000000, iter=0, iterations=15, Iiter=0;
  Vec          Alpha, p, ss, vect, r, q, Ax;

  ierr = KSPCreate(PETSC_COMM_WORLD,&ksp);CHKERRQ(ierr);
  ierr = KSPSetOperators(ksp,A,A);CHKERRQ(ierr);
  ierr = KSPSetFromOptions(ksp);CHKERRQ(ierr);

  ierr = PetscPrintf(PETSC_COMM_WORLD, "Size of vector %D\n", size); CHKERRQ(ierr);

  MatGetOwnershipRange(A,&aa,&bb);
  //  ierr = PetscSynchronizedPrintf(PETSC_COMM_WORLD, "%D %D\n", aa,bb); CHKERRQ(ierr);
  //  PetscSynchronizedFlush(PETSC_COMM_WORLD);

  ierr = MatCreate(PETSC_COMM_WORLD, &S);CHKERRQ(ierr);
  ierr = MatSetSizes(S, bb-aa, PETSC_DECIDE, size, ColS); CHKERRQ(ierr);
  ierr = MatSetType(S, MATMPIDENSE); CHKERRQ(ierr);
  ierr = MatSetUp(S); CHKERRQ(ierr);

  ierr = MatGetOwnershipRange(S, &Istart, &Iend); CHKERRQ(ierr);

  ierr = VecCreate(PETSC_COMM_WORLD, &Alpha); CHKERRQ(ierr);
  ierr = VecSetSizes(Alpha, PETSC_DECIDE, ColS); CHKERRQ(ierr);
  ierr = VecSetFromOptions(Alpha); CHKERRQ(ierr);
  ierr = VecDuplicate(Alpha, &vect); CHKERRQ(ierr);
  ierr = VecDuplicate(Alpha, &p); CHKERRQ(ierr);
  ierr = VecDuplicate(Alpha, &ss); CHKERRQ(ierr);
  ierr = VecDuplicate(b, &r); CHKERRQ(ierr);
  ierr = VecDuplicate(b, &q); CHKERRQ(ierr);
  ierr = VecDuplicate(b, &Ax); CHKERRQ(ierr);

  ierr = VecDuplicate(b,&x_old);CHKERRQ(ierr);
  ierr = VecDuplicate(b,&residu);CHKERRQ(ierr);
  // global indices of the rows owned locally (used to scatter x into column "col" of S)
  ind_row = (PetscInt*)malloc(sizeof(PetscInt)*(Iend-Istart));
  for(i=0; i<Iend-Istart; i++) ind_row[i] = i + Istart;
  //  ierr = KSPGMRESSetRestart(ksp, 16); CHKERRQ(ierr);
  ierr = KSPSetTolerances(ksp, 1e-13, 1e-13, PETSC_DEFAULT, 16); CHKERRQ(ierr);
  ierr = KSPSetInitialGuessNonzero(ksp, PETSC_TRUE); CHKERRQ(ierr);

  //GMRES WITH MINIMIZATION
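  /*
     Outline of the scheme implemented below (as it appears in this code): the inner
     KSP solve is capped at 16 iterations, so each call to KSPSolve() produces an
     improved iterate x that is stored as one column of the dense matrix S.  Every
     ColS inner solves, the outer loop replaces x by the best combination of those
     stored iterates, x = S*Alpha, where Alpha minimizes ||b - A*S*Alpha||_2.
  */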
  while(giter<Emaxiter && norm>Eprecision ){
    for(col=0; col<ColS && norm>Eprecision; col++){

      ierr = KSPSolve(ksp, b, x); CHKERRQ(ierr);

      ierr = KSPGetIterationNumber(ksp, &its); CHKERRQ(ierr);
      total += its; Iiter++;

      ierr = VecGetArray(x, &array);
      ierr = MatSetValues(S, Iend-Istart, ind_row, 1, &col, array, INSERT_VALUES);
      VecRestoreArray(x, &array);

      KSPGetResidualNorm(ksp,&norm);

      ierr = VecCopy(x, residu); CHKERRQ(ierr);
      ierr = VecAXPY(residu, -1, x_old); CHKERRQ(ierr);
      ierr = VecNorm(residu, NORM_INFINITY, &norm); CHKERRQ(ierr);

      ierr = PetscPrintf(PETSC_COMM_WORLD, "Norm of error %g, outer iteration %D\n", norm, giter); CHKERRQ(ierr);
      ierr = VecCopy(x, x_old); CHKERRQ(ierr);

    if( norm>Eprecision) {

      MatAssemblyBegin(S, MAT_FINAL_ASSEMBLY);
      MatAssemblyEnd(S, MAT_FINAL_ASSEMBLY);

      MatMatMult(A,S, MAT_INITIAL_MATRIX,PETSC_DEFAULT,&AS);

      MatMatMult(A,S, MAT_REUSE_MATRIX,PETSC_DEFAULT,&AS);

      //Minimization with CGLS
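      /*
         CGLS solves the small least-squares problem  min_Alpha || b - (A*S)*Alpha ||_2
         by applying conjugate gradients to the normal equations
         (A*S)^T (A*S) Alpha = (A*S)^T b, without forming that product explicitly.
         Alpha has only ColS entries, so this minimization is cheap compared to the
         solves with A itself.
      */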
      MatMult(AS, Alpha, r);  VecAYPX(r, -1, b);          //r_0 = b - AS*x_0
      MatMultTranspose(AS, r, p);                          //p_0 = AS'*r_0

      ierr = VecCopy(p, ss); CHKERRQ(ierr);                //p_0 = s_0
      VecNorm(ss, NORM_2, &gamma); gamma = gamma * gamma;  //gamma = norm2(s)^2
      while(gamma>cgprec && iter<iterations){
        MatMult(AS, p, q);                                 //q = AS*p
        VecNorm(q, NORM_2, &alpha); alpha = alpha * alpha; //alpha = norm2(q)^2
        alpha = gamma / alpha;
        VecAXPY(Alpha, alpha, p);                          //Alpha += alpha*p
        VecAXPY(r, -alpha, q);                             //r -= alpha*q
        MatMultTranspose(AS, r, ss);                       //ss = AS'*r
        oldgamma = gamma;                                  //save the previous gamma before updating it
        VecNorm(ss, NORM_2, &gamma); gamma = gamma * gamma; //gamma = norm2(ss)^2
        beta = gamma / oldgamma;
        VecAYPX(p, beta, ss);                              //p = ss + beta*p

      MatMult(S, Alpha, x);                                //x = S*Alpha

  ierr = PetscPrintf(PETSC_COMM_WORLD, "\t\t\t -- Execution time : %g (s)\n", T2-T1); CHKERRQ(ierr);
  ierr = PetscPrintf(PETSC_COMM_WORLD, "\t\t\t -- Total number of iterations : %D\n", total); CHKERRQ(ierr);
int KrylovMinimizeLSQR(Mat A, Vec b, Vec x) {

  PetscScalar  alpha, beta;
  PetscReal    norm=20, Eprecision=1e-8, tol=1e-40;
  PetscInt     giter=0, ColS=12, col=0, Emaxiter=50000000, iter=0, iterations=20, Iiter=0;
  PetscInt     Istart,Iend;
  PetscScalar  norm_ksp;
  Vec          u,v,d,uu,vt,zero_long,zero_short,x_lsqr;

  ierr = KSPCreate(PETSC_COMM_WORLD,&ksp);CHKERRQ(ierr);
  ierr = KSPSetOperators(ksp,A,A);CHKERRQ(ierr);
  ierr = KSPSetFromOptions(ksp);CHKERRQ(ierr);

  ierr = PetscPrintf(PETSC_COMM_WORLD, "Size of vector %D\n", size); CHKERRQ(ierr);

  MatGetOwnershipRange(A,&aa,&bb);
  //  ierr = PetscSynchronizedPrintf(PETSC_COMM_WORLD, "%D %D\n", aa,bb); CHKERRQ(ierr);
  //  PetscSynchronizedFlush(PETSC_COMM_WORLD);

  ierr = MatCreate(PETSC_COMM_WORLD, &S);CHKERRQ(ierr);
  ierr = MatSetSizes(S, bb-aa, PETSC_DECIDE, size, ColS); CHKERRQ(ierr);
  ierr = MatSetType(S, MATMPIDENSE); CHKERRQ(ierr);
  ierr = MatSetUp(S); CHKERRQ(ierr);

  ierr = MatGetOwnershipRange(S, &Istart, &Iend); CHKERRQ(ierr);

  ierr = VecDuplicate(b, &Ax); CHKERRQ(ierr);

  ierr = VecDuplicate(b,&x_old);CHKERRQ(ierr);
  ierr = VecDuplicate(b,&residu);CHKERRQ(ierr);

  ierr = VecDuplicate(b,&u);CHKERRQ(ierr);
  ierr = VecDuplicate(b,&uu);CHKERRQ(ierr);
  ierr = VecDuplicate(b,&zero_long);CHKERRQ(ierr);
  ierr = VecSet(zero_long,0);CHKERRQ(ierr);

  ierr = VecCreate(PETSC_COMM_WORLD, &v); CHKERRQ(ierr);
  ierr = VecSetSizes(v, PETSC_DECIDE, ColS); CHKERRQ(ierr);
  ierr = VecSetFromOptions(v); CHKERRQ(ierr);
  ierr = VecDuplicate(v,&zero_short);CHKERRQ(ierr);
  ierr = VecSet(zero_short,0);CHKERRQ(ierr);
  ierr = VecDuplicate(v,&d);CHKERRQ(ierr);
  ierr = VecDuplicate(v,&vt);CHKERRQ(ierr);
  ierr = VecDuplicate(v,&x_lsqr);CHKERRQ(ierr);
  // global indices of the rows owned locally (used to scatter x into column "col" of S)
  ind_row = (PetscInt*)malloc(sizeof(PetscInt)*(Iend-Istart));
  for(i=0; i<Iend-Istart; i++) ind_row[i] = i + Istart;
  //  ierr = KSPGMRESSetRestart(ksp, 16); CHKERRQ(ierr);
  ierr = KSPSetTolerances(ksp, 1e-10, 1e-10, PETSC_DEFAULT, 16); CHKERRQ(ierr);
  ierr = KSPSetInitialGuessNonzero(ksp, PETSC_TRUE); CHKERRQ(ierr);

  //GMRES WITH MINIMIZATION
  while(giter<Emaxiter && norm>Eprecision ){
    for(col=0; col<ColS && norm>Eprecision; col++){

      ierr = KSPSolve(ksp, b, x); CHKERRQ(ierr);

      ierr = KSPGetIterationNumber(ksp, &its); CHKERRQ(ierr);
      total += its; Iiter++;

      ierr = VecGetArray(x, &array);
      ierr = MatSetValues(S, Iend-Istart, ind_row, 1, &col, array, INSERT_VALUES);
      VecRestoreArray(x, &array);

      KSPGetResidualNorm(ksp,&norm);

      ierr = VecCopy(x, residu); CHKERRQ(ierr);
      ierr = VecAXPY(residu, -1, x_old); CHKERRQ(ierr);
      ierr = VecNorm(residu, NORM_INFINITY, &norm); CHKERRQ(ierr);

      ierr = PetscPrintf(PETSC_COMM_WORLD, "Norm of error %g, outer iteration %D\n", norm, giter); CHKERRQ(ierr);
      ierr = VecCopy(x, x_old); CHKERRQ(ierr);

    if( norm>Eprecision) {

      MatAssemblyBegin(S, MAT_FINAL_ASSEMBLY);
      MatAssemblyEnd(S, MAT_FINAL_ASSEMBLY);

      MatMatMult(A,S, MAT_INITIAL_MATRIX,PETSC_DEFAULT,&AS);

      MatMatMult(A,S, MAT_REUSE_MATRIX,PETSC_DEFAULT,&AS);
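      /*
         Minimization with LSQR: the same small problem  min || b - (A*S)*x_lsqr ||_2
         is solved here with the LSQR algorithm (Golub-Kahan bidiagonalization of A*S),
         in the usual MATLAB-style formulation; u and v are the long and short Lanczos
         vectors, d the search direction, and x_lsqr the ColS-sized solution.
      */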
      PetscScalar n2b,tolb,normr,c,s,phibar,normar,norma,thet,rhot,rho,phi;

      VecNorm(b, NORM_2, &n2b);                    //n2b = norm(b)
      tolb = tol * n2b;                            //tolerance on ||b - AS*x||
      ierr = VecCopy(b, u); CHKERRQ(ierr);         //u = b
      VecNorm(u, NORM_2, &beta);                   //beta = norm(u)
      VecAYPX(u,1/beta,zero_long);                 //u = u / beta
      normr = beta; c = 1; s = 0; phibar = beta;   //standard LSQR starting values (as in MATLAB's lsqr)

      MatMultTranspose(AS, u, v);                  //v = A'*u
      ierr = VecSet(x_lsqr,0);CHKERRQ(ierr);
      VecNorm(v, NORM_2, &alpha);                  //alpha = norm(v)
      VecAYPX(v,1/alpha,zero_short);               //v = v / alpha

      ierr = VecSet(d,0);CHKERRQ(ierr);
      normar = alpha * beta;
      norma = 0;
      for(i=0;i<iterations;i++) {
        MatMult(AS, v, uu);                        //uu = A*v
        VecAYPX(u, -alpha, uu);                    //u = uu - alpha*u
        VecNorm(u, NORM_2, &beta);                 //beta = norm(u)
        VecAYPX(u,1/beta,zero_long);               //u = u / beta
        norma = sqrt(norma*norma + alpha*alpha + beta*beta);  //norma = norm([norma alpha beta])
        thet = -s * alpha;                         //apply the previous Givens rotation
        rhot = c * alpha;
        rho = sqrt(rhot*rhot + beta*beta);
        c = rhot / rho;                            //compute the next Givens rotation
        s = -beta / rho;
        phi = c * phibar;
        phibar = s * phibar;
        if (phi == 0) {                            //stagnation of the method
        VecAYPX(d,-thet,v);                        //d = (v - thet*d)
        VecAYPX(d,1/rho,zero_short);               //d = d / rho

        if (normar/(norma*normr) <= tol) {         //check for convergence in min ||b - AS*x||
        if (normr <= tolb) {                       //check for convergence in AS*x = b

        VecAXPY(x_lsqr,phi,d);                     //x_lsqr = x_lsqr + phi*d
        normr = fabs(s) * normr;                   //fabs, not abs: s is a floating-point scalar
        MatMultTranspose(AS, u, vt);               //vt = A'*u
        VecAYPX(v,-beta,vt);                       //v = vt - beta*v
        VecNorm(v, NORM_2, &alpha);                //alpha = norm(v)
        VecAYPX(v,1/alpha,zero_short);             //v = v / alpha
        normar = alpha * fabs(s * phi);
      MatMult(S, x_lsqr, x);                       //x = S*x_lsqr

  ierr = PetscPrintf(PETSC_COMM_WORLD, "\t\t\t -- Execution time LSQR : %g (s)\n", T2-T1); CHKERRQ(ierr);
  ierr = PetscPrintf(PETSC_COMM_WORLD, "\t\t\t -- Total number of iterations LSQR : %D\n", total); CHKERRQ(ierr);
int main(int argc,char **args)
  Vec            x,b,u;      /* approx solution, RHS, exact solution */
  Mat            A;          /* linear system matrix */
  KSP            ksp;        /* linear solver context */
  PC             pc;         /* preconditioner context */
  PetscReal      norm;       /* norm of solution error */
  PetscScalar    v,one = 1.0;
  PetscInt       i,j,Ii,J,Istart,Iend,m = 8,n = 7,its;

  PetscInitialize(&argc,&args,(char*)0,help);
  ierr = PetscOptionsGetInt(NULL,"-m",&m,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsGetInt(NULL,"-n",&n,NULL);CHKERRQ(ierr);
  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
         Compute the matrix and right-hand-side vector that define
         the linear system, Ax = b.
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  /*
     Create parallel matrix, specifying only its global dimensions.
     When using MatCreate(), the matrix format can be specified at
     runtime.  Also, the parallel partitioning of the matrix is
     determined by PETSc at runtime.
  */
  ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,m*n,m*n);CHKERRQ(ierr);
  ierr = MatSetFromOptions(A);CHKERRQ(ierr);
  ierr = MatSetUp(A);CHKERRQ(ierr);
  /*
     Currently, all PETSc parallel matrix formats are partitioned by
     contiguous chunks of rows across the processors.  Determine which
     rows of the matrix are locally owned.
  */
  ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr);
  /*
     Set matrix elements for the 2-D, five-point stencil in parallel.
      - Each processor needs to insert only elements that it owns
        locally (but any non-local elements will be sent to the
        appropriate processor during matrix assembly).
      - Always specify global rows and columns of matrix entries.
  */
  for (Ii=Istart; Ii<Iend; Ii++) {
    v = -1.0; i = Ii/n; j = Ii - i*n;
    if (i>0)   {J = Ii - n; ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
    if (i<m-1) {J = Ii + n; ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
    if (j>0)   {J = Ii - 1; ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
    if (j<n-1) {J = Ii + 1; ierr = MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);CHKERRQ(ierr);}
    v = 4.0; ierr = MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);CHKERRQ(ierr);
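  /*
     Row Ii corresponds to grid point (i,j) with Ii = i*n + j.  As a small worked
     example (illustrative only): for m = n = 3, the interior row Ii = 4 (i = 1, j = 1)
     gets the off-diagonal value -1.0 in columns 1, 3, 5 and 7 and the value 4.0 on
     the diagonal, i.e. the usual 5-point Laplacian stencil.
  */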
  /*
     Assemble matrix, using the 2-step process:
       MatAssemblyBegin(), MatAssemblyEnd()
     Computations can be done while messages are in transition
     by placing code between these two statements.
  */
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  /*
     Create parallel vectors.
      - When using VecCreate(), VecSetSizes() and VecSetFromOptions(),
        we specify only the vector's global dimension; the parallel
        partitioning is determined at runtime.
      - Note: We form 1 vector from scratch and then duplicate as needed.
  */
  ierr = VecCreate(PETSC_COMM_WORLD,&u);CHKERRQ(ierr);
  ierr = VecSetSizes(u,PETSC_DECIDE,m*n);CHKERRQ(ierr);
  ierr = VecSetFromOptions(u);CHKERRQ(ierr);
  ierr = VecDuplicate(u,&b);CHKERRQ(ierr);
  ierr = VecDuplicate(b,&x);CHKERRQ(ierr);
  /*
     Set exact solution; then compute right-hand-side vector.
  */
  ierr = VecSet(u,one);CHKERRQ(ierr);
  ierr = MatMult(A,u,b);CHKERRQ(ierr);
  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                Create the linear solver and set various options
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

  /*
     Create linear solver context
  */
  ierr = KSPCreate(PETSC_COMM_WORLD,&ksp);CHKERRQ(ierr);
  /*
     Set operators.  Here the matrix that defines the linear system
     also serves as the preconditioning matrix.
  */
  ierr = KSPSetOperators(ksp,A,A);CHKERRQ(ierr);
  /*
     Set linear solver defaults for this problem (optional).
      - By extracting the KSP and PC contexts from the KSP context,
        we can then directly call any KSP and PC routines
        to set various options.
  */
  ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
  ierr = KSPSetTolerances(ksp,1e-9,1e-9,PETSC_DEFAULT,5000000);CHKERRQ(ierr);
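  /*
     The help text advertises -user_defined_pc; pc extracted above would typically be
     configured from that option along the following lines (sketch only;
     SampleShellPCApply is a hypothetical apply routine, not defined in this excerpt):

        PetscBool user_defined_pc = PETSC_FALSE;
        ierr = PetscOptionsHasName(NULL,"-user_defined_pc",&user_defined_pc);CHKERRQ(ierr);
        if (user_defined_pc) {
          ierr = PCSetType(pc,PCSHELL);CHKERRQ(ierr);
          ierr = PCShellSetApply(pc,SampleShellPCApply);CHKERRQ(ierr);
        } else {
          ierr = PCSetType(pc,PCJACOBI);CHKERRQ(ierr);
        }
  */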
  /*
     Set runtime options, e.g.,
         -ksp_type <type> -pc_type <type> -ksp_monitor -ksp_rtol <rtol>
     These options will override those specified above as long as
     KSPSetFromOptions() is called _after_ any other customization routines.
  */
  ierr = KSPSetFromOptions(ksp);CHKERRQ(ierr);
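  /*
     Illustrative invocations only (any PETSc-supported KSP/PC options work here):

        ./ex15 -m 400 -n 400 -ksp_type fgmres -ksp_monitor
        ./ex15 -m 400 -n 400 -ksp_type gmres -pc_type jacobi -ksp_rtol 1e-9
  */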
  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                      Solve the linear system
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

  ierr = KSPSolve(ksp,b,x);CHKERRQ(ierr);

  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                      Check solution and clean up
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

  VecDuplicate(b,&sol);
  VecNorm(sol, NORM_2, &norm);
  ierr = KSPGetIterationNumber(ksp,&its);CHKERRQ(ierr);
  ierr = PetscPrintf(PETSC_COMM_WORLD,"Norm of error %g iterations %D\n",(double)norm,its);CHKERRQ(ierr);
  ierr = PetscPrintf(PETSC_COMM_WORLD, "\t\t\t -- Execution time : %g (s)\n\n\n", T2-T1); CHKERRQ(ierr);
  VecDuplicate(b,&sol);
  KrylovMinimize(A, b, x2);
  VecNorm(sol, NORM_2, &norm);
  ierr = PetscPrintf(PETSC_COMM_WORLD, "\t\t\t -- Error Krylov Minimization %g\n",norm);

  VecDuplicate(b,&sol);
  KrylovMinimizeLSQR(A, b, x2);
  VecNorm(sol, NORM_2, &norm);
  ierr = PetscPrintf(PETSC_COMM_WORLD, "\t\t\t -- Error Krylov Minimization LSQR %g\n",norm);
  /*
     Free work space.  All PETSc objects should be destroyed when they
     are no longer needed.
  */
  ierr = KSPDestroy(&ksp);CHKERRQ(ierr);
  ierr = VecDestroy(&u);CHKERRQ(ierr);  ierr = VecDestroy(&x);CHKERRQ(ierr);
  ierr = VecDestroy(&b);CHKERRQ(ierr);  ierr = MatDestroy(&A);CHKERRQ(ierr);

  ierr = PetscFinalize();