2 // /home/couturie/work/petsc-3.5.1/arch-linux2-c-debug/bin/mpirun -np 3 ./ex45 -da_grid_x 160 -da_grid_y 160 -da_grid_z 160 -ksp_type fgmres
6 Laplacian in 3D. Modeled by the partial differential equation
8 - Laplacian u = 1,0 < x,y,z < 1,
10 with boundary conditions
12 u = 1 for x = 0, x = 1, y = 0, y = 1, z = 0, z = 1.
14 This uses multigrid to solve the linear system
16 See src/snes/examples/tutorials/ex50.c
18 Can also be run with -pc_type exotic -ksp_type fgmres
22 static char help[] = "Solves 3D Laplacian using multigrid.\n\n";
26 #include <petscdmda.h>
28 extern PetscErrorCode ComputeMatrix(KSP,Mat,Mat,void*);
29 extern PetscErrorCode ComputeRHS(KSP,Vec,void*);
30 extern PetscErrorCode ComputeInitialGuess(KSP,Vec,void*);
33 #define __FUNCT__ "main"
41 int KrylovMinimize(Mat A, Vec b, Vec x) {
46 PetscScalar gamma, alpha, oldgamma, beta;
47 PetscReal norm=20, Eprecision=1e-8, cgprec=1e-40;
48 PetscInt giter=0, ColS=6, col=0, Emaxiter=50000000, iter=0, iterations=15, Iiter=0;
60 Vec Alpha, p, ss, vect, r, q, Ax;
65 ierr = KSPCreate(PETSC_COMM_WORLD,&ksp);CHKERRQ(ierr);
66 ierr = KSPSetOperators(ksp,A,A);CHKERRQ(ierr);
67 ierr = KSPSetFromOptions(ksp);CHKERRQ(ierr);
74 ierr = PetscPrintf(PETSC_COMM_WORLD, "Size of vector %D\n", size); CHKERRQ(ierr);
77 MatGetOwnershipRange(A,&aa,&bb);
79 // ierr = PetscSynchronizedPrintf(PETSC_COMM_WORLD, "%D %D\n", aa,bb); CHKERRQ(ierr);
80 //PetscSynchronizedFlush(PETSC_COMM_WORLD);
83 ierr = MatCreate(PETSC_COMM_WORLD, &S);CHKERRQ(ierr);
84 ierr = MatSetSizes(S, bb-aa, PETSC_DECIDE, size, ColS); CHKERRQ(ierr);
85 ierr = MatSetType(S, MATMPIDENSE); CHKERRQ(ierr);
86 ierr = MatSetUp(S); CHKERRQ(ierr);
88 ierr = MatGetOwnershipRange(S, &Istart, &Iend); CHKERRQ(ierr);
90 ierr = VecCreate(PETSC_COMM_WORLD, &Alpha); CHKERRQ(ierr);
91 ierr = VecSetSizes(Alpha, PETSC_DECIDE, ColS); CHKERRQ(ierr);
92 ierr = VecSetFromOptions(Alpha); CHKERRQ(ierr);
93 ierr = VecDuplicate(Alpha, &vect); CHKERRQ(ierr);
94 ierr = VecDuplicate(Alpha, &p); CHKERRQ(ierr);
95 ierr = VecDuplicate(Alpha, &ss); CHKERRQ(ierr);
96 ierr = VecDuplicate(b, &r); CHKERRQ(ierr);
97 ierr = VecDuplicate(b, &q); CHKERRQ(ierr);
98 ierr = VecDuplicate(b, &Ax); CHKERRQ(ierr);
100 ierr = VecDuplicate(b,&x_old);CHKERRQ(ierr);
101 ierr = VecDuplicate(b,&residu);CHKERRQ(ierr);
105 //indexes of row (these indexes are global)
106 ind_row = (PetscInt*)malloc(sizeof(PetscInt)*(Iend-Istart));
107 for(i=0; i<Iend-Istart; i++) ind_row[i] = i + Istart;
110 // ierr = KSPGMRESSetRestart(ksp, 16); CHKERRQ(ierr);
111 ierr = KSPSetTolerances(ksp, 1e-13, 1e-13, PETSC_DEFAULT, 30); CHKERRQ(ierr);
112 ierr = KSPSetInitialGuessNonzero(ksp, PETSC_TRUE); CHKERRQ(ierr);
116 //GMRES WITH MINIMIZATION
118 ierr = KSPSetUp(ksp);CHKERRQ(ierr);
119 while(giter<Emaxiter && norm>Eprecision ){
120 for(col=0; col<ColS && norm>Eprecision; col++){
124 ierr = KSPSolve(ksp, b, x); CHKERRQ(ierr);
126 ierr = KSPGetIterationNumber(ksp, &its); CHKERRQ(ierr);
127 total += its; Iiter ++;
132 ierr = VecGetArray(x, &array);
133 ierr = MatSetValues(S, Iend-Istart, ind_row, 1, &col, array, INSERT_VALUES);
134 VecRestoreArray(x, &array);
138 KSPGetResidualNorm(ksp,&norm);
140 ierr = VecCopy(x, residu); CHKERRQ(ierr);
141 ierr = VecAXPY(residu, -1, x_old); CHKERRQ(ierr);
142 ierr = VecNorm(residu, NORM_INFINITY, &norm); CHKERRQ(ierr);
146 ierr = PetscPrintf(PETSC_COMM_WORLD, "Norm of error %g, outer iteration %D\n", norm, giter); CHKERRQ(ierr);
147 ierr = VecCopy(x, x_old); CHKERRQ(ierr);
154 if( norm>Eprecision) {
156 MatAssemblyBegin(S, MAT_FINAL_ASSEMBLY);
157 MatAssemblyEnd(S, MAT_FINAL_ASSEMBLY);
162 MatMatMult(A,S, MAT_INITIAL_MATRIX,PETSC_DEFAULT,&AS);
166 MatMatMult(A,S, MAT_REUSE_MATRIX,PETSC_DEFAULT,&AS);
171 //Minimization with CGLS
172 MatMult(AS, Alpha, r); VecAYPX(r, -1, b); //r_0 = b-AS*x_0
175 MatMultTranspose(AS, r, p); //p_0 = AS'*r_0
180 ierr = VecCopy(p, ss); CHKERRQ(ierr); //p_0 = s_0
181 VecNorm(ss, NORM_2, &gamma); gamma = gamma * gamma; //gamma = norm2(s)^2
182 while(gamma>cgprec && iter<iterations){
183 MatMult(AS, p, q); //q = AS*p
184 VecNorm(q, NORM_2, &alpha); alpha = alpha * alpha; //alpha = norm2(q)^2
185 alpha = gamma / alpha;
186 VecAXPY(Alpha, alpha, p); //Alpha += alpha*p
187 VecAXPY(r, -alpha, q); //r -= alpha*q
188 MatMultTranspose(AS, r, ss); // ss = AS'*r
190 VecNorm(ss, NORM_2, &gamma); gamma = gamma * gamma; //gamma = norm2(s)^2
191 beta = gamma / oldgamma;
192 VecAYPX(p, beta, ss); //p = s+beta*p;
198 MatMult(S, Alpha, x); //x = S*Alpha;
203 ierr = PetscPrintf(PETSC_COMM_WORLD, "\t\t\t -- Execution time : %g (s)\n", T2-T1); CHKERRQ(ierr);
204 ierr = PetscPrintf(PETSC_COMM_WORLD, "\t\t\t -- Total number of iterations : %D\n", total); CHKERRQ(ierr);
218 int KrylovMinimizeLSQR(Mat A, Vec b, Vec x) {
223 PetscScalar alpha, beta;
224 PetscReal norm=20, Eprecision=1e-8, tol=1e-40;
225 PetscInt giter=0, ColS=6, col=0, Emaxiter=50000000, iter=0, iterations=15, Iiter=0;
231 PetscInt Istart,Iend;
238 PetscScalar norm_ksp;
239 Vec u,v,d,uu,vt,zero_long,zero_short,x_lsqr;
243 ierr = KSPCreate(PETSC_COMM_WORLD,&ksp);CHKERRQ(ierr);
244 ierr = KSPSetOperators(ksp,A,A);CHKERRQ(ierr);
245 ierr = KSPSetFromOptions(ksp);CHKERRQ(ierr);
252 ierr = PetscPrintf(PETSC_COMM_WORLD, "Size of vector %D\n", size); CHKERRQ(ierr);
255 MatGetOwnershipRange(A,&aa,&bb);
257 // ierr = PetscSynchronizedPrintf(PETSC_COMM_WORLD, "%D %D\n", aa,bb); CHKERRQ(ierr);
258 //PetscSynchronizedFlush(PETSC_COMM_WORLD);
261 ierr = MatCreate(PETSC_COMM_WORLD, &S);CHKERRQ(ierr);
262 ierr = MatSetSizes(S, bb-aa, PETSC_DECIDE, size, ColS); CHKERRQ(ierr);
263 ierr = MatSetType(S, MATMPIDENSE); CHKERRQ(ierr);
264 ierr = MatSetUp(S); CHKERRQ(ierr);
266 ierr = MatGetOwnershipRange(S, &Istart, &Iend); CHKERRQ(ierr);
270 ierr = VecDuplicate(b, &Ax); CHKERRQ(ierr);
272 ierr = VecDuplicate(b,&x_old);CHKERRQ(ierr);
273 ierr = VecDuplicate(b,&residu);CHKERRQ(ierr);
277 ierr = VecDuplicate(b,&u);CHKERRQ(ierr);
280 ierr = VecDuplicate(b,&uu);CHKERRQ(ierr);
281 ierr = VecDuplicate(b,&zero_long);CHKERRQ(ierr);
282 ierr = VecSet(zero_long,0);CHKERRQ(ierr);
285 ierr = VecCreate(PETSC_COMM_WORLD, &v); CHKERRQ(ierr);
286 ierr = VecSetSizes(v, PETSC_DECIDE, ColS); CHKERRQ(ierr);
287 ierr = VecSetFromOptions(v); CHKERRQ(ierr);
288 ierr = VecDuplicate(v,&zero_short);CHKERRQ(ierr);
289 ierr = VecSet(zero_short,0);CHKERRQ(ierr);
290 ierr = VecDuplicate(v,&d);CHKERRQ(ierr);
291 ierr = VecDuplicate(v,&vt);CHKERRQ(ierr);
292 ierr = VecDuplicate(v,&x_lsqr);CHKERRQ(ierr);
295 //indexes of row (these indexes are global)
296 ind_row = (PetscInt*)malloc(sizeof(PetscInt)*(Iend-Istart));
297 for(i=0; i<Iend-Istart; i++) ind_row[i] = i + Istart;
300 // ierr = KSPGMRESSetRestart(ksp, 16); CHKERRQ(ierr);
301 ierr = KSPSetTolerances(ksp, 1e-13, 1e-13, PETSC_DEFAULT, 30); CHKERRQ(ierr);
302 ierr = KSPSetInitialGuessNonzero(ksp, PETSC_TRUE); CHKERRQ(ierr);
307 //GMRES WITH MINIMIZATION
309 ierr = KSPSetUp(ksp);CHKERRQ(ierr);
310 while(giter<Emaxiter && norm>Eprecision ){
311 for(col=0; col<ColS && norm>Eprecision; col++){
315 ierr = KSPSolve(ksp, b, x); CHKERRQ(ierr);
317 ierr = KSPGetIterationNumber(ksp, &its); CHKERRQ(ierr);
318 total += its; Iiter ++;
323 ierr = VecGetArray(x, &array);
324 ierr = MatSetValues(S, Iend-Istart, ind_row, 1, &col, array, INSERT_VALUES);
325 VecRestoreArray(x, &array);
328 KSPGetResidualNorm(ksp,&norm);
332 ierr = VecCopy(x, residu); CHKERRQ(ierr);
333 ierr = VecAXPY(residu, -1, x_old); CHKERRQ(ierr);
334 ierr = VecNorm(residu, NORM_INFINITY, &norm); CHKERRQ(ierr);
338 ierr = PetscPrintf(PETSC_COMM_WORLD, "Norm of error %g, outer iteration %D\n", norm, giter); CHKERRQ(ierr);
339 ierr = VecCopy(x, x_old); CHKERRQ(ierr);
346 if( norm>Eprecision) {
348 MatAssemblyBegin(S, MAT_FINAL_ASSEMBLY);
349 MatAssemblyEnd(S, MAT_FINAL_ASSEMBLY);
356 MatMatMult(A,S, MAT_INITIAL_MATRIX,PETSC_DEFAULT,&AS);
361 MatMatMult(A,S, MAT_REUSE_MATRIX,PETSC_DEFAULT,&AS);
373 PetscScalar n2b,tolb,normr,c,s,phibar,normar,norma,thet,rhot,rho,phi;
376 VecNorm(b, NORM_2, &n2b); //n2b = norm(b);
377 ierr = VecCopy(b, u); CHKERRQ(ierr); //u=b
378 VecNorm(u, NORM_2, &beta); // beta=norm(u)
381 VecAYPX(u,1/beta,zero_long); // u = u / beta;
386 MatMultTranspose(AS, u, v); //v=A'*u
387 ierr = VecSet(x_lsqr,0);CHKERRQ(ierr);
388 VecNorm(v, NORM_2, &alpha); // alpha=norm(v)
390 VecAYPX(v,1/alpha,zero_short); // v = v / alpha;
392 ierr = VecSet(d,0);CHKERRQ(ierr);
393 normar = alpha * beta;
396 for(i=0;i<iterations;i++) {
397 MatMult(AS, v, uu); //uu=A*v
398 VecAYPX(u, -alpha, uu); //u = uu-alpha*u;
399 VecNorm(u, NORM_2, &beta); // beta=norm(u)
400 VecAYPX(u,1/beta,zero_long); // u = u / beta;
401 norma=sqrt(norma*norma+alpha*alpha+beta*beta); // norma = norm([norma alpha beta]);
404 rho = sqrt(rhot*rhot + beta*beta);
408 if (phi == 0) { // stagnation of the method
412 VecAYPX(d,-thet,v); //d = (v - thet * d);
413 VecAYPX(d,1/rho,zero_short); //d=d/ rho;
416 VecAXPY(x_lsqr,phi,d); // x_lsqr=x_lsqr+phi*d
417 normr = abs(s) * normr;
418 MatMultTranspose(AS, u, vt); //vt=A'*u;
419 VecAYPX(v,-beta,vt); // v = vt - beta * v;
420 VecNorm(v, NORM_2, &alpha); // alpha=norm(v)
421 VecAYPX(v,1/alpha,zero_short); // v = v / alpha;
422 normar = alpha * abs( s * phi);
429 MatMult(S, x_lsqr, x); //x = S*x_small;
434 ierr = PetscPrintf(PETSC_COMM_WORLD, "\t\t\t -- Execution time LSQR : %g (s)\n", T2-T1); CHKERRQ(ierr);
435 ierr = PetscPrintf(PETSC_COMM_WORLD, "\t\t\t -- Total number of iterations LSQR : %D\n", total); CHKERRQ(ierr);
447 int main(int argc,char **argv)
458 PetscInitialize(&argc,&argv,(char*)0,help);
460 ierr = KSPCreate(PETSC_COMM_WORLD,&ksp);CHKERRQ(ierr);
461 ierr = DMDACreate3d(PETSC_COMM_WORLD,DM_BOUNDARY_NONE,DM_BOUNDARY_NONE,DM_BOUNDARY_NONE,DMDA_STENCIL_STAR,-7,-7,-7,PETSC_DECIDE,PETSC_DECIDE,PETSC_DECIDE,1,1,0,0,0,&da);CHKERRQ(ierr);
462 ierr = KSPSetDM(ksp,da);CHKERRQ(ierr);
463 ierr = KSPSetComputeInitialGuess(ksp,ComputeInitialGuess,NULL);CHKERRQ(ierr);
464 ierr = KSPSetComputeRHS(ksp,ComputeRHS,NULL);CHKERRQ(ierr);
465 ierr = KSPSetComputeOperators(ksp,ComputeMatrix,NULL);CHKERRQ(ierr);
466 ierr = DMDestroy(&da);CHKERRQ(ierr);
468 ierr = KSPSetFromOptions(ksp);CHKERRQ(ierr);
473 ierr = KSPSetFromOptions(ksp);CHKERRQ(ierr);
476 PCGetType(pc, &type);
478 PetscPrintf(PETSC_COMM_WORLD, "PC TYPE %s \n", type);
479 KSPGetType(ksp,&type);
480 PetscPrintf(PETSC_COMM_WORLD, "SOLVER TYPE %s \n", type);
484 ierr = KSPSetTolerances(ksp, 1e-10, 1e-10, PETSC_DEFAULT, 50000000); CHKERRQ(ierr);
486 ierr = KSPSetUp(ksp);CHKERRQ(ierr);
487 ierr = KSPSolve(ksp,NULL,NULL);CHKERRQ(ierr);
492 ierr = KSPGetSolution(ksp,&x);CHKERRQ(ierr);
493 ierr = KSPGetRhs(ksp,&b);CHKERRQ(ierr);
494 ierr = VecDuplicate(b,&r);CHKERRQ(ierr);
495 ierr = KSPGetOperators(ksp,&A,NULL);CHKERRQ(ierr);
497 ierr = MatMult(A,x,r);CHKERRQ(ierr);
498 ierr = VecAXPY(r,-1.0,b);CHKERRQ(ierr);
499 ierr = VecNorm(r,NORM_2,&norm);CHKERRQ(ierr);
500 ierr = PetscPrintf(PETSC_COMM_WORLD,"Residual norm %g\n",(double)norm);CHKERRQ(ierr);
501 ierr = PetscPrintf(PETSC_COMM_WORLD, "\t\t\t -- Execution time : %g (s)\n", T2-T1); CHKERRQ(ierr);
503 ierr = KSPGetIterationNumber(ksp, &total); CHKERRQ(ierr);
504 ierr = PetscPrintf(PETSC_COMM_WORLD, "\t\t\t -- Total number of iterations : %D\n", total); CHKERRQ(ierr);
508 // ierr = KSPDestroy(&ksp);CHKERRQ(ierr);
516 VecDuplicate(b,&sol);
518 KrylovMinimize(A, b, x);
526 VecNorm(sol, NORM_2, &norm);
527 ierr = PetscPrintf(PETSC_COMM_WORLD, "Error2 %g\n",norm);
529 ierr = KSPDestroy(&ksp);CHKERRQ(ierr);
539 VecDuplicate(b,&sol);
541 KrylovMinimizeLSQR(A, b, x);
549 VecNorm(sol, NORM_2, &norm);
550 ierr = PetscPrintf(PETSC_COMM_WORLD, "Error LSQR %g\n",norm);
553 ierr = KSPDestroy(&ksp);CHKERRQ(ierr);
557 ierr = VecDestroy(&r);CHKERRQ(ierr);
559 ierr = PetscFinalize();
565 #define __FUNCT__ "ComputeRHS"
566 PetscErrorCode ComputeRHS(KSP ksp,Vec b,void *ctx)
569 PetscInt i,j,k,mx,my,mz,xm,ym,zm,xs,ys,zs;
571 PetscScalar Hx,Hy,Hz,HxHydHz,HyHzdHx,HxHzdHy;
572 PetscScalar ***barray;
574 PetscFunctionBeginUser;
575 ierr = KSPGetDM(ksp,&dm);CHKERRQ(ierr);
576 ierr = DMDAGetInfo(dm,0,&mx,&my,&mz,0,0,0,0,0,0,0,0,0);CHKERRQ(ierr);
577 Hx = 1.0 / (PetscReal)(mx-1); Hy = 1.0 / (PetscReal)(my-1); Hz = 1.0 / (PetscReal)(mz-1);
578 HxHydHz = Hx*Hy/Hz; HxHzdHy = Hx*Hz/Hy; HyHzdHx = Hy*Hz/Hx;
579 ierr = DMDAGetCorners(dm,&xs,&ys,&zs,&xm,&ym,&zm);CHKERRQ(ierr);
580 ierr = DMDAVecGetArray(dm,b,&barray);CHKERRQ(ierr);
582 for (k=zs; k<zs+zm; k++) {
583 for (j=ys; j<ys+ym; j++) {
584 for (i=xs; i<xs+xm; i++) {
585 if (i==0 || j==0 || k==0 || i==mx-1 || j==my-1 || k==mz-1) {
586 barray[k][j][i] = 2.0*(HxHydHz + HxHzdHy + HyHzdHx);
588 barray[k][j][i] = Hx*Hy*Hz;
593 ierr = DMDAVecRestoreArray(dm,b,&barray);CHKERRQ(ierr);
594 PetscFunctionReturn(0);
598 #define __FUNCT__ "ComputeInitialGuess"
599 PetscErrorCode ComputeInitialGuess(KSP ksp,Vec b,void *ctx)
603 PetscFunctionBeginUser;
604 ierr = VecSet(b,0);CHKERRQ(ierr);
605 PetscFunctionReturn(0);
609 #define __FUNCT__ "ComputeMatrix"
610 PetscErrorCode ComputeMatrix(KSP ksp,Mat jac,Mat B,void *ctx)
614 PetscInt i,j,k,mx,my,mz,xm,ym,zm,xs,ys,zs;
615 PetscScalar v[7],Hx,Hy,Hz,HxHydHz,HyHzdHx,HxHzdHy;
616 MatStencil row,col[7];
618 PetscFunctionBeginUser;
619 ierr = KSPGetDM(ksp,&da);CHKERRQ(ierr);
620 ierr = DMDAGetInfo(da,0,&mx,&my,&mz,0,0,0,0,0,0,0,0,0);CHKERRQ(ierr);
621 Hx = 1.0 / (PetscReal)(mx-1); Hy = 1.0 / (PetscReal)(my-1); Hz = 1.0 / (PetscReal)(mz-1);
622 HxHydHz = Hx*Hy/Hz; HxHzdHy = Hx*Hz/Hy; HyHzdHx = Hy*Hz/Hx;
623 ierr = DMDAGetCorners(da,&xs,&ys,&zs,&xm,&ym,&zm);CHKERRQ(ierr);
625 for (k=zs; k<zs+zm; k++) {
626 for (j=ys; j<ys+ym; j++) {
627 for (i=xs; i<xs+xm; i++) {
628 row.i = i; row.j = j; row.k = k;
629 if (i==0 || j==0 || k==0 || i==mx-1 || j==my-1 || k==mz-1) {
630 v[0] = 2.0*(HxHydHz + HxHzdHy + HyHzdHx);
631 ierr = MatSetValuesStencil(B,1,&row,1,&row,v,INSERT_VALUES);CHKERRQ(ierr);
633 v[0] = -HxHydHz;col[0].i = i; col[0].j = j; col[0].k = k-1;
634 v[1] = -HxHzdHy;col[1].i = i; col[1].j = j-1; col[1].k = k;
635 v[2] = -HyHzdHx;col[2].i = i-1; col[2].j = j; col[2].k = k;
636 v[3] = 2.0*(HxHydHz + HxHzdHy + HyHzdHx);col[3].i = row.i; col[3].j = row.j; col[3].k = row.k;
637 v[4] = -HyHzdHx;col[4].i = i+1; col[4].j = j; col[4].k = k;
638 v[5] = -HxHzdHy;col[5].i = i; col[5].j = j+1; col[5].k = k;
639 v[6] = -HxHydHz;col[6].i = i; col[6].j = j; col[6].k = k+1;
640 ierr = MatSetValuesStencil(B,1,&row,7,col,v,INSERT_VALUES);CHKERRQ(ierr);
645 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
646 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
647 PetscFunctionReturn(0);