]> AND Private Git Repository - GMRES2stage.git/commitdiff
Logo AND Algorithmique Numérique Distribuée

Private GIT Repository
ex49
authorraphael couturier <couturie@extinction>
Thu, 14 Aug 2014 06:43:23 +0000 (08:43 +0200)
committerraphael couturier <couturie@extinction>
Thu, 14 Aug 2014 06:43:23 +0000 (08:43 +0200)
code/ex49.c [new file with mode: 0644]

diff --git a/code/ex49.c b/code/ex49.c
new file mode 100644 (file)
index 0000000..b875dd3
--- /dev/null
@@ -0,0 +1,1812 @@
+//  /home/couturie/work/petsc-3.5.1/arch-linux2-c-debug/bin/mpirun -np 4    ./ex49 -mx 900 -my 900 -ksp_type fgmres
+
+
+static char help[] =  "   Solves the compressible plane strain elasticity equations in 2d on the unit domain using Q1 finite elements. \n\
+   Material properties E (Youngs moduls) and nu (Poisson ratio) may vary as a function of space. \n\
+   The model utilisse boundary conditions which produce compression in the x direction. \n\
+Options: \n\
+     -mx : number elements in x-direciton \n\
+     -my : number elements in y-direciton \n\
+     -c_str : indicates the structure of the coefficients to use. \n\
+          -c_str 0 => Setup for an isotropic material with constant coefficients. \n\
+                         Parameters: \n\
+                             -iso_E  : Youngs modulus \n\
+                             -iso_nu : Poisson ratio \n\
+          -c_str 1 => Setup for a step function in the material properties in x. \n\
+                         Parameters: \n\
+                              -step_E0  : Youngs modulus to the left of the step \n\
+                              -step_nu0 : Poisson ratio to the left of the step \n\
+                              -step_E1  : Youngs modulus to the right of the step \n\
+                              -step_n1  : Poisson ratio to the right of the step \n\
+                              -step_xc  : x coordinate of the step \n\
+          -c_str 2 => Setup for a checkerboard material with alternating properties. \n\
+                      Repeats the following pattern throughout the domain. For example with 4 materials specified, we would heve \n\
+                      -------------------------\n\
+                      |  D  |  A  |  B  |  C  |\n\
+                      ------|-----|-----|------\n\
+                      |  C  |  D  |  A  |  B  |\n\
+                      ------|-----|-----|------\n\
+                      |  B  |  C  |  D  |  A  |\n\
+                      ------|-----|-----|------\n\
+                      |  A  |  B  |  C  |  D  |\n\
+                      -------------------------\n\
+                      \n\
+                         Parameters: \n\
+                              -brick_E    : a comma seperated list of Young's modulii \n\
+                              -brick_nu   : a comma seperated list of Poisson ratio's  \n\
+                              -brick_span : the number of elements in x and y each brick will span \n\
+          -c_str 3 => Setup for a sponge-like material with alternating properties. \n\
+                      Repeats the following pattern throughout the domain \n\
+                      -----------------------------\n\
+                      |       [background]        |\n\
+                      |          E0,nu0           |\n\
+                      |     -----------------     |\n\
+                      |     |  [inclusion]  |     |\n\
+                      |     |    E1,nu1     |     |\n\
+                      |     |               |     |\n\
+                      |     | <---- w ----> |     |\n\
+                      |     |               |     |\n\
+                      |     |               |     |\n\
+                      |     -----------------     |\n\
+                      |                           |\n\
+                      |                           |\n\
+                      -----------------------------\n\
+                      <--------  t + w + t ------->\n\
+                      \n\
+                         Parameters: \n\
+                              -sponge_E0  : Youngs moduls of the surrounding material \n\
+                              -sponge_E1  : Youngs moduls of the inclusio \n\
+                              -sponge_nu0 : Poisson ratio of the surrounding material \n\
+                              -sponge_nu1 : Poisson ratio of the inclusio \n\
+                              -sponge_t   : the number of elements defining the border around each inclusion \n\
+                              -sponge_w   : the number of elements in x and y each inclusion will span\n\
+     -use_gp_coords : Evaluate the Youngs modulus, Poisson ratio and the body force at the global coordinates of the quadrature points.\n\
+     By default, E, nu and the body force are evaulated at the element center and applied as a constant over the entire element.\n\
+     -use_nonsymbc : Option to use non-symmetric boundary condition imposition. This choice will use less memory.";
+
+/* Contributed by Dave May */
+
+#include <petscksp.h>
+#include <petscdm.h>
+#include <petscdmda.h>
+
+static PetscErrorCode DMDABCApplyCompression(DM,Mat,Vec);
+static PetscErrorCode DMDABCApplySymmetricCompression(DM elas_da,Mat A,Vec f,IS *dofs,Mat *AA,Vec *ff);
+
+
+#define NSD            2 /* number of spatial dimensions */
+#define NODES_PER_EL   4 /* nodes per element */
+#define U_DOFS         2 /* degrees of freedom per displacement node */
+#define GAUSS_POINTS   4
+
+
+
+
+int KrylovMinimize(Mat A, Vec b, Vec x) {
+  
+
+  //Variables
+
+  PetscScalar  gamma, alpha, oldgamma, beta;
+  PetscReal norm=20, Eprecision=1e-8, cgprec=1e-40;     
+  PetscInt giter=0, ColS=8, col=0, Emaxiter=50000000, iter=0, iterations=15, Iiter=0;
+  PetscErrorCode ierr;
+  PetscScalar T1, T2;
+  KSP ksp;
+  PetscInt total=0;  
+  PetscInt size;
+  PetscInt Istart,Iend;
+  PetscInt i,its;
+  Vec       x_old, residu;
+  Mat S, AS;
+  PetscScalar *array;
+  PetscInt *ind_row;
+  Vec Alpha, p, ss, vect, r, q, Ax;
+
+
+  PetscInt first=1;
+
+  ierr = KSPCreate(PETSC_COMM_WORLD,&ksp);CHKERRQ(ierr);
+  ierr = KSPSetOperators(ksp,A,A);CHKERRQ(ierr);
+  ierr = KSPSetFromOptions(ksp);CHKERRQ(ierr);
+
+
+
+
+  VecGetSize(b,&size);
+
+  ierr = PetscPrintf(PETSC_COMM_WORLD, "Size of vector %D\n", size); CHKERRQ(ierr);  
+
+  PetscInt aa,bb;
+  MatGetOwnershipRange(A,&aa,&bb);
+
+  //  ierr = PetscSynchronizedPrintf(PETSC_COMM_WORLD, "%D %D\n", aa,bb); CHKERRQ(ierr);  
+  //PetscSynchronizedFlush(PETSC_COMM_WORLD);
+
+
+  ierr = MatCreate(PETSC_COMM_WORLD, &S);CHKERRQ(ierr);
+  ierr = MatSetSizes(S, bb-aa,  PETSC_DECIDE, size, ColS); CHKERRQ(ierr);
+  ierr = MatSetType(S, MATMPIDENSE); CHKERRQ(ierr);
+  ierr = MatSetUp(S); CHKERRQ(ierr);
+
+  ierr = MatGetOwnershipRange(S, &Istart, &Iend); CHKERRQ(ierr);
+
+  ierr = VecCreate(PETSC_COMM_WORLD, &Alpha); CHKERRQ(ierr);
+  ierr = VecSetSizes(Alpha, PETSC_DECIDE, ColS); CHKERRQ(ierr);
+  ierr = VecSetFromOptions(Alpha); CHKERRQ(ierr);
+  ierr = VecDuplicate(Alpha, &vect); CHKERRQ(ierr);
+  ierr = VecDuplicate(Alpha, &p); CHKERRQ(ierr);
+  ierr = VecDuplicate(Alpha, &ss); CHKERRQ(ierr);
+  ierr = VecDuplicate(b, &r); CHKERRQ(ierr);
+  ierr = VecDuplicate(b, &q); CHKERRQ(ierr);
+  ierr = VecDuplicate(b, &Ax); CHKERRQ(ierr);
+
+  ierr = VecDuplicate(b,&x_old);CHKERRQ(ierr);
+  ierr = VecDuplicate(b,&residu);CHKERRQ(ierr);
+
+
+
+  //indexes of row (these indexes are global)
+  ind_row = (PetscInt*)malloc(sizeof(PetscInt)*(Iend-Istart));
+  for(i=0; i<Iend-Istart; i++) ind_row[i] = i + Istart;
+
+  //Initializations
+  //  ierr = KSPGMRESSetRestart(ksp, 16); CHKERRQ(ierr);
+  ierr = KSPSetTolerances(ksp, 1e-13, 1e-13, PETSC_DEFAULT, 16); CHKERRQ(ierr);
+  ierr = KSPSetInitialGuessNonzero(ksp, PETSC_TRUE); CHKERRQ(ierr);
+
+
+
+  //GMRES WITH MINIMIZATION
+  T1 = MPI_Wtime();
+  while(giter<Emaxiter && norm>Eprecision ){
+    for(col=0; col<ColS  &&  norm>Eprecision; col++){
+
+
+      //Solve
+      ierr = KSPSolve(ksp, b, x); CHKERRQ(ierr);
+
+      ierr = KSPGetIterationNumber(ksp, &its); CHKERRQ(ierr);
+      total += its; Iiter ++;
+
+
+
+      //Build S'
+      ierr = VecGetArray(x, &array);
+      ierr = MatSetValues(S, Iend-Istart, ind_row, 1, &col, array, INSERT_VALUES);
+      VecRestoreArray(x, &array);
+
+
+
+      KSPGetResidualNorm(ksp,&norm);
+
+      /* //Error
+      ierr = VecCopy(x, residu); CHKERRQ(ierr);
+      ierr = VecAXPY(residu, -1, x_old); CHKERRQ(ierr);
+      ierr = VecNorm(residu, NORM_INFINITY, &norm); CHKERRQ(ierr);
+       */
+
+
+      ierr = PetscPrintf(PETSC_COMM_WORLD, "Norm of error %g, outer iteration %D\n", norm, giter); CHKERRQ(ierr);
+      ierr = VecCopy(x, x_old); CHKERRQ(ierr);
+
+
+    }
+
+
+    //minimization step
+    if( norm>Eprecision) {
+
+      MatAssemblyBegin(S, MAT_FINAL_ASSEMBLY);
+      MatAssemblyEnd(S, MAT_FINAL_ASSEMBLY);
+
+
+      //Build AS
+      if(first) {
+        MatMatMult(A,S, MAT_INITIAL_MATRIX,PETSC_DEFAULT,&AS);
+        first=0;
+      }
+      else
+        MatMatMult(A,S, MAT_REUSE_MATRIX,PETSC_DEFAULT,&AS);
+
+
+
+
+      //Minimization with CGLS 
+      MatMult(AS, Alpha, r);  VecAYPX(r, -1, b); //r_0 = b-AS*x_0
+
+
+      MatMultTranspose(AS, r, p); //p_0 = AS'*r_0 
+
+
+
+
+      ierr = VecCopy(p, ss); CHKERRQ(ierr); //p_0 = s_0
+      VecNorm(ss, NORM_2, &gamma); gamma = gamma * gamma; //gamma = norm2(s)^2
+      while(gamma>cgprec && iter<iterations){
+        MatMult(AS, p, q); //q = AS*p
+        VecNorm(q, NORM_2, &alpha); alpha = alpha * alpha; //alpha = norm2(q)^2
+        alpha = gamma / alpha; 
+        VecAXPY(Alpha, alpha, p); //Alpha += alpha*p
+        VecAXPY(r, -alpha, q); //r -= alpha*q
+        MatMultTranspose(AS, r, ss); // ss = AS'*r
+        oldgamma = gamma;
+        VecNorm(ss, NORM_2, &gamma); gamma = gamma * gamma; //gamma = norm2(s)^2
+        beta = gamma / oldgamma;
+        VecAYPX(p, beta, ss); //p = s+beta*p;
+        iter ++;
+      }        
+
+      iter = 0; giter ++;
+      //Minimizer
+      MatMult(S, Alpha, x); //x = S*Alpha;
+    }
+
+  }
+  T2 = MPI_Wtime();
+  ierr = PetscPrintf(PETSC_COMM_WORLD, "\t\t\t -- Execution time : %g (s)\n", T2-T1); CHKERRQ(ierr);
+  ierr = PetscPrintf(PETSC_COMM_WORLD, "\t\t\t -- Total number of iterations : %D\n", total); CHKERRQ(ierr);
+
+  return 0;
+
+}
+
+
+
+
+
+
+
+
+
+int KrylovMinimizeLSQR(Mat A, Vec b, Vec x) {
+  
+
+  //Variables
+
+  PetscScalar  alpha, beta;
+  PetscReal norm=20, Eprecision=1e-8, tol=1e-40;     
+  PetscInt giter=0, ColS=8, col=0, Emaxiter=50000000, iter=0, iterations=15, Iiter=0;
+  PetscErrorCode ierr;
+  PetscScalar T1, T2;
+  KSP ksp;
+  PetscInt total=0;  
+  PetscInt size;
+  PetscInt Istart,Iend;
+  PetscInt i,its;
+  Vec       x_old, residu;
+  Mat S, AS;
+  PetscScalar *array;
+  PetscInt *ind_row;
+  Vec  Ax;
+  PetscScalar norm_ksp;
+  Vec u,v,d,uu,vt,zero_long,zero_short,x_lsqr;
+
+  PetscInt first=1;
+
+  ierr = KSPCreate(PETSC_COMM_WORLD,&ksp);CHKERRQ(ierr);
+  ierr = KSPSetOperators(ksp,A,A);CHKERRQ(ierr);
+  ierr = KSPSetFromOptions(ksp);CHKERRQ(ierr);
+
+
+
+
+  VecGetSize(b,&size);
+
+  ierr = PetscPrintf(PETSC_COMM_WORLD, "Size of vector %D\n", size); CHKERRQ(ierr);  
+
+  PetscInt aa,bb;
+  MatGetOwnershipRange(A,&aa,&bb);
+
+  //  ierr = PetscSynchronizedPrintf(PETSC_COMM_WORLD, "%D %D\n", aa,bb); CHKERRQ(ierr);  
+  //PetscSynchronizedFlush(PETSC_COMM_WORLD);
+
+
+  ierr = MatCreate(PETSC_COMM_WORLD, &S);CHKERRQ(ierr);
+  ierr = MatSetSizes(S, bb-aa,  PETSC_DECIDE, size, ColS); CHKERRQ(ierr);
+  ierr = MatSetType(S, MATMPIDENSE); CHKERRQ(ierr);
+  ierr = MatSetUp(S); CHKERRQ(ierr);
+
+  ierr = MatGetOwnershipRange(S, &Istart, &Iend); CHKERRQ(ierr);
+
+
+
+  ierr = VecDuplicate(b, &Ax); CHKERRQ(ierr);
+
+  ierr = VecDuplicate(b,&x_old);CHKERRQ(ierr);
+  ierr = VecDuplicate(b,&residu);CHKERRQ(ierr);
+
+
+  //long vector
+  ierr = VecDuplicate(b,&u);CHKERRQ(ierr);
+
+
+  ierr = VecDuplicate(b,&uu);CHKERRQ(ierr);
+  ierr = VecDuplicate(b,&zero_long);CHKERRQ(ierr);
+  ierr = VecSet(zero_long,0);CHKERRQ(ierr);
+
+  //small vector
+  ierr = VecCreate(PETSC_COMM_WORLD, &v); CHKERRQ(ierr);
+  ierr = VecSetSizes(v, PETSC_DECIDE, ColS); CHKERRQ(ierr);
+  ierr = VecSetFromOptions(v); CHKERRQ(ierr);
+  ierr = VecDuplicate(v,&zero_short);CHKERRQ(ierr);
+  ierr = VecSet(zero_short,0);CHKERRQ(ierr);
+  ierr = VecDuplicate(v,&d);CHKERRQ(ierr);
+  ierr = VecDuplicate(v,&vt);CHKERRQ(ierr);
+  ierr = VecDuplicate(v,&x_lsqr);CHKERRQ(ierr);
+
+
+  //indexes of row (these indexes are global)
+  ind_row = (PetscInt*)malloc(sizeof(PetscInt)*(Iend-Istart));
+  for(i=0; i<Iend-Istart; i++) ind_row[i] = i + Istart;
+
+  //Initializations
+  //  ierr = KSPGMRESSetRestart(ksp, 16); CHKERRQ(ierr);
+  ierr = KSPSetTolerances(ksp, 1e-13, 1e-13, PETSC_DEFAULT, 16); CHKERRQ(ierr);
+  ierr = KSPSetInitialGuessNonzero(ksp, PETSC_TRUE); CHKERRQ(ierr);
+
+
+
+
+  //GMRES WITH MINIMIZATION
+  T1 = MPI_Wtime();
+  while(giter<Emaxiter && norm>Eprecision ){
+    for(col=0; col<ColS  &&  norm>Eprecision; col++){
+
+
+      //Solve
+      ierr = KSPSolve(ksp, b, x); CHKERRQ(ierr);
+
+      ierr = KSPGetIterationNumber(ksp, &its); CHKERRQ(ierr);
+      total += its; Iiter ++;
+
+
+
+      //Build S'
+      ierr = VecGetArray(x, &array);
+      ierr = MatSetValues(S, Iend-Istart, ind_row, 1, &col, array, INSERT_VALUES);
+      VecRestoreArray(x, &array);
+
+
+      
+      KSPGetResidualNorm(ksp,&norm);
+      /*
+      //Error
+      ierr = VecCopy(x, residu); CHKERRQ(ierr);
+      ierr = VecAXPY(residu, -1, x_old); CHKERRQ(ierr);
+      ierr = VecNorm(residu, NORM_INFINITY, &norm); CHKERRQ(ierr);
+       */
+
+
+      ierr = PetscPrintf(PETSC_COMM_WORLD, "Norm of error %g, outer iteration %D\n", norm, giter); CHKERRQ(ierr);
+      ierr = VecCopy(x, x_old); CHKERRQ(ierr);
+
+
+    }
+
+
+    //minimization step
+    if( norm>Eprecision) {
+
+      MatAssemblyBegin(S, MAT_FINAL_ASSEMBLY);
+      MatAssemblyEnd(S, MAT_FINAL_ASSEMBLY);
+
+
+
+
+      //Build AS
+      if(first) {
+        MatMatMult(A,S, MAT_INITIAL_MATRIX,PETSC_DEFAULT,&AS);
+        
+        first=0;
+      }
+      else
+        MatMatMult(A,S, MAT_REUSE_MATRIX,PETSC_DEFAULT,&AS);
+
+
+
+
+
+      //LSQR
+      //LSQR
+      //LSQR
+
+
+
+      PetscScalar n2b,tolb,normr,c,s,phibar,normar,norma,thet,rhot,rho,phi;
+      PetscInt stag;
+      tolb = tol * n2b;
+      VecNorm(b, NORM_2, &n2b); //n2b = norm(b);
+      ierr = VecCopy(b, u); CHKERRQ(ierr);   //u=b
+      VecNorm(u, NORM_2, &beta); // beta=norm(u)
+      normr=beta;
+      if (beta != 0) {
+        VecAYPX(u,1/beta,zero_long); //  u = u / beta;
+      }
+      c=1;
+      s=0;
+      phibar=beta;
+      MatMultTranspose(AS, u, v); //v=A'*u
+      ierr = VecSet(x_lsqr,0);CHKERRQ(ierr);
+      VecNorm(v, NORM_2, &alpha); // alpha=norm(v)
+      if (alpha != 0) {
+        VecAYPX(v,1/alpha,zero_short); //  v = v / alpha;
+      }
+      ierr = VecSet(d,0);CHKERRQ(ierr);
+      normar = alpha * beta;
+      norma=0;
+      //stag=0;
+      for(i=0;i<iterations;i++) {
+        MatMult(AS, v, uu); //uu=A*v
+        VecAYPX(u, -alpha, uu); //u = uu-alpha*u;
+        VecNorm(u, NORM_2, &beta); // beta=norm(u)
+        VecAYPX(u,1/beta,zero_long); //  u = u / beta;
+        norma=sqrt(norma*norma+alpha*alpha+beta*beta); // norma = norm([norma alpha beta]);
+        thet = - s * alpha;
+        rhot = c * alpha;
+        rho = sqrt(rhot*rhot + beta*beta);
+        c = rhot / rho;
+        s = - beta / rho;
+        phi = c * phibar;
+        if (phi == 0) {             // stagnation of the method
+          stag = 1;
+        }
+        phibar = s * phibar;
+        VecAYPX(d,-thet,v);       //d = (v - thet * d);
+        VecAYPX(d,1/rho,zero_short);     //d=d/ rho;
+
+
+
+        VecAXPY(x_lsqr,phi,d);     // x_lsqr=x_lsqr+phi*d
+        normr = abs(s) * normr;
+        MatMultTranspose(AS, u, vt); //vt=A'*u;
+        VecAYPX(v,-beta,vt);  // v = vt - beta * v;
+        VecNorm(v, NORM_2, &alpha); // alpha=norm(v)
+        VecAYPX(v,1/alpha,zero_short); //  v = v / alpha;
+        normar = alpha * abs( s * phi);
+      }        
+      
+
+
+      iter = 0; giter ++;
+      //Minimizer
+      MatMult(S, x_lsqr, x); //x = S*x_small;
+    }
+
+  }
+  T2 = MPI_Wtime();
+  ierr = PetscPrintf(PETSC_COMM_WORLD, "\t\t\t -- Execution time LSQR : %g (s)\n", T2-T1); CHKERRQ(ierr);
+  ierr = PetscPrintf(PETSC_COMM_WORLD, "\t\t\t -- Total number of iterations LSQR : %D\n", total); CHKERRQ(ierr);
+
+  return 0;
+
+}
+
+
+
+
+
+
+/* cell based evaluation */
+typedef struct {
+  PetscScalar E,nu,fx,fy;
+} Coefficients;
+
+/* Gauss point based evaluation 8+4+4+4 = 20 */
+typedef struct {
+  PetscScalar gp_coords[2*GAUSS_POINTS];
+  PetscScalar E[GAUSS_POINTS];
+  PetscScalar nu[GAUSS_POINTS];
+  PetscScalar fx[GAUSS_POINTS];
+  PetscScalar fy[GAUSS_POINTS];
+} GaussPointCoefficients;
+
+typedef struct {
+  PetscScalar ux_dof;
+  PetscScalar uy_dof;
+} ElasticityDOF;
+
+
+/*
+
+ D = E/((1+nu)(1-2nu)) * [ 1-nu   nu        0     ]
+                         [  nu   1-nu       0     ]
+                         [  0     0   0.5*(1-2nu) ]
+
+ B = [ d_dx   0   ]
+     [  0    d_dy ]
+     [ d_dy  d_dx ]
+
+ */
+
+/* FEM routines */
+/*
+ Element: Local basis function ordering
+ 1-----2
+ |     |
+ |     |
+ 0-----3
+ */
+static void ConstructQ12D_Ni(PetscScalar _xi[],PetscScalar Ni[])
+{
+  PetscScalar xi  = _xi[0];
+  PetscScalar eta = _xi[1];
+
+  Ni[0] = 0.25*(1.0-xi)*(1.0-eta);
+  Ni[1] = 0.25*(1.0-xi)*(1.0+eta);
+  Ni[2] = 0.25*(1.0+xi)*(1.0+eta);
+  Ni[3] = 0.25*(1.0+xi)*(1.0-eta);
+}
+
+static void ConstructQ12D_GNi(PetscScalar _xi[],PetscScalar GNi[][NODES_PER_EL])
+{
+  PetscScalar xi  = _xi[0];
+  PetscScalar eta = _xi[1];
+
+  GNi[0][0] = -0.25*(1.0-eta);
+  GNi[0][1] = -0.25*(1.0+eta);
+  GNi[0][2] =   0.25*(1.0+eta);
+  GNi[0][3] =   0.25*(1.0-eta);
+
+  GNi[1][0] = -0.25*(1.0-xi);
+  GNi[1][1] =   0.25*(1.0-xi);
+  GNi[1][2] =   0.25*(1.0+xi);
+  GNi[1][3] = -0.25*(1.0+xi);
+}
+
+static void ConstructQ12D_GNx(PetscScalar GNi[][NODES_PER_EL],PetscScalar GNx[][NODES_PER_EL],PetscScalar coords[],PetscScalar *det_J)
+{
+  PetscScalar J00,J01,J10,J11,J;
+  PetscScalar iJ00,iJ01,iJ10,iJ11;
+  PetscInt    i;
+
+  J00 = J01 = J10 = J11 = 0.0;
+  for (i = 0; i < NODES_PER_EL; i++) {
+    PetscScalar cx = coords[2*i+0];
+    PetscScalar cy = coords[2*i+1];
+
+    J00 = J00+GNi[0][i]*cx;      /* J_xx = dx/dxi */
+    J01 = J01+GNi[0][i]*cy;      /* J_xy = dy/dxi */
+    J10 = J10+GNi[1][i]*cx;      /* J_yx = dx/deta */
+    J11 = J11+GNi[1][i]*cy;      /* J_yy = dy/deta */
+  }
+  J = (J00*J11)-(J01*J10);
+
+  iJ00 =  J11/J;
+  iJ01 = -J01/J;
+  iJ10 = -J10/J;
+  iJ11 =  J00/J;
+
+
+  for (i = 0; i < NODES_PER_EL; i++) {
+    GNx[0][i] = GNi[0][i]*iJ00+GNi[1][i]*iJ01;
+    GNx[1][i] = GNi[0][i]*iJ10+GNi[1][i]*iJ11;
+  }
+
+  if (det_J != NULL) *det_J = J;
+}
+
+static void ConstructGaussQuadrature(PetscInt *ngp,PetscScalar gp_xi[][2],PetscScalar gp_weight[])
+{
+  *ngp         = 4;
+  gp_xi[0][0]  = -0.57735026919;gp_xi[0][1] = -0.57735026919;
+  gp_xi[1][0]  = -0.57735026919;gp_xi[1][1] =  0.57735026919;
+  gp_xi[2][0]  =  0.57735026919;gp_xi[2][1] =  0.57735026919;
+  gp_xi[3][0]  =  0.57735026919;gp_xi[3][1] = -0.57735026919;
+  gp_weight[0] = 1.0;
+  gp_weight[1] = 1.0;
+  gp_weight[2] = 1.0;
+  gp_weight[3] = 1.0;
+}
+
+
+/* procs to the left claim the ghost node as their element */
+#undef __FUNCT__
+#define __FUNCT__ "DMDAGetLocalElementSize"
+static PetscErrorCode DMDAGetLocalElementSize(DM da,PetscInt *mxl,PetscInt *myl,PetscInt *mzl)
+{
+  PetscErrorCode ierr;
+  PetscInt       m,n,p,M,N,P;
+  PetscInt       sx,sy,sz;
+
+  PetscFunctionBeginUser;
+  ierr = DMDAGetInfo(da,0,&M,&N,&P,0,0,0,0,0,0,0,0,0);CHKERRQ(ierr);
+  ierr = DMDAGetCorners(da,&sx,&sy,&sz,&m,&n,&p);CHKERRQ(ierr);
+
+  if (mxl != NULL) {
+    *mxl = m;
+    if ((sx+m) == M) *mxl = m-1;    /* last proc */
+  }
+  if (myl != NULL) {
+    *myl = n;
+    if ((sy+n) == N) *myl = n-1;  /* last proc */
+  }
+  if (mzl != NULL) {
+    *mzl = p;
+    if ((sz+p) == P) *mzl = p-1;  /* last proc */
+  }
+  PetscFunctionReturn(0);
+}
+
+#undef __FUNCT__
+#define __FUNCT__ "DMDAGetElementCorners"
+static PetscErrorCode DMDAGetElementCorners(DM da,PetscInt *sx,PetscInt *sy,PetscInt *sz,PetscInt *mx,PetscInt *my,PetscInt *mz)
+{
+  PetscErrorCode ierr;
+  PetscInt       si,sj,sk;
+
+  PetscFunctionBeginUser;
+  ierr = DMDAGetGhostCorners(da,&si,&sj,&sk,0,0,0);CHKERRQ(ierr);
+
+  if (sx) {
+    *sx = si;
+    if (si != 0) *sx = si+1;
+  }
+  if (sy) {
+    *sy = sj;
+    if (sj != 0) *sy = sj+1;
+  }
+
+  if (sk) {
+    *sz = sk;
+    if (sk != 0) *sz = sk+1;
+  }
+
+  ierr = DMDAGetLocalElementSize(da,mx,my,mz);CHKERRQ(ierr);
+  PetscFunctionReturn(0);
+}
+
+#undef __FUNCT__
+#define __FUNCT__ "DMDAGetElementOwnershipRanges2d"
+static PetscErrorCode DMDAGetElementOwnershipRanges2d(DM da,PetscInt **_lx,PetscInt **_ly)
+{
+  PetscErrorCode ierr;
+  PetscMPIInt    rank;
+  PetscInt       proc_I,proc_J;
+  PetscInt       cpu_x,cpu_y;
+  PetscInt       local_mx,local_my;
+  Vec            vlx,vly;
+  PetscInt       *LX,*LY,i;
+  PetscScalar    *_a;
+  Vec            V_SEQ;
+  VecScatter     ctx;
+
+  PetscFunctionBeginUser;
+  MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
+
+  DMDAGetInfo(da,0,0,0,0,&cpu_x,&cpu_y,0,0,0,0,0,0,0);
+
+  proc_J = rank/cpu_x;
+  proc_I = rank-cpu_x*proc_J;
+
+  ierr = PetscMalloc(sizeof(PetscInt)*cpu_x,&LX);CHKERRQ(ierr);
+  ierr = PetscMalloc(sizeof(PetscInt)*cpu_y,&LY);CHKERRQ(ierr);
+
+  ierr = DMDAGetLocalElementSize(da,&local_mx,&local_my,NULL);CHKERRQ(ierr);
+  ierr = VecCreate(PETSC_COMM_WORLD,&vlx);CHKERRQ(ierr);
+  ierr = VecSetSizes(vlx,PETSC_DECIDE,cpu_x);CHKERRQ(ierr);
+  ierr = VecSetFromOptions(vlx);CHKERRQ(ierr);
+
+  ierr = VecCreate(PETSC_COMM_WORLD,&vly);CHKERRQ(ierr);
+  ierr = VecSetSizes(vly,PETSC_DECIDE,cpu_y);CHKERRQ(ierr);
+  ierr = VecSetFromOptions(vly);CHKERRQ(ierr);
+
+  ierr = VecSetValue(vlx,proc_I,(PetscScalar)(local_mx+1.0e-9),INSERT_VALUES);CHKERRQ(ierr);
+  ierr = VecSetValue(vly,proc_J,(PetscScalar)(local_my+1.0e-9),INSERT_VALUES);CHKERRQ(ierr);
+  ierr = VecAssemblyBegin(vlx);VecAssemblyEnd(vlx);CHKERRQ(ierr);
+  ierr = VecAssemblyBegin(vly);VecAssemblyEnd(vly);CHKERRQ(ierr);
+
+
+
+  ierr = VecScatterCreateToAll(vlx,&ctx,&V_SEQ);CHKERRQ(ierr);
+  ierr = VecScatterBegin(ctx,vlx,V_SEQ,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
+  ierr = VecScatterEnd(ctx,vlx,V_SEQ,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
+  ierr = VecGetArray(V_SEQ,&_a);CHKERRQ(ierr);
+  for (i = 0; i < cpu_x; i++) LX[i] = (PetscInt)PetscRealPart(_a[i]);
+  ierr = VecRestoreArray(V_SEQ,&_a);CHKERRQ(ierr);
+  ierr = VecScatterDestroy(&ctx);CHKERRQ(ierr);
+  ierr = VecDestroy(&V_SEQ);CHKERRQ(ierr);
+
+  ierr = VecScatterCreateToAll(vly,&ctx,&V_SEQ);CHKERRQ(ierr);
+  ierr = VecScatterBegin(ctx,vly,V_SEQ,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
+  ierr = VecScatterEnd(ctx,vly,V_SEQ,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
+  ierr = VecGetArray(V_SEQ,&_a);CHKERRQ(ierr);
+  for (i = 0; i < cpu_y; i++) LY[i] = (PetscInt)PetscRealPart(_a[i]);
+  ierr = VecRestoreArray(V_SEQ,&_a);CHKERRQ(ierr);
+  ierr = VecScatterDestroy(&ctx);CHKERRQ(ierr);
+  ierr = VecDestroy(&V_SEQ);CHKERRQ(ierr);
+
+  *_lx = LX;
+  *_ly = LY;
+
+  ierr = VecDestroy(&vlx);CHKERRQ(ierr);
+  ierr = VecDestroy(&vly);CHKERRQ(ierr);
+  PetscFunctionReturn(0);
+}
+
+#undef __FUNCT__
+#define __FUNCT__ "DMDACoordViewGnuplot2d"
+static PetscErrorCode DMDACoordViewGnuplot2d(DM da,const char prefix[])
+{
+  DM             cda;
+  Vec            coords;
+  DMDACoor2d     **_coords;
+  PetscInt       si,sj,nx,ny,i,j;
+  FILE           *fp;
+  char           fname[PETSC_MAX_PATH_LEN];
+  PetscMPIInt    rank;
+  PetscErrorCode ierr;
+
+  PetscFunctionBeginUser;
+  ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
+  ierr = PetscSNPrintf(fname,sizeof(fname),"%s-p%1.4d.dat",prefix,rank);CHKERRQ(ierr);
+  ierr = PetscFOpen(PETSC_COMM_SELF,fname,"w",&fp);CHKERRQ(ierr);
+  if (!fp) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Cannot open file");
+  ierr = PetscFPrintf(PETSC_COMM_SELF,fp,"### Element geometry for processor %1.4d ### \n",rank);CHKERRQ(ierr);
+
+  ierr = DMGetCoordinateDM(da,&cda);CHKERRQ(ierr);
+  ierr = DMGetCoordinatesLocal(da,&coords);CHKERRQ(ierr);
+  ierr = DMDAVecGetArray(cda,coords,&_coords);CHKERRQ(ierr);
+  ierr = DMDAGetGhostCorners(cda,&si,&sj,0,&nx,&ny,0);CHKERRQ(ierr);
+  for (j = sj; j < sj+ny-1; j++) {
+    for (i = si; i < si+nx-1; i++) {
+      ierr = PetscFPrintf(PETSC_COMM_SELF,fp,"%1.6e %1.6e \n",PetscRealPart(_coords[j][i].x),PetscRealPart(_coords[j][i].y));CHKERRQ(ierr);
+      ierr = PetscFPrintf(PETSC_COMM_SELF,fp,"%1.6e %1.6e \n",PetscRealPart(_coords[j+1][i].x),PetscRealPart(_coords[j+1][i].y));CHKERRQ(ierr);
+      ierr = PetscFPrintf(PETSC_COMM_SELF,fp,"%1.6e %1.6e \n",PetscRealPart(_coords[j+1][i+1].x),PetscRealPart(_coords[j+1][i+1].y));CHKERRQ(ierr);
+      ierr = PetscFPrintf(PETSC_COMM_SELF,fp,"%1.6e %1.6e \n",PetscRealPart(_coords[j][i+1].x),PetscRealPart(_coords[j][i+1].y));CHKERRQ(ierr);
+      ierr = PetscFPrintf(PETSC_COMM_SELF,fp,"%1.6e %1.6e \n\n",PetscRealPart(_coords[j][i].x),PetscRealPart(_coords[j][i].y));CHKERRQ(ierr);
+    }
+  }
+  ierr = DMDAVecRestoreArray(cda,coords,&_coords);CHKERRQ(ierr);
+
+  ierr = PetscFClose(PETSC_COMM_SELF,fp);CHKERRQ(ierr);
+  PetscFunctionReturn(0);
+}
+
+#undef __FUNCT__
+#define __FUNCT__ "DMDAViewGnuplot2d"
+static PetscErrorCode DMDAViewGnuplot2d(DM da,Vec fields,const char comment[],const char prefix[])
+{
+  DM             cda;
+  Vec            coords,local_fields;
+  DMDACoor2d     **_coords;
+  FILE           *fp;
+  char           fname[PETSC_MAX_PATH_LEN];
+  const char     *field_name;
+  PetscMPIInt    rank;
+  PetscInt       si,sj,nx,ny,i,j;
+  PetscInt       n_dofs,d;
+  PetscScalar    *_fields;
+  PetscErrorCode ierr;
+
+  PetscFunctionBeginUser;
+  MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
+  ierr = PetscSNPrintf(fname,sizeof(fname),"%s-p%1.4d.dat",prefix,rank);CHKERRQ(ierr);
+  ierr = PetscFOpen(PETSC_COMM_SELF,fname,"w",&fp);CHKERRQ(ierr);
+  if (!fp) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Cannot open file");
+
+  ierr = PetscFPrintf(PETSC_COMM_SELF,fp,"### %s (processor %1.4d) ### \n",comment,rank);CHKERRQ(ierr);
+  ierr = DMDAGetInfo(da,0,0,0,0,0,0,0,&n_dofs,0,0,0,0,0);CHKERRQ(ierr);
+  ierr = PetscFPrintf(PETSC_COMM_SELF,fp,"### x y ");CHKERRQ(ierr);
+  for (d = 0; d < n_dofs; d++) {
+    ierr = DMDAGetFieldName(da,d,&field_name);CHKERRQ(ierr);
+    ierr = PetscFPrintf(PETSC_COMM_SELF,fp,"%s ",field_name);CHKERRQ(ierr);
+  }
+  ierr = PetscFPrintf(PETSC_COMM_SELF,fp,"###\n");CHKERRQ(ierr);
+
+
+  ierr = DMGetCoordinateDM(da,&cda);CHKERRQ(ierr);
+  ierr = DMGetCoordinatesLocal(da,&coords);CHKERRQ(ierr);
+  ierr = DMDAVecGetArray(cda,coords,&_coords);CHKERRQ(ierr);
+  ierr = DMDAGetGhostCorners(cda,&si,&sj,0,&nx,&ny,0);CHKERRQ(ierr);
+
+  ierr = DMCreateLocalVector(da,&local_fields);CHKERRQ(ierr);
+  ierr = DMGlobalToLocalBegin(da,fields,INSERT_VALUES,local_fields);CHKERRQ(ierr);
+  ierr = DMGlobalToLocalEnd(da,fields,INSERT_VALUES,local_fields);CHKERRQ(ierr);
+  ierr = VecGetArray(local_fields,&_fields);CHKERRQ(ierr);
+
+  for (j = sj; j < sj+ny; j++) {
+    for (i = si; i < si+nx; i++) {
+      PetscScalar coord_x,coord_y;
+      PetscScalar field_d;
+
+      coord_x = _coords[j][i].x;
+      coord_y = _coords[j][i].y;
+
+      ierr = PetscFPrintf(PETSC_COMM_SELF,fp,"%1.6e %1.6e ",PetscRealPart(coord_x),PetscRealPart(coord_y));CHKERRQ(ierr);
+      for (d = 0; d < n_dofs; d++) {
+        field_d = _fields[n_dofs*((i-si)+(j-sj)*(nx))+d];
+        ierr    = PetscFPrintf(PETSC_COMM_SELF,fp,"%1.6e ",PetscRealPart(field_d));CHKERRQ(ierr);
+      }
+      ierr = PetscFPrintf(PETSC_COMM_SELF,fp,"\n");CHKERRQ(ierr);
+    }
+  }
+  ierr = VecRestoreArray(local_fields,&_fields);CHKERRQ(ierr);
+  ierr = VecDestroy(&local_fields);CHKERRQ(ierr);
+
+  ierr = DMDAVecRestoreArray(cda,coords,&_coords);CHKERRQ(ierr);
+
+  ierr = PetscFClose(PETSC_COMM_SELF,fp);CHKERRQ(ierr);
+  PetscFunctionReturn(0);
+}
+
+#undef __FUNCT__
+#define __FUNCT__ "DMDAViewCoefficientsGnuplot2d"
+static PetscErrorCode DMDAViewCoefficientsGnuplot2d(DM da,Vec fields,const char comment[],const char prefix[])
+{
+  DM                     cda;
+  Vec                    local_fields;
+  FILE                   *fp;
+  char                   fname[PETSC_MAX_PATH_LEN];
+  const char             *field_name;
+  PetscMPIInt            rank;
+  PetscInt               si,sj,nx,ny,i,j,p;
+  PetscInt               n_dofs,d;
+  GaussPointCoefficients **_coefficients;
+  PetscErrorCode         ierr;
+
+  PetscFunctionBeginUser;
+  ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
+  ierr = PetscSNPrintf(fname,sizeof(fname),"%s-p%1.4d.dat",prefix,rank);CHKERRQ(ierr);
+  ierr = PetscFOpen(PETSC_COMM_SELF,fname,"w",&fp);CHKERRQ(ierr);
+  if (!fp) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Cannot open file");
+
+  ierr = PetscFPrintf(PETSC_COMM_SELF,fp,"### %s (processor %1.4d) ### \n",comment,rank);CHKERRQ(ierr);
+  ierr = DMDAGetInfo(da,0,0,0,0,0,0,0,&n_dofs,0,0,0,0,0);CHKERRQ(ierr);
+  ierr = PetscFPrintf(PETSC_COMM_SELF,fp,"### x y ");CHKERRQ(ierr);
+  for (d = 0; d < n_dofs; d++) {
+    ierr = DMDAGetFieldName(da,d,&field_name);CHKERRQ(ierr);
+    ierr = PetscFPrintf(PETSC_COMM_SELF,fp,"%s ",field_name);CHKERRQ(ierr);
+  }
+  ierr = PetscFPrintf(PETSC_COMM_SELF,fp,"###\n");CHKERRQ(ierr);
+
+
+  ierr = DMGetCoordinateDM(da,&cda);CHKERRQ(ierr);
+  ierr = DMDAGetGhostCorners(cda,&si,&sj,0,&nx,&ny,0);CHKERRQ(ierr);
+
+  ierr = DMCreateLocalVector(da,&local_fields);CHKERRQ(ierr);
+  ierr = DMGlobalToLocalBegin(da,fields,INSERT_VALUES,local_fields);CHKERRQ(ierr);
+  ierr = DMGlobalToLocalEnd(da,fields,INSERT_VALUES,local_fields);CHKERRQ(ierr);
+  ierr = DMDAVecGetArray(da,local_fields,&_coefficients);CHKERRQ(ierr);
+
+  for (j = sj; j < sj+ny; j++) {
+    for (i = si; i < si+nx; i++) {
+      PetscScalar coord_x,coord_y;
+
+      for (p = 0; p < GAUSS_POINTS; p++) {
+        coord_x = _coefficients[j][i].gp_coords[2*p];
+        coord_y = _coefficients[j][i].gp_coords[2*p+1];
+
+        ierr = PetscFPrintf(PETSC_COMM_SELF,fp,"%1.6e %1.6e ",PetscRealPart(coord_x),PetscRealPart(coord_y));CHKERRQ(ierr);
+
+        ierr = PetscFPrintf(PETSC_COMM_SELF,fp,"%1.6e %1.6e %1.6e %1.6e",
+                            PetscRealPart(_coefficients[j][i].E[p]),PetscRealPart(_coefficients[j][i].nu[p]),
+                            PetscRealPart(_coefficients[j][i].fx[p]),PetscRealPart(_coefficients[j][i].fy[p]));CHKERRQ(ierr);
+        ierr = PetscFPrintf(PETSC_COMM_SELF,fp,"\n");CHKERRQ(ierr);
+      }
+    }
+  }
+  ierr = DMDAVecRestoreArray(da,local_fields,&_coefficients);CHKERRQ(ierr);
+  ierr = VecDestroy(&local_fields);CHKERRQ(ierr);
+
+  ierr = PetscFClose(PETSC_COMM_SELF,fp);CHKERRQ(ierr);
+  PetscFunctionReturn(0);
+}
+
+static void FormStressOperatorQ1(PetscScalar Ke[],PetscScalar coords[],PetscScalar E[],PetscScalar nu[])
+{
+  PetscInt    ngp;
+  PetscScalar gp_xi[GAUSS_POINTS][2];
+  PetscScalar gp_weight[GAUSS_POINTS];
+  PetscInt    p,i,j,k,l;
+  PetscScalar GNi_p[NSD][NODES_PER_EL],GNx_p[NSD][NODES_PER_EL];
+  PetscScalar J_p;
+  PetscScalar B[3][U_DOFS*NODES_PER_EL];
+  PetscScalar prop_E,prop_nu,factor,constit_D[3][3];
+
+  /* define quadrature rule */
+  ConstructGaussQuadrature(&ngp,gp_xi,gp_weight);
+
+  /* evaluate integral */
+  for (p = 0; p < ngp; p++) {
+    ConstructQ12D_GNi(gp_xi[p],GNi_p);
+    ConstructQ12D_GNx(GNi_p,GNx_p,coords,&J_p);
+
+    for (i = 0; i < NODES_PER_EL; i++) {
+      PetscScalar d_dx_i = GNx_p[0][i];
+      PetscScalar d_dy_i = GNx_p[1][i];
+
+      B[0][2*i] = d_dx_i;  B[0][2*i+1] = 0.0;
+      B[1][2*i] = 0.0;     B[1][2*i+1] = d_dy_i;
+      B[2][2*i] = d_dy_i;  B[2][2*i+1] = d_dx_i;
+    }
+
+    /* form D for the quadrature point */
+    prop_E          = E[p];
+    prop_nu         = nu[p];
+    factor          = prop_E / ((1.0+prop_nu)*(1.0-2.0*prop_nu));
+    constit_D[0][0] = 1.0-prop_nu;  constit_D[0][1] = prop_nu;      constit_D[0][2] = 0.0;
+    constit_D[1][0] = prop_nu;      constit_D[1][1] = 1.0-prop_nu;  constit_D[1][2] = 0.0;
+    constit_D[2][0] = 0.0;          constit_D[2][1] = 0.0;          constit_D[2][2] = 0.5*(1.0-2.0*prop_nu);
+    for (i = 0; i < 3; i++) {
+      for (j = 0; j < 3; j++) {
+        constit_D[i][j] = factor * constit_D[i][j] * gp_weight[p] * J_p;
+      }
+    }
+
+    /* form Bt tildeD B */
+    /*
+     Ke_ij = Bt_ik . D_kl . B_lj
+     = B_ki . D_kl . B_lj
+     */
+    for (i = 0; i < 8; i++) {
+      for (j = 0; j < 8; j++) {
+        for (k = 0; k < 3; k++) {
+          for (l = 0; l < 3; l++) {
+            Ke[8*i+j] = Ke[8*i+j] + B[k][i] * constit_D[k][l] * B[l][j];
+          }
+        }
+      }
+    }
+
+  } /* end quadrature */
+}
+
+static void FormMomentumRhsQ1(PetscScalar Fe[],PetscScalar coords[],PetscScalar fx[],PetscScalar fy[])
+{
+  PetscInt    ngp;
+  PetscScalar gp_xi[GAUSS_POINTS][2];
+  PetscScalar gp_weight[GAUSS_POINTS];
+  PetscInt    p,i;
+  PetscScalar Ni_p[NODES_PER_EL];
+  PetscScalar GNi_p[NSD][NODES_PER_EL],GNx_p[NSD][NODES_PER_EL];
+  PetscScalar J_p,fac;
+
+  /* define quadrature rule */
+  ConstructGaussQuadrature(&ngp,gp_xi,gp_weight);
+
+  /* evaluate integral */
+  for (p = 0; p < ngp; p++) {
+    ConstructQ12D_Ni(gp_xi[p],Ni_p);
+    ConstructQ12D_GNi(gp_xi[p],GNi_p);
+    ConstructQ12D_GNx(GNi_p,GNx_p,coords,&J_p);
+    fac = gp_weight[p]*J_p;
+
+    for (i = 0; i < NODES_PER_EL; i++) {
+      Fe[NSD*i]   += fac*Ni_p[i]*fx[p];
+      Fe[NSD*i+1] += fac*Ni_p[i]*fy[p];
+    }
+  }
+}
+
+/*
+ i,j are the element indices
+ The unknown is a vector quantity.
+ The s[].c is used to indicate the degree of freedom.
+ */
+#undef __FUNCT__
+#define __FUNCT__ "DMDAGetElementEqnums_u"
+static PetscErrorCode DMDAGetElementEqnums_u(MatStencil s_u[],PetscInt i,PetscInt j)
+{
+  PetscFunctionBeginUser;
+  /* displacement */
+  /* node 0 */
+  s_u[0].i = i;s_u[0].j = j;s_u[0].c = 0;          /* Ux0 */
+  s_u[1].i = i;s_u[1].j = j;s_u[1].c = 1;          /* Uy0 */
+
+  /* node 1 */
+  s_u[2].i = i;s_u[2].j = j+1;s_u[2].c = 0;        /* Ux1 */
+  s_u[3].i = i;s_u[3].j = j+1;s_u[3].c = 1;        /* Uy1 */
+
+  /* node 2 */
+  s_u[4].i = i+1;s_u[4].j = j+1;s_u[4].c = 0;      /* Ux2 */
+  s_u[5].i = i+1;s_u[5].j = j+1;s_u[5].c = 1;      /* Uy2 */
+
+  /* node 3 */
+  s_u[6].i = i+1;s_u[6].j = j;s_u[6].c = 0;        /* Ux3 */
+  s_u[7].i = i+1;s_u[7].j = j;s_u[7].c = 1;        /* Uy3 */
+  PetscFunctionReturn(0);
+}
+
+#undef __FUNCT__
+#define __FUNCT__ "GetElementCoords"
+static PetscErrorCode GetElementCoords(DMDACoor2d **_coords,PetscInt ei,PetscInt ej,PetscScalar el_coords[])
+{
+  PetscFunctionBeginUser;
+  /* get coords for the element */
+  el_coords[NSD*0+0] = _coords[ej][ei].x;      el_coords[NSD*0+1] = _coords[ej][ei].y;
+  el_coords[NSD*1+0] = _coords[ej+1][ei].x;    el_coords[NSD*1+1] = _coords[ej+1][ei].y;
+  el_coords[NSD*2+0] = _coords[ej+1][ei+1].x;  el_coords[NSD*2+1] = _coords[ej+1][ei+1].y;
+  el_coords[NSD*3+0] = _coords[ej][ei+1].x;    el_coords[NSD*3+1] = _coords[ej][ei+1].y;
+  PetscFunctionReturn(0);
+}
+
+#undef __FUNCT__
+#define __FUNCT__ "AssembleA_Elasticity"
+static PetscErrorCode AssembleA_Elasticity(Mat A,DM elas_da,DM properties_da,Vec properties)
+{
+  DM                     cda;
+  Vec                    coords;
+  DMDACoor2d             **_coords;
+  MatStencil             u_eqn[NODES_PER_EL*U_DOFS]; /* 2 degrees of freedom */
+  PetscInt               sex,sey,mx,my;
+  PetscInt               ei,ej;
+  PetscScalar            Ae[NODES_PER_EL*U_DOFS*NODES_PER_EL*U_DOFS];
+  PetscScalar            el_coords[NODES_PER_EL*NSD];
+  Vec                    local_properties;
+  GaussPointCoefficients **props;
+  PetscScalar            *prop_E,*prop_nu;
+  PetscErrorCode         ierr;
+
+  PetscFunctionBeginUser;
+  /* setup for coords */
+  ierr = DMGetCoordinateDM(elas_da,&cda);CHKERRQ(ierr);
+  ierr = DMGetCoordinatesLocal(elas_da,&coords);CHKERRQ(ierr);
+  ierr = DMDAVecGetArray(cda,coords,&_coords);CHKERRQ(ierr);
+
+  /* setup for coefficients */
+  ierr = DMCreateLocalVector(properties_da,&local_properties);CHKERRQ(ierr);
+  ierr = DMGlobalToLocalBegin(properties_da,properties,INSERT_VALUES,local_properties);CHKERRQ(ierr);
+  ierr = DMGlobalToLocalEnd(properties_da,properties,INSERT_VALUES,local_properties);CHKERRQ(ierr);
+  ierr = DMDAVecGetArray(properties_da,local_properties,&props);CHKERRQ(ierr);
+
+  ierr = DMDAGetElementCorners(elas_da,&sex,&sey,0,&mx,&my,0);CHKERRQ(ierr);
+  for (ej = sey; ej < sey+my; ej++) {
+    for (ei = sex; ei < sex+mx; ei++) {
+      /* get coords for the element */
+      GetElementCoords(_coords,ei,ej,el_coords);
+
+      /* get coefficients for the element */
+      prop_E  = props[ej][ei].E;
+      prop_nu = props[ej][ei].nu;
+
+      /* initialise element stiffness matrix */
+      ierr = PetscMemzero(Ae,sizeof(PetscScalar)*NODES_PER_EL*U_DOFS*NODES_PER_EL*U_DOFS);CHKERRQ(ierr);
+
+      /* form element stiffness matrix */
+      FormStressOperatorQ1(Ae,el_coords,prop_E,prop_nu);
+
+      /* insert element matrix into global matrix */
+      ierr = DMDAGetElementEqnums_u(u_eqn,ei,ej);CHKERRQ(ierr);
+      ierr = MatSetValuesStencil(A,NODES_PER_EL*U_DOFS,u_eqn,NODES_PER_EL*U_DOFS,u_eqn,Ae,ADD_VALUES);CHKERRQ(ierr);
+    }
+  }
+  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
+  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
+
+  ierr = DMDAVecRestoreArray(cda,coords,&_coords);CHKERRQ(ierr);
+
+  ierr = DMDAVecRestoreArray(properties_da,local_properties,&props);CHKERRQ(ierr);
+  ierr = VecDestroy(&local_properties);CHKERRQ(ierr);
+  PetscFunctionReturn(0);
+}
+
+
+#undef __FUNCT__
+#define __FUNCT__ "DMDASetValuesLocalStencil_ADD_VALUES"
+static PetscErrorCode DMDASetValuesLocalStencil_ADD_VALUES(ElasticityDOF **fields_F,MatStencil u_eqn[],PetscScalar Fe_u[])
+{
+  PetscInt n;
+
+  PetscFunctionBeginUser;
+  for (n = 0; n < 4; n++) {
+    fields_F[u_eqn[2*n].j][u_eqn[2*n].i].ux_dof     = fields_F[u_eqn[2*n].j][u_eqn[2*n].i].ux_dof+Fe_u[2*n];
+    fields_F[u_eqn[2*n+1].j][u_eqn[2*n+1].i].uy_dof = fields_F[u_eqn[2*n+1].j][u_eqn[2*n+1].i].uy_dof+Fe_u[2*n+1];
+  }
+  PetscFunctionReturn(0);
+}
+
+#undef __FUNCT__
+#define __FUNCT__ "AssembleF_Elasticity"
+static PetscErrorCode AssembleF_Elasticity(Vec F,DM elas_da,DM properties_da,Vec properties)
+{
+  DM                     cda;
+  Vec                    coords;
+  DMDACoor2d             **_coords;
+  MatStencil             u_eqn[NODES_PER_EL*U_DOFS]; /* 2 degrees of freedom */
+  PetscInt               sex,sey,mx,my;
+  PetscInt               ei,ej;
+  PetscScalar            Fe[NODES_PER_EL*U_DOFS];
+  PetscScalar            el_coords[NODES_PER_EL*NSD];
+  Vec                    local_properties;
+  GaussPointCoefficients **props;
+  PetscScalar            *prop_fx,*prop_fy;
+  Vec                    local_F;
+  ElasticityDOF          **ff;
+  PetscErrorCode         ierr;
+
+  PetscFunctionBeginUser;
+  /* setup for coords */
+  ierr = DMGetCoordinateDM(elas_da,&cda);CHKERRQ(ierr);
+  ierr = DMGetCoordinatesLocal(elas_da,&coords);CHKERRQ(ierr);
+  ierr = DMDAVecGetArray(cda,coords,&_coords);CHKERRQ(ierr);
+
+  /* setup for coefficients */
+  ierr = DMGetLocalVector(properties_da,&local_properties);CHKERRQ(ierr);
+  ierr = DMGlobalToLocalBegin(properties_da,properties,INSERT_VALUES,local_properties);CHKERRQ(ierr);
+  ierr = DMGlobalToLocalEnd(properties_da,properties,INSERT_VALUES,local_properties);CHKERRQ(ierr);
+  ierr = DMDAVecGetArray(properties_da,local_properties,&props);CHKERRQ(ierr);
+
+  /* get acces to the vector */
+  ierr = DMGetLocalVector(elas_da,&local_F);CHKERRQ(ierr);
+  ierr = VecZeroEntries(local_F);CHKERRQ(ierr);
+  ierr = DMDAVecGetArray(elas_da,local_F,&ff);CHKERRQ(ierr);
+
+
+  ierr = DMDAGetElementCorners(elas_da,&sex,&sey,0,&mx,&my,0);CHKERRQ(ierr);
+  for (ej = sey; ej < sey+my; ej++) {
+    for (ei = sex; ei < sex+mx; ei++) {
+      /* get coords for the element */
+      GetElementCoords(_coords,ei,ej,el_coords);
+
+      /* get coefficients for the element */
+      prop_fx = props[ej][ei].fx;
+      prop_fy = props[ej][ei].fy;
+
+      /* initialise element stiffness matrix */
+      ierr = PetscMemzero(Fe,sizeof(PetscScalar)*NODES_PER_EL*U_DOFS);CHKERRQ(ierr);
+
+      /* form element stiffness matrix */
+      FormMomentumRhsQ1(Fe,el_coords,prop_fx,prop_fy);
+
+      /* insert element matrix into global matrix */
+      ierr = DMDAGetElementEqnums_u(u_eqn,ei,ej);CHKERRQ(ierr);
+
+      ierr = DMDASetValuesLocalStencil_ADD_VALUES(ff,u_eqn,Fe);CHKERRQ(ierr);
+    }
+  }
+
+  ierr = DMDAVecRestoreArray(elas_da,local_F,&ff);CHKERRQ(ierr);
+  ierr = DMLocalToGlobalBegin(elas_da,local_F,ADD_VALUES,F);CHKERRQ(ierr);
+  ierr = DMLocalToGlobalEnd(elas_da,local_F,ADD_VALUES,F);CHKERRQ(ierr);
+  ierr = DMRestoreLocalVector(elas_da,&local_F);CHKERRQ(ierr);
+
+  ierr = DMDAVecRestoreArray(cda,coords,&_coords);CHKERRQ(ierr);
+
+  ierr = DMDAVecRestoreArray(properties_da,local_properties,&props);CHKERRQ(ierr);
+  ierr = DMRestoreLocalVector(properties_da,&local_properties);CHKERRQ(ierr);
+  PetscFunctionReturn(0);
+}
+
+#undef __FUNCT__
+#define __FUNCT__ "solve_elasticity_2d"
+static PetscErrorCode solve_elasticity_2d(PetscInt mx,PetscInt my)
+{
+  DM                     elas_da,da_prop;
+  PetscInt               u_dof,dof,stencil_width;
+  Mat                    A;
+  PetscInt               mxl,myl;
+  DM                     prop_cda,vel_cda;
+  Vec                    prop_coords,vel_coords;
+  PetscInt               si,sj,nx,ny,i,j,p;
+  Vec                    f,X;
+  PetscInt               prop_dof,prop_stencil_width;
+  Vec                    properties,l_properties;
+  MatNullSpace           matnull;
+  PetscReal              dx,dy;
+  PetscInt               M,N;
+  DMDACoor2d             **_prop_coords,**_vel_coords;
+  GaussPointCoefficients **element_props;
+  KSP                    ksp_E;
+  PetscInt               coefficient_structure = 0;
+  PetscInt               cpu_x,cpu_y,*lx = NULL,*ly = NULL;
+  PetscBool              use_gp_coords = PETSC_FALSE;
+  PetscBool              use_nonsymbc  = PETSC_FALSE;
+  PetscBool              no_view       = PETSC_FALSE;
+  PetscBool              flg;
+  PetscErrorCode         ierr;
+
+  PetscFunctionBeginUser;
+  /* Generate the da for velocity and pressure */
+  /*
+   We use Q1 elements for the temperature.
+   FEM has a 9-point stencil (BOX) or connectivity pattern
+   Num nodes in each direction is mx+1, my+1
+   */
+  u_dof         = U_DOFS; /* Vx, Vy - velocities */
+  dof           = u_dof;
+  stencil_width = 1;
+  ierr          = DMDACreate2d(PETSC_COMM_WORLD, DM_BOUNDARY_NONE, DM_BOUNDARY_NONE,DMDA_STENCIL_BOX,
+                               mx+1,my+1,PETSC_DECIDE,PETSC_DECIDE,dof,stencil_width,NULL,NULL,&elas_da);CHKERRQ(ierr);
+  ierr = DMDASetFieldName(elas_da,0,"Ux");CHKERRQ(ierr);
+  ierr = DMDASetFieldName(elas_da,1,"Uy");CHKERRQ(ierr);
+
+  /* unit box [0,1] x [0,1] */
+  ierr = DMDASetUniformCoordinates(elas_da,0.0,1.0,0.0,1.0,0.0,1.0);CHKERRQ(ierr);
+
+
+  /* Generate element properties, we will assume all material properties are constant over the element */
+  /* local number of elements */
+  ierr = DMDAGetLocalElementSize(elas_da,&mxl,&myl,NULL);CHKERRQ(ierr);
+
+  /* !!! IN PARALLEL WE MUST MAKE SURE THE TWO DMDA's ALIGN !!! */
+  ierr = DMDAGetInfo(elas_da,0,0,0,0,&cpu_x,&cpu_y,0,0,0,0,0,0,0);CHKERRQ(ierr);
+  ierr = DMDAGetElementOwnershipRanges2d(elas_da,&lx,&ly);CHKERRQ(ierr);
+
+  prop_dof           = (PetscInt)(sizeof(GaussPointCoefficients)/sizeof(PetscScalar)); /* gauss point setup */
+  prop_stencil_width = 0;
+  ierr               = DMDACreate2d(PETSC_COMM_WORLD, DM_BOUNDARY_NONE, DM_BOUNDARY_NONE,DMDA_STENCIL_BOX,
+                                    mx,my,cpu_x,cpu_y,prop_dof,prop_stencil_width,lx,ly,&da_prop);CHKERRQ(ierr);
+  ierr = PetscFree(lx);CHKERRQ(ierr);
+  ierr = PetscFree(ly);CHKERRQ(ierr);
+
+  /* define centroid positions */
+  ierr = DMDAGetInfo(da_prop,0,&M,&N,0,0,0,0,0,0,0,0,0,0);CHKERRQ(ierr);
+  dx   = 1.0/((PetscReal)(M));
+  dy   = 1.0/((PetscReal)(N));
+
+  ierr = DMDASetUniformCoordinates(da_prop,0.0+0.5*dx,1.0-0.5*dx,0.0+0.5*dy,1.0-0.5*dy,0.0,1.0);CHKERRQ(ierr);
+
+  /* define coefficients */
+  ierr = PetscOptionsGetInt(NULL,"-c_str",&coefficient_structure,NULL);CHKERRQ(ierr);
+
+  ierr = DMCreateGlobalVector(da_prop,&properties);CHKERRQ(ierr);
+  ierr = DMCreateLocalVector(da_prop,&l_properties);CHKERRQ(ierr);
+  ierr = DMDAVecGetArray(da_prop,l_properties,&element_props);CHKERRQ(ierr);
+
+  ierr = DMGetCoordinateDM(da_prop,&prop_cda);CHKERRQ(ierr);
+  ierr = DMGetCoordinatesLocal(da_prop,&prop_coords);CHKERRQ(ierr);
+  ierr = DMDAVecGetArray(prop_cda,prop_coords,&_prop_coords);CHKERRQ(ierr);
+
+  ierr = DMDAGetGhostCorners(prop_cda,&si,&sj,0,&nx,&ny,0);CHKERRQ(ierr);
+
+  ierr = DMGetCoordinateDM(elas_da,&vel_cda);CHKERRQ(ierr);
+  ierr = DMGetCoordinatesLocal(elas_da,&vel_coords);CHKERRQ(ierr);
+  ierr = DMDAVecGetArray(vel_cda,vel_coords,&_vel_coords);CHKERRQ(ierr);
+
+
+  /* interpolate the coordinates */
+  for (j = sj; j < sj+ny; j++) {
+    for (i = si; i < si+nx; i++) {
+      PetscInt    ngp;
+      PetscScalar gp_xi[GAUSS_POINTS][2],gp_weight[GAUSS_POINTS];
+      PetscScalar el_coords[8];
+
+      ierr = GetElementCoords(_vel_coords,i,j,el_coords);CHKERRQ(ierr);
+      ConstructGaussQuadrature(&ngp,gp_xi,gp_weight);
+
+      for (p = 0; p < GAUSS_POINTS; p++) {
+        PetscScalar gp_x,gp_y;
+        PetscInt    n;
+        PetscScalar xi_p[2],Ni_p[4];
+
+        xi_p[0] = gp_xi[p][0];
+        xi_p[1] = gp_xi[p][1];
+        ConstructQ12D_Ni(xi_p,Ni_p);
+
+        gp_x = 0.0;
+        gp_y = 0.0;
+        for (n = 0; n < NODES_PER_EL; n++) {
+          gp_x = gp_x+Ni_p[n]*el_coords[2*n];
+          gp_y = gp_y+Ni_p[n]*el_coords[2*n+1];
+        }
+        element_props[j][i].gp_coords[2*p]   = gp_x;
+        element_props[j][i].gp_coords[2*p+1] = gp_y;
+      }
+    }
+  }
+
+  /* define the coefficients */
+  ierr = PetscOptionsGetBool(NULL,"-use_gp_coords",&use_gp_coords,&flg);CHKERRQ(ierr);
+
+  for (j = sj; j < sj+ny; j++) {
+    for (i = si; i < si+nx; i++) {
+      PetscScalar              centroid_x = _prop_coords[j][i].x; /* centroids of cell */
+      PetscScalar              centroid_y = _prop_coords[j][i].y;
+      PETSC_UNUSED PetscScalar coord_x,coord_y;
+
+
+      if (coefficient_structure == 0) { /* isotropic */
+        PetscScalar opts_E,opts_nu;
+
+        opts_E  = 1.0;
+        opts_nu = 0.33;
+        ierr    = PetscOptionsGetScalar(NULL,"-iso_E",&opts_E,&flg);CHKERRQ(ierr);
+        ierr    = PetscOptionsGetScalar(NULL,"-iso_nu",&opts_nu,&flg);CHKERRQ(ierr);
+
+        for (p = 0; p < GAUSS_POINTS; p++) {
+          element_props[j][i].E[p]  = opts_E;
+          element_props[j][i].nu[p] = opts_nu;
+
+          element_props[j][i].fx[p] = 0.0;
+          element_props[j][i].fy[p] = 0.0;
+        }
+      } else if (coefficient_structure == 1) { /* step */
+        PetscScalar opts_E0,opts_nu0,opts_xc;
+        PetscScalar opts_E1,opts_nu1;
+
+        opts_E0  = opts_E1  = 1.0;
+        opts_nu0 = opts_nu1 = 0.333;
+        opts_xc  = 0.5;
+        ierr     = PetscOptionsGetScalar(NULL,"-step_E0",&opts_E0,&flg);CHKERRQ(ierr);
+        ierr     = PetscOptionsGetScalar(NULL,"-step_nu0",&opts_nu0,&flg);CHKERRQ(ierr);
+        ierr     = PetscOptionsGetScalar(NULL,"-step_E1",&opts_E1,&flg);CHKERRQ(ierr);
+        ierr     = PetscOptionsGetScalar(NULL,"-step_nu1",&opts_nu1,&flg);CHKERRQ(ierr);
+        ierr     = PetscOptionsGetScalar(NULL,"-step_xc",&opts_xc,&flg);CHKERRQ(ierr);
+
+        for (p = 0; p < GAUSS_POINTS; p++) {
+          coord_x = centroid_x;
+          coord_y = centroid_y;
+          if (use_gp_coords) {
+            coord_x = element_props[j][i].gp_coords[2*p];
+            coord_y = element_props[j][i].gp_coords[2*p+1];
+          }
+
+          element_props[j][i].E[p]  = opts_E0;
+          element_props[j][i].nu[p] = opts_nu0;
+          if (PetscRealPart(coord_x) > PetscRealPart(opts_xc)) {
+            element_props[j][i].E[p]  = opts_E1;
+            element_props[j][i].nu[p] = opts_nu1;
+          }
+
+          element_props[j][i].fx[p] = 0.0;
+          element_props[j][i].fy[p] = 0.0;
+        }
+      } else if (coefficient_structure == 2) { /* brick */
+        PetscReal values_E[10];
+        PetscReal values_nu[10];
+        PetscInt  nbricks,maxnbricks;
+        PetscInt  index,span;
+        PetscInt  jj;
+
+        flg        = PETSC_FALSE;
+        maxnbricks = 10;
+        ierr       = PetscOptionsGetRealArray(NULL, "-brick_E",values_E,&maxnbricks,&flg);CHKERRQ(ierr);
+        nbricks    = maxnbricks;
+        if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"User must supply a list of E values for each brick");CHKERRQ(ierr);
+
+        flg        = PETSC_FALSE;
+        maxnbricks = 10;
+        ierr       = PetscOptionsGetRealArray(NULL, "-brick_nu",values_nu,&maxnbricks,&flg);CHKERRQ(ierr);
+        if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"User must supply a list of nu values for each brick");CHKERRQ(ierr);
+        if (maxnbricks != nbricks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"User must supply equal numbers of values for E and nu");CHKERRQ(ierr);
+
+        span = 1;
+        ierr = PetscOptionsGetInt(NULL,"-brick_span",&span,&flg);CHKERRQ(ierr);
+
+        /* cycle through the indices so that no two material properties are repeated in lines of x or y */
+        jj    = (j/span)%nbricks;
+        index = (jj+i/span)%nbricks;
+        /*printf("j=%d: index = %d \n", j,index); */
+
+        for (p = 0; p < GAUSS_POINTS; p++) {
+          element_props[j][i].E[p]  = values_E[index];
+          element_props[j][i].nu[p] = values_nu[index];
+        }
+      } else if (coefficient_structure == 3) { /* sponge */
+        PetscScalar opts_E0,opts_nu0;
+        PetscScalar opts_E1,opts_nu1;
+        PetscInt    opts_t,opts_w;
+        PetscInt    ii,jj,ci,cj;
+
+        opts_E0  = opts_E1  = 1.0;
+        opts_nu0 = opts_nu1 = 0.333;
+        ierr     = PetscOptionsGetScalar(NULL,"-sponge_E0",&opts_E0,&flg);CHKERRQ(ierr);
+        ierr     = PetscOptionsGetScalar(NULL,"-sponge_nu0",&opts_nu0,&flg);CHKERRQ(ierr);
+        ierr     = PetscOptionsGetScalar(NULL,"-sponge_E1",&opts_E1,&flg);CHKERRQ(ierr);
+        ierr     = PetscOptionsGetScalar(NULL,"-sponge_nu1",&opts_nu1,&flg);CHKERRQ(ierr);
+
+        opts_t = opts_w = 1;
+        ierr   = PetscOptionsGetInt(NULL,"-sponge_t",&opts_t,&flg);CHKERRQ(ierr);
+        ierr   = PetscOptionsGetInt(NULL,"-sponge_w",&opts_w,&flg);CHKERRQ(ierr);
+
+        ii = (i)/(opts_t+opts_w+opts_t);
+        jj = (j)/(opts_t+opts_w+opts_t);
+
+        ci = i - ii*(opts_t+opts_w+opts_t);
+        cj = j - jj*(opts_t+opts_w+opts_t);
+
+        for (p = 0; p < GAUSS_POINTS; p++) {
+          element_props[j][i].E[p]  = opts_E0;
+          element_props[j][i].nu[p] = opts_nu0;
+        }
+        if ((ci >= opts_t) && (ci < opts_t+opts_w)) {
+          if ((cj >= opts_t) && (cj < opts_t+opts_w)) {
+            for (p = 0; p < GAUSS_POINTS; p++) {
+              element_props[j][i].E[p]  = opts_E1;
+              element_props[j][i].nu[p] = opts_nu1;
+            }
+          }
+        }
+
+      } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Unknown coefficient_structure");
+    }
+  }
+  ierr = DMDAVecRestoreArray(prop_cda,prop_coords,&_prop_coords);CHKERRQ(ierr);
+
+  ierr = DMDAVecRestoreArray(vel_cda,vel_coords,&_vel_coords);CHKERRQ(ierr);
+
+  ierr = DMDAVecRestoreArray(da_prop,l_properties,&element_props);CHKERRQ(ierr);
+  ierr = DMLocalToGlobalBegin(da_prop,l_properties,ADD_VALUES,properties);CHKERRQ(ierr);
+  ierr = DMLocalToGlobalEnd(da_prop,l_properties,ADD_VALUES,properties);CHKERRQ(ierr);
+
+  ierr = PetscOptionsGetBool(NULL,"-no_view",&no_view,NULL);CHKERRQ(ierr);
+  if (!no_view) {
+    ierr = DMDAViewCoefficientsGnuplot2d(da_prop,properties,"Coeffcients for elasticity eqn.","properties");CHKERRQ(ierr);
+    ierr = DMDACoordViewGnuplot2d(elas_da,"mesh");CHKERRQ(ierr);
+  }
+
+  /* Generate a matrix with the correct non-zero pattern of type AIJ. This will work in parallel and serial */
+  ierr = DMSetMatType(elas_da,MATAIJ);CHKERRQ(ierr);
+  ierr = DMCreateMatrix(elas_da,&A);CHKERRQ(ierr);
+  ierr = DMGetCoordinates(elas_da,&vel_coords);CHKERRQ(ierr);
+  ierr = MatNullSpaceCreateRigidBody(vel_coords,&matnull);CHKERRQ(ierr);
+  ierr = MatSetNearNullSpace(A,matnull);CHKERRQ(ierr);
+  ierr = MatNullSpaceDestroy(&matnull);CHKERRQ(ierr);
+  ierr = MatGetVecs(A,&f,&X);CHKERRQ(ierr);
+
+  /* assemble A11 */
+  ierr = MatZeroEntries(A);CHKERRQ(ierr);
+  ierr = VecZeroEntries(f);CHKERRQ(ierr);
+
+  ierr = AssembleA_Elasticity(A,elas_da,da_prop,properties);CHKERRQ(ierr);
+  /* build force vector */
+  ierr = AssembleF_Elasticity(f,elas_da,da_prop,properties);CHKERRQ(ierr);
+
+
+  ierr = KSPCreate(PETSC_COMM_WORLD,&ksp_E);CHKERRQ(ierr);
+  ierr = KSPSetOptionsPrefix(ksp_E,"elas_");CHKERRQ(ierr);  /* elasticity */
+
+  ierr = PetscOptionsGetBool(NULL,"-use_nonsymbc",&use_nonsymbc,&flg);CHKERRQ(ierr);
+  /* solve */
+  if (!use_nonsymbc) {
+    Mat        AA;
+    Vec        ff,XX;
+    IS         is;
+    VecScatter scat;
+
+    ierr = DMDABCApplySymmetricCompression(elas_da,A,f,&is,&AA,&ff);CHKERRQ(ierr);
+    ierr = VecDuplicate(ff,&XX);CHKERRQ(ierr);
+
+    ierr = KSPSetOperators(ksp_E,AA,AA);CHKERRQ(ierr);
+    ierr = KSPSetFromOptions(ksp_E);CHKERRQ(ierr);
+
+ ierr = KSPSetFromOptions(ksp_E);CHKERRQ(ierr);
+
+   PetscScalar T1,T2;
+    ierr = KSPSetTolerances(ksp_E, 1e-9, 1e-9, PETSC_DEFAULT, 50000000); CHKERRQ(ierr);
+    T1 = MPI_Wtime();
+
+    ierr = KSPSolve(ksp_E,ff,XX);CHKERRQ(ierr);
+    T2 = MPI_Wtime();
+    
+    Mat A;
+    Vec sol;
+    PetscScalar norm;
+    Vec X2;
+    
+    VecDuplicate(ff,&sol);
+    VecDuplicate(ff,&X2);
+    VecCopy(ff,X2);
+
+
+    KSPGetOperators(ksp_E,&A,NULL);
+    MatMult(A,XX,sol);
+    VecAXPY(sol,-1,ff);
+    VecNorm(sol, NORM_2, &norm);
+
+
+    ierr = PetscPrintf(PETSC_COMM_WORLD, "\t\t\t -- Norm of error : %g\n", (double)norm); CHKERRQ(ierr);
+    ierr = PetscPrintf(PETSC_COMM_WORLD, "\t\t\t -- Execution time : %g (s)\n", T2-T1); CHKERRQ(ierr);
+    PetscInt total;
+    ierr = KSPGetIterationNumber(ksp_E, &total); CHKERRQ(ierr);
+    ierr = PetscPrintf(PETSC_COMM_WORLD, "\t\t\t -- Total number of iterations : %D\n", total); CHKERRQ(ierr);
+
+
+
+
+    KrylovMinimize(A, ff, X2);
+    MatMult(A,X2,sol);
+
+
+    VecAXPY(sol,-1,ff);
+    VecNorm(sol, NORM_2, &norm);
+    ierr = PetscPrintf(PETSC_COMM_WORLD, "Error2 %g\n",norm);
+
+
+
+
+    VecCopy(ff,X2);
+    KrylovMinimizeLSQR(A, ff, X2);
+    MatMult(A,X2,sol);
+
+
+    VecAXPY(sol,-1,ff);
+    VecNorm(sol, NORM_2, &norm);
+    ierr = PetscPrintf(PETSC_COMM_WORLD, "ErrorLSQR %g\n",norm);
+
+
+
+    /* push XX back into X */
+    ierr = DMDABCApplyCompression(elas_da,NULL,X);CHKERRQ(ierr);
+
+    ierr = VecScatterCreate(XX,NULL,X,is,&scat);CHKERRQ(ierr);
+    ierr = VecScatterBegin(scat,XX,X,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
+    ierr = VecScatterEnd(scat,XX,X,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
+    ierr = VecScatterDestroy(&scat);CHKERRQ(ierr);
+
+    ierr = MatDestroy(&AA);CHKERRQ(ierr);
+    ierr = VecDestroy(&ff);CHKERRQ(ierr);
+    ierr = VecDestroy(&XX);CHKERRQ(ierr);
+    ierr = ISDestroy(&is);CHKERRQ(ierr);
+  } else {
+    ierr = DMDABCApplyCompression(elas_da,A,f);CHKERRQ(ierr);
+
+    ierr = KSPSetOperators(ksp_E,A,A);CHKERRQ(ierr);
+    ierr = KSPSetFromOptions(ksp_E);CHKERRQ(ierr);
+
+    ierr = KSPSolve(ksp_E,f,X);CHKERRQ(ierr);
+  }
+
+  if (!no_view) {ierr = DMDAViewGnuplot2d(elas_da,X,"Displacement solution for elasticity eqn.","X");CHKERRQ(ierr);}
+  ierr = KSPDestroy(&ksp_E);CHKERRQ(ierr);
+
+  ierr = VecDestroy(&X);CHKERRQ(ierr);
+  ierr = VecDestroy(&f);CHKERRQ(ierr);
+  ierr = MatDestroy(&A);CHKERRQ(ierr);
+
+  ierr = DMDestroy(&elas_da);CHKERRQ(ierr);
+  ierr = DMDestroy(&da_prop);CHKERRQ(ierr);
+
+  ierr = VecDestroy(&properties);CHKERRQ(ierr);
+  ierr = VecDestroy(&l_properties);CHKERRQ(ierr);
+  PetscFunctionReturn(0);
+}
+
+#undef __FUNCT__
+#define __FUNCT__ "main"
+int main(int argc,char **args)
+{
+  PetscErrorCode ierr;
+  PetscInt       mx,my;
+
+  ierr = PetscInitialize(&argc,&args,(char*)0,help);CHKERRQ(ierr);
+
+  mx   = my = 10;
+  ierr = PetscOptionsGetInt(NULL,"-mx",&mx,NULL);CHKERRQ(ierr);
+  ierr = PetscOptionsGetInt(NULL,"-my",&my,NULL);CHKERRQ(ierr);
+
+  ierr = solve_elasticity_2d(mx,my);CHKERRQ(ierr);
+
+  ierr = PetscFinalize();CHKERRQ(ierr);
+  return 0;
+}
+
+/* -------------------------- helpers for boundary conditions -------------------------------- */
+
+#undef __FUNCT__
+#define __FUNCT__ "BCApply_EAST"
+static PetscErrorCode BCApply_EAST(DM da,PetscInt d_idx,PetscScalar bc_val,Mat A,Vec b)
+{
+  DM                     cda;
+  Vec                    coords;
+  PetscInt               si,sj,nx,ny,i,j;
+  PetscInt               M,N;
+  DMDACoor2d             **_coords;
+  const PetscInt         *g_idx;
+  PetscInt               *bc_global_ids;
+  PetscScalar            *bc_vals;
+  PetscInt               nbcs;
+  PetscInt               n_dofs;
+  PetscErrorCode         ierr;
+  ISLocalToGlobalMapping ltogm;
+
+  PetscFunctionBeginUser;
+  /* enforce bc's */
+  ierr = DMGetLocalToGlobalMapping(da,&ltogm);CHKERRQ(ierr);
+  ierr = ISLocalToGlobalMappingGetIndices(ltogm,&g_idx);CHKERRQ(ierr);
+
+  ierr = DMGetCoordinateDM(da,&cda);CHKERRQ(ierr);
+  ierr = DMGetCoordinatesLocal(da,&coords);CHKERRQ(ierr);
+  ierr = DMDAVecGetArray(cda,coords,&_coords);CHKERRQ(ierr);
+  ierr = DMDAGetGhostCorners(cda,&si,&sj,0,&nx,&ny,0);CHKERRQ(ierr);
+  ierr = DMDAGetInfo(da,0,&M,&N,0,0,0,0,&n_dofs,0,0,0,0,0);CHKERRQ(ierr);
+
+  /* --- */
+
+  ierr = PetscMalloc(sizeof(PetscInt)*ny*n_dofs,&bc_global_ids);CHKERRQ(ierr);
+  ierr = PetscMalloc(sizeof(PetscScalar)*ny*n_dofs,&bc_vals);CHKERRQ(ierr);
+
+  /* init the entries to -1 so VecSetValues will ignore them */
+  for (i = 0; i < ny*n_dofs; i++) bc_global_ids[i] = -1;
+
+  i = nx-1;
+  for (j = 0; j < ny; j++) {
+    PetscInt                 local_id;
+    PETSC_UNUSED PetscScalar coordx,coordy;
+
+    local_id = i+j*nx;
+
+    bc_global_ids[j] = g_idx[n_dofs*local_id+d_idx];
+
+    coordx = _coords[j+sj][i+si].x;
+    coordy = _coords[j+sj][i+si].y;
+
+    bc_vals[j] =  bc_val;
+  }
+  ierr = ISLocalToGlobalMappingRestoreIndices(ltogm,&g_idx);CHKERRQ(ierr);
+  nbcs = 0;
+  if ((si+nx) == (M)) nbcs = ny;
+
+  if (b) {
+    ierr = VecSetValues(b,nbcs,bc_global_ids,bc_vals,INSERT_VALUES);CHKERRQ(ierr);
+    ierr = VecAssemblyBegin(b);CHKERRQ(ierr);
+    ierr = VecAssemblyEnd(b);CHKERRQ(ierr);
+  }
+  if (A) {
+    ierr = MatZeroRows(A,nbcs,bc_global_ids,1.0,0,0);CHKERRQ(ierr);
+  }
+
+  ierr = PetscFree(bc_vals);CHKERRQ(ierr);
+  ierr = PetscFree(bc_global_ids);CHKERRQ(ierr);
+
+  ierr = DMDAVecRestoreArray(cda,coords,&_coords);CHKERRQ(ierr);
+  PetscFunctionReturn(0);
+}
+
+#undef __FUNCT__
+#define __FUNCT__ "BCApply_WEST"
+static PetscErrorCode BCApply_WEST(DM da,PetscInt d_idx,PetscScalar bc_val,Mat A,Vec b)
+{
+  DM                     cda;
+  Vec                    coords;
+  PetscInt               si,sj,nx,ny,i,j;
+  PetscInt               M,N;
+  DMDACoor2d             **_coords;
+  const PetscInt         *g_idx;
+  PetscInt               *bc_global_ids;
+  PetscScalar            *bc_vals;
+  PetscInt               nbcs;
+  PetscInt               n_dofs;
+  PetscErrorCode         ierr;
+  ISLocalToGlobalMapping ltogm;
+
+  PetscFunctionBeginUser;
+  /* enforce bc's */
+  ierr = DMGetLocalToGlobalMapping(da,&ltogm);CHKERRQ(ierr);
+  ierr = ISLocalToGlobalMappingGetIndices(ltogm,&g_idx);CHKERRQ(ierr);
+
+  ierr = DMGetCoordinateDM(da,&cda);CHKERRQ(ierr);
+  ierr = DMGetCoordinatesLocal(da,&coords);CHKERRQ(ierr);
+  ierr = DMDAVecGetArray(cda,coords,&_coords);CHKERRQ(ierr);
+  ierr = DMDAGetGhostCorners(cda,&si,&sj,0,&nx,&ny,0);CHKERRQ(ierr);
+  ierr = DMDAGetInfo(da,0,&M,&N,0,0,0,0,&n_dofs,0,0,0,0,0);CHKERRQ(ierr);
+
+  /* --- */
+
+  ierr = PetscMalloc(sizeof(PetscInt)*ny*n_dofs,&bc_global_ids);CHKERRQ(ierr);
+  ierr = PetscMalloc(sizeof(PetscScalar)*ny*n_dofs,&bc_vals);CHKERRQ(ierr);
+
+  /* init the entries to -1 so VecSetValues will ignore them */
+  for (i = 0; i < ny*n_dofs; i++) bc_global_ids[i] = -1;
+
+  i = 0;
+  for (j = 0; j < ny; j++) {
+    PetscInt                 local_id;
+    PETSC_UNUSED PetscScalar coordx,coordy;
+
+    local_id = i+j*nx;
+
+    bc_global_ids[j] = g_idx[n_dofs*local_id+d_idx];
+
+    coordx = _coords[j+sj][i+si].x;
+    coordy = _coords[j+sj][i+si].y;
+
+    bc_vals[j] =  bc_val;
+  }
+  ierr = ISLocalToGlobalMappingRestoreIndices(ltogm,&g_idx);CHKERRQ(ierr);
+  nbcs = 0;
+  if (si == 0) nbcs = ny;
+
+  if (b) {
+    ierr = VecSetValues(b,nbcs,bc_global_ids,bc_vals,INSERT_VALUES);CHKERRQ(ierr);
+    ierr = VecAssemblyBegin(b);CHKERRQ(ierr);
+    ierr = VecAssemblyEnd(b);CHKERRQ(ierr);
+  }
+  if (A) {
+    ierr = MatZeroRows(A,nbcs,bc_global_ids,1.0,0,0);CHKERRQ(ierr);
+  }
+
+  ierr = PetscFree(bc_vals);CHKERRQ(ierr);
+  ierr = PetscFree(bc_global_ids);CHKERRQ(ierr);
+
+  ierr = DMDAVecRestoreArray(cda,coords,&_coords);CHKERRQ(ierr);
+  PetscFunctionReturn(0);
+}
+
+#undef __FUNCT__
+#define __FUNCT__ "DMDABCApplyCompression"
+static PetscErrorCode DMDABCApplyCompression(DM elas_da,Mat A,Vec f)
+{
+  PetscErrorCode ierr;
+
+  PetscFunctionBeginUser;
+  ierr = BCApply_EAST(elas_da,0,-1.0,A,f);CHKERRQ(ierr);
+  ierr = BCApply_EAST(elas_da,1, 0.0,A,f);CHKERRQ(ierr);
+  ierr = BCApply_WEST(elas_da,0,1.0,A,f);CHKERRQ(ierr);
+  ierr = BCApply_WEST(elas_da,1,0.0,A,f);CHKERRQ(ierr);
+  PetscFunctionReturn(0);
+}
+
+#undef __FUNCT__
+#define __FUNCT__ "DMDABCApplySymmetricCompression"
+static PetscErrorCode DMDABCApplySymmetricCompression(DM elas_da,Mat A,Vec f,IS *dofs,Mat *AA,Vec *ff)
+{
+  PetscErrorCode ierr;
+  PetscInt       start,end,m;
+  PetscInt       *unconstrained;
+  PetscInt       cnt,i;
+  Vec            x;
+  PetscScalar    *_x;
+  IS             is;
+  VecScatter     scat;
+
+  PetscFunctionBeginUser;
+  /* push bc's into f and A */
+  ierr = VecDuplicate(f,&x);CHKERRQ(ierr);
+  ierr = BCApply_EAST(elas_da,0,-1.0,A,x);CHKERRQ(ierr);
+  ierr = BCApply_EAST(elas_da,1, 0.0,A,x);CHKERRQ(ierr);
+  ierr = BCApply_WEST(elas_da,0,1.0,A,x);CHKERRQ(ierr);
+  ierr = BCApply_WEST(elas_da,1,0.0,A,x);CHKERRQ(ierr);
+
+  /* define which dofs are not constrained */
+  ierr = VecGetLocalSize(x,&m);CHKERRQ(ierr);
+  ierr = PetscMalloc(sizeof(PetscInt)*m,&unconstrained);CHKERRQ(ierr);
+  ierr = VecGetOwnershipRange(x,&start,&end);CHKERRQ(ierr);
+  ierr = VecGetArray(x,&_x);CHKERRQ(ierr);
+  cnt  = 0;
+  for (i = 0; i < m; i++) {
+    PetscReal val;
+
+    val = PetscRealPart(_x[i]);
+    if (fabs(val) < 0.1) {
+      unconstrained[cnt] = start + i;
+      cnt++;
+    }
+  }
+  ierr = VecRestoreArray(x,&_x);CHKERRQ(ierr);
+
+  ierr = ISCreateGeneral(PETSC_COMM_WORLD,cnt,unconstrained,PETSC_COPY_VALUES,&is);CHKERRQ(ierr);
+  ierr = PetscFree(unconstrained);CHKERRQ(ierr);
+
+  /* define correction for dirichlet in the rhs */
+  ierr = MatMult(A,x,f);CHKERRQ(ierr);
+  ierr = VecScale(f,-1.0);CHKERRQ(ierr);
+
+  /* get new matrix */
+  ierr = MatGetSubMatrix(A,is,is,MAT_INITIAL_MATRIX,AA);CHKERRQ(ierr);
+  /* get new vector */
+  ierr = MatGetVecs(*AA,NULL,ff);CHKERRQ(ierr);
+
+  ierr = VecScatterCreate(f,is,*ff,NULL,&scat);CHKERRQ(ierr);
+  ierr = VecScatterBegin(scat,f,*ff,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
+  ierr = VecScatterEnd(scat,f,*ff,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
+
+  {                             /* Constrain near-null space */
+    PetscInt nvecs;
+    const Vec *vecs;
+    Vec *uvecs;
+    PetscBool has_const;
+    MatNullSpace mnull,unull;
+    ierr = MatGetNearNullSpace(A,&mnull);CHKERRQ(ierr);
+    ierr = MatNullSpaceGetVecs(mnull,&has_const,&nvecs,&vecs);CHKERRQ(ierr);
+    ierr = VecDuplicateVecs(*ff,nvecs,&uvecs);CHKERRQ(ierr);
+    for (i=0; i<nvecs; i++) {
+      ierr = VecScatterBegin(scat,vecs[i],uvecs[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
+      ierr = VecScatterEnd(scat,vecs[i],uvecs[i],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
+    }
+    ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)A),has_const,nvecs,uvecs,&unull);CHKERRQ(ierr);
+    ierr = MatSetNearNullSpace(*AA,unull);CHKERRQ(ierr);
+    ierr = MatNullSpaceDestroy(&unull);CHKERRQ(ierr);
+    for (i=0; i<nvecs; i++) {
+      ierr = VecDestroy(&uvecs[i]);CHKERRQ(ierr);
+    }
+    ierr = PetscFree(uvecs);CHKERRQ(ierr);
+  }
+
+  ierr = VecScatterDestroy(&scat);CHKERRQ(ierr);
+
+  *dofs = is;
+  ierr  = VecDestroy(&x);CHKERRQ(ierr);
+  PetscFunctionReturn(0);
+}