BookGPU/Chapters/chapter13/ex4.cu

   1 /* Kernel of the matrix-vector multiplication */
   2 __global__ void MV_Multiplication(...,double*G,double*U,double*Y)
   3 {
   4         ...
   5         //Matrix coefficients filled in registers:
   6         //Center, West, East, Front
   7
   8         for(int tz=0; tz<slices; tz++){
   9                 if((tx<NX) && (ty<ny) && (tid<n)){
  10                         double sum = G[tid] - Center * fetch_double(U,tid);
  11                         if(tx != 0)      sum -= West * fetch_double(U,tid-1);
  12                         if(tx != (NX-1)) sum -= East * fetch_double(U,tid+1);
  13                         if(tz != (nz-1)) sum -= Front * fetch_double(U,tid+NX*ny);
  14                         Y[tid] = sum;
  15                 }
  16                 tid += stride;
  17         }
  18 }
  19
  20 /* Kernel of the vector elements updates */
  21 __global__ void Vector_Updates(..., int rb, double* Y, double* U)
  22 {
  23         ...
  24         double val; //vector component computed in previous grid
  25         //Matrix coefficients filled in registers:
  26         //Center, South, North, Rear
  27
  28         for(int tz=0; tz<slices; tz++){
  29                 if((tx<NX) && (ty<ny) && (tid<n) && ((ty&1)==rb)){
  30                         double sum = Y[tid] - South * fetch_double(U,tid-NX) -
  31                                            North * fetch_double(U,tid+NX);
  32                         //val: computed in previous grid
  33                         if(tz != 0) sum -= Rear * val;
  34                         sum = (sum / Center) + fetch_double(U,tid);
  35                         if(sum < 0) sum = 0; //projection
  36                         U[tid] = val = sum; //update of U
  37                 }
  38                 tid += stride;
  39         }
  40 }
  41
  42 /* CPU Function */
  43 void Computation_New_Vector_Components(double*A,double*G,double*U)
  44 {
  45         double *Y;
  46         int red=0, black=1;
  47         //Allocate a GPU memory space for the vector Y
  48         //Configure the kernel execution: grid and block
  49         //Elements of vector U filled in the texture memory
  50
  51         MV_Multiplication<<<grid,block>>>(..., G, U, Y);
  52         Vector_Updates<<<grid,block>>>(..., red, Y, U);
  53         Vector_Updates<<<grid,block>>>(..., black, Y, U);
  54 }