BookGPU/Chapters/chapter13/ex2.cu

   1 /* Kernel of the matrix-vector multiplication.
      *
      * For every index tid it handles, a thread stores in Y[tid] the sum of
      * Center * U[tid] and up to six neighbour terms (West, East, South, North,
      * Rear, Front) read at offsets -1, +1, -NX, +NX, -NX*ny and +NX*ny.
      * A neighbour term is dropped when tx, ty or tz sits on the matching
      * boundary, so no read is issued past that edge.
      *
      * U: input vector, read only through fetch_double().
      * Y: output vector, one element written per processed index.
      *
      * tx, ty, tid, stride, n, NX, ny, nz, slices and the seven coefficients
      * come from the elided parameters/setup and are not visible in this listing.
      * fetch_double() is defined elsewhere; presumably it reads U through the
      * texture that the host function sets up -- TODO confirm.
      */
   2 __global__ void MV_Multiplication (..., double* U, double* Y)
   3 {
   4         ...
   5         //Matrix coefficients filled in registers
   6
   7         for(int tz=0; tz<slices; tz++){ // one pass per tz; tid moves on by stride after each pass
   8                 if((tx<NX) && (ty<ny) && (tid<n)){ // only in-range positions compute and store
   9                         double sum = Center * fetch_double(U, tid); // diagonal contribution
  10                         if(tx != 0)      sum += West  * fetch_double(U,tid-1); // no West neighbour when tx == 0
  11                         if(tx != (NX-1)) sum += East  * fetch_double(U,tid+1); // no East neighbour when tx == NX-1
  12                         if(ty != 0)      sum += South * fetch_double(U,tid-NX); // no South neighbour when ty == 0
  13                         if(ty != (ny-1)) sum += North * fetch_double(U,tid+NX); // no North neighbour when ty == ny-1
  14                         if(tz != 0)      sum += Rear  * fetch_double(U,tid-NX*ny); // no Rear neighbour on the first pass
  15                         if(tz != (nz-1)) sum += Front * fetch_double(U,tid+NX*ny); // NOTE(review): loop bound is slices, this test uses nz -- confirm they agree
  16                         Y[tid] = sum; // single store of the accumulated row product
  17                 }
  18                 tid += stride; // executed by every thread, including those that failed the guard
  19         }
  20 }
  21
  22 /* Kernel of the vector elements updates.
      *
      * For every index tid it handles, a thread computes
      *     (G[tid] - Y[tid]) / Center + U[tid]
      * replaces a negative result by 0.0, and stores the value back in U[tid].
      *
      * G, Y: read directly at index tid.
      * U:    read through fetch_double() and then overwritten at the same index.
      *
      * tx, ty, tid, stride, n, NX, ny, slices and Center come from the elided
      * parameters/setup and are not visible in this listing.
      * NOTE(review): U is both fetched and written in this kernel; each visible
      * access touches only index tid, read before write. If fetch_double() goes
      * through a texture, confirm that no other thread handles the same tid.
      */
  23 __global__ void Vector_Updates(..., double* G,double* Y,double* U)
  24 {
  25         ...
  26         //Matrix coefficient filled in a register: Center
  27
  28         for(int tz=0; tz<slices; tz++){ // one pass per tz; tid moves on by stride after each pass
  29                 if((tx<NX) && (ty<ny) && (tid<n)){ // same guard as in MV_Multiplication
  30                         double var = (G[tid]-Y[tid]) / Center + fetch_double(U,tid); // old U[tid] plus the residual scaled by 1/Center
  31                         if(var < 0.0) var = 0.0; //projection
  32                         U[tid] = var; // overwrite the element that was just read
  33                 }
  34                 tid += stride; // executed by every thread, including those that failed the guard
  35         }
  36 }
  37
  38 /* Function to be executed by the CPU.
      *
      * Launches MV_Multiplication and then Vector_Updates with the same
      * grid/block configuration. The first kernel writes Y from U; the second
      * reads G and Y and updates U. Both launches name no stream, so they are
      * issued to the same (default) stream in this order.
      *
      * A: not referenced in the visible lines; presumably passed through the
      *    elided kernel arguments -- TODO confirm.
      * G, U: forwarded to the kernels as shown; presumably device pointers --
      *    TODO confirm against the caller.
      *
      * The allocation of Y, the definition of grid/block and the texture setup
      * for U appear only as placeholder comments below.
      * NOTE(review): no release of Y and no error check after the launches are
      * shown in this listing -- confirm that the full source has both.
      */
  39 void Computation_New_Vector_Elements(double*A,double* G,double* U)
  40 {
  41         double *Y; // holds no valid address until the elided allocation below runs
  42         //Allocate a GPU memory space for the vector Y
  43         //Configure the kernel execution: grid and block
  44         //Elements of vector U filled in the texture memory
  45
  46         MV_Multiplication<<<grid,block>>>(..., U, Y); // writes Y from U
  47         Vector_Updates<<<grid,block>>>(..., G, Y, U); // consumes the Y written by the launch above, updates U
  48 }