AND Private Git Repository - book_gpu.git/blob - BookGPU/Chapters/chapter13/ex2.cu
Logo AND Algorithmique Numérique Distribuée

Private GIT Repository
ch15
[book_gpu.git] / BookGPU / Chapters / chapter13 / ex2.cu
1 /* Kernel of the matrix-vector multiplication
   *
   * For each point handled by the calling thread, stores in Y[tid] the sum of
   * Center*U[tid] and up to six neighbour terms:
   *   West/East   -> U[tid-1]     / U[tid+1]
   *   South/North -> U[tid-NX]    / U[tid+NX]
   *   Rear/Front  -> U[tid-NX*ny] / U[tid+NX*ny]
   * A neighbour term is skipped when the point lies on the matching boundary
   * (tx==0, tx==NX-1, ty==0, ty==ny-1, tz==0, tz==nz-1).
   *
   * In the visible lines U is only read (through fetch_double) and Y is only
   * written.
   *
   * tx, ty, tid, stride, slices, NX, ny, nz, n and the seven coefficients are
   * not defined in the visible lines; presumably they are set up in the elided
   * "..." parts of the signature/prologue - TODO confirm.
   * NOTE(review): fetch_double is defined elsewhere. The host function's
   * comments say U is placed in texture memory, so it presumably performs a
   * texture read of U at the given index - confirm.
   */
2 __global__ void MV_Multiplication (..., double* U, double* Y)
3 {
4         ...
5         //Matrix coefficients filled in registers (Center, West, East, South, North, Rear, Front are used below)
6
7         for(int tz=0; tz<slices; tz++){ // one iteration per z-slice; tid advances by stride at the end of each pass
8                 if((tx<NX) && (ty<ny) && (tid<n)){ // skip threads outside the x/y extent or past the last element
9                         double sum = Center * fetch_double(U, tid); // diagonal term
10                         if(tx != 0)      sum += West  * fetch_double(U,tid-1); // no West neighbour on the tx==0 face
11                         if(tx != (NX-1)) sum += East  * fetch_double(U,tid+1); // no East neighbour on the tx==NX-1 face
12                         if(ty != 0)      sum += South * fetch_double(U,tid-NX); // previous row is NX elements back
13                         if(ty != (ny-1)) sum += North * fetch_double(U,tid+NX); // next row is NX elements ahead
14                         if(tz != 0)      sum += Rear  * fetch_double(U,tid-NX*ny); // previous slice is NX*ny elements back
15                         if(tz != (nz-1)) sum += Front * fetch_double(U,tid+NX*ny); // NOTE(review): loop bound is `slices` but this test uses `nz` - confirm they agree
16                         Y[tid] = sum; // each thread writes only the element it computed
17                 }
18                 tid += stride; // move to the same (tx,ty) position in the next slice - presumably stride == NX*ny; TODO confirm
19         }
20 }
21
22 /* Kernel of the vector elements updates
    *
    * For each point handled by the calling thread, computes
    *     var = (G[tid] - Y[tid]) / Center + U[tid]
    * replaces a negative result by 0.0 (the "projection" step), and writes the
    * value back into U[tid]. G and Y are only read; U is updated in place.
    *
    * tx, ty, tid, stride, slices, NX, ny, n and Center are not defined in the
    * visible lines; presumably they are set up in the elided "..." parts -
    * TODO confirm.
    * NOTE(review): U is read through fetch_double and written directly in the
    * same kernel. In the visible code a thread only reads the element it then
    * overwrites; if fetch_double goes through a texture cache, confirm that
    * this read/write mix is safe on the target hardware.
    */
23 __global__ void Vector_Updates(..., double* G,double* Y,double* U)
24 {
25         ...
26         //Matrix coefficient filled in a register: Center
27
28         for(int tz=0; tz<slices; tz++){ // one iteration per z-slice; tid advances by stride at the end of each pass
29                 if((tx<NX) && (ty<ny) && (tid<n)){ // skip threads outside the x/y extent or past the last element
30                         double var = (G[tid]-Y[tid]) / Center + fetch_double(U,tid); // NOTE(review): assumes Center != 0 - confirm
31                         if(var < 0.0) var = 0.0; //projection
32                         U[tid] = var; // in-place overwrite of the element read on the line above
33                 }
34                 tid += stride; // same step as in the loop header comment; value of stride is set in the elided code
35         }
36 }
37
38 /* Function to be executed by the CPU
    *
    * Launches MV_Multiplication (which writes Y from U) and then
    * Vector_Updates (which reads G and Y and updates U in place), both with
    * the same grid/block configuration.
    *
    * The allocation of Y, the grid/block configuration and the texture setup
    * for U are only described by the comments below; their code is elided in
    * this listing. The parameter A is not used in the visible lines.
    *
    * NOTE(review): no release of Y and no error check after the launches are
    * visible here; they may live in the elided code - confirm.
    */
39 void Computation_New_Vector_Elements(double*A,double* G,double* U)
40 {
41         double *Y; // buffer passed to both kernels: written by MV_Multiplication, read by Vector_Updates
42         //Allocate a GPU memory space for the vector Y
43         //Configure the kernel execution: grid and block
44         //Elements of vector U filled in the texture memory
45
46         MV_Multiplication<<<grid,block>>>(..., U, Y); // no stream argument given in the launch configuration
47         Vector_Updates<<<grid,block>>>(..., G, Y, U); // consumes the Y produced by the previous launch
48 }