AND Private Git Repository - book_gpu.git/blob - BookGPU/Chapters/chapter13/ex4.cu
Logo AND Algorithmique Numérique Distribuée

Private GIT Repository
correct
[book_gpu.git] / BookGPU / Chapters / chapter13 / ex4.cu
1 /* Kernel of the matrix-vector multiplication.
   *
   * For every in-range index tid this kernel stores in Y[tid] the value
   *   G[tid] - Center*U[tid] - West*U[tid-1] - East*U[tid+1] - Front*U[tid+NX*ny]
   * where the West, East and Front terms are dropped on the matching boundary
   * (tx == 0, tx == NX-1, tz == nz-1 respectively).
   *
   * G : read directly, one element per tid.
   * U : read only through fetch_double(); that helper is defined outside this
   *     listing - presumably a texture read, since the host code mentions
   *     placing U in texture memory (TODO confirm).
   * Y : written, one element per tid.
   *
   * The leading "..." in the parameter list and in the body stand for code the
   * listing omits: the remaining parameters and the definitions of tx, ty, tid,
   * stride, slices, NX, ny, nz, n and the coefficients.
   */
2 __global__ void MV_Multiplication(...,double*G,double*U,double*Y)
3 {
4         ... //elided in the listing: setup of tx, ty, tid, stride, slices, ...
5         //Matrix coefficients filled in registers (per the original listing;
6         //their loading is elided): Center, West, East, Front
7
8         for(int tz=0; tz<slices; tz++){ //one pass per slice; tid moves by stride each pass
9                 if((tx<NX) && (ty<ny) && (tid<n)){ //skip threads/indices outside the data
10                         double sum = G[tid] - Center * fetch_double(U,tid); //diagonal term
11                         if(tx != 0)      sum -= West * fetch_double(U,tid-1); //no West neighbour at tx==0
12                         if(tx != (NX-1)) sum -= East * fetch_double(U,tid+1); //no East neighbour at tx==NX-1
13                         if(tz != (nz-1)) sum -= Front * fetch_double(U,tid+NX*ny); //NOTE(review): loop bound is slices but this test uses nz - presumably equal; confirm
14                         Y[tid] = sum; //partial result consumed later via Y
15                 }
16                 tid += stride; //advance to this thread's element in the next slice
17         }
18 }
19
20 /* Kernel of the vector elements updates.
    *
    * Processes only the indices whose ty has parity rb ((ty&1)==rb). For each
    * such index it computes
    *   sum = Y[tid] - South*U[tid-NX] - North*U[tid+NX] - Rear*val   (Rear term only when tz != 0)
    *   sum = sum/Center + U[tid]
    * clamps a negative result to 0, and writes it back to U[tid].
    *
    * rb : parity selector compared against (ty&1).
    * Y  : read directly, one element per tid.
    * U  : read through fetch_double() (defined outside this listing) and
    *      written directly at U[tid].
    *
    * The "..." in the parameter list and in the body stand for code the
    * listing omits (remaining parameters and the setup of tx, ty, tid, stride,
    * slices, NX, ny, n and the coefficients).
    */
21 __global__ void Vector_Updates(..., int rb, double* Y, double* U)
22 {
23         ... //elided in the listing: setup of tx, ty, tid, stride, slices, ...
24         double val; //value this thread wrote to U on its previous loop pass; unset (and not read) on the first pass
25         //Matrix coefficients filled in registers (per the original listing;
26         //their loading is elided): Center, South, North, Rear
27
28         for(int tz=0; tz<slices; tz++){ //one pass per slice; tid moves by stride each pass
29                 if((tx<NX) && (ty<ny) && (tid<n) && ((ty&1)==rb)){ //in range AND ty parity equals rb
30                         double sum = Y[tid] - South * fetch_double(U,tid-NX) - 
31                                            North * fetch_double(U,tid+NX); //NOTE(review): no ty==0 / ty==ny-1 guard here; result depends on what fetch_double returns for those indices - confirm
32                         //val: the value this same thread stored at the end of its previous pass
33                         if(tz != 0) sum -= Rear * val; 
34                         sum = (sum / Center) + fetch_double(U,tid); //scale by the diagonal and add the current U[tid]
35                         if(sum < 0) sum = 0; //projection
36                         U[tid] = val = sum; //update of U; also kept in val for the next pass
37                 }
38                 tid += stride; //advance to this thread's element in the next slice
39         }
40 }
41
42 /* CPU Function: launches MV_Multiplication once, then Vector_Updates twice
    * (first with rb = red = 0, then with rb = black = 1).
    *
    * A : not used by any statement visible in this listing.
    * G : passed to MV_Multiplication.
    * U : passed to all three kernels; Vector_Updates writes into it.
    *
    * All three launches omit a stream argument, so they go to the default
    * stream and execute in the order written.
    *
    * NOTE(review): the allocation of Y, the grid/block setup and the texture
    * setup for U appear only as placeholder comments. No release of Y and no
    * CUDA error checking is visible - confirm they exist in the full source.
    */
43 void Computation_New_Vector_Components(double*A,double*G,double*U)
44 {
45         double *Y; //intermediate vector: written by MV_Multiplication, read by Vector_Updates
46         int red=0, black=1; //parity values passed as rb to Vector_Updates
47         //Allocate a GPU memory space for the vector Y (code elided in the listing)
48         //Configure the kernel execution: grid and block (code elided in the listing)
49         //Elements of vector U filled in the texture memory (code elided in the listing)
50
51         MV_Multiplication<<<grid,block>>>(..., G, U, Y); //fills Y from G and U
52         Vector_Updates<<<grid,block>>>(..., red, Y, U); //updates the entries of U with (ty&1)==0
53         Vector_Updates<<<grid,block>>>(..., black, Y, U); //then the entries of U with (ty&1)==1
54 }