// Kernel excerpt: each thread computes 8 consecutive output values, starting at flat index
// i*j_dim + j. Output k is masque[0]*input[base+k-1] + masque[1]*input[base+k] + masque[2]*input[base+k+1].
// The 10 input samples involved are each loaded once and reused by up to three accumulators.
// The rest of the parameter list and the declarations of val, baseIdx, id, j_dim and masque
// are not part of the visible lines.
// NOTE(review): no bounds check on i / j appears in the visible lines - presumably the launch
// grid covers the image exactly; confirm against the host code.
1 __global__ void kernel_convoSep8HL3x8pG( unsigned char *input, unsigned char *output,
6 float outval0=0.0, outval1=0.0, outval2=0.0, outval3=0.0, outval4=0.0, outval5=0.0, outval6=0.0, outval7=0.0 ; // one float accumulator per output value (initialisers are double literals)
8 // absolute coordinates of the base point (first of the 8 values handled by this thread)
9 int j = (__mul24( blockIdx.x, blockDim.x ) + threadIdx.x)<<3 ; // global x thread index times 8
10 int i = __mul24( blockIdx.y, blockDim.y ) + threadIdx.y ; // global y thread index
12 baseIdx = __mul24(i , j_dim) + j ; // flat index of the base point; presumably j_dim is the row length in elements - TODO confirm
13 if (baseIdx >0 ) id = baseIdx-1 ; else id = baseIdx ; // index of the sample just before the base; kept at baseIdx when baseIdx is 0 so index -1 is not formed. NOTE(review): when j == 0 and i > 0 this is i*j_dim - 1, the flat element preceding this row's start - confirm this is intended
14 val = input[id] ; // sample 1: 1 multiply-add
15 outval0 += masque[0]*val ;
16 val = input[baseIdx++] ; // sample 2: 2 multiply-adds
17 outval0 += masque[1]*val ;
18 outval1 += masque[0]*val ;
19 val = input[baseIdx++] ; // samples 3 to 8: 3 multiply-adds each
20 outval0 += masque[2]*val ;
21 outval1 += masque[1]*val ;
22 outval2 += masque[0]*val ;
23 val = input[baseIdx++] ;
24 outval1 += masque[2]*val ;
25 outval2 += masque[1]*val ;
26 outval3 += masque[0]*val ;
27 val = input[baseIdx++] ;
28 outval2 += masque[2]*val ;
29 outval3 += masque[1]*val ;
30 outval4 += masque[0]*val ;
31 val = input[baseIdx++] ;
32 outval3 += masque[2]*val ;
33 outval4 += masque[1]*val ;
34 outval5 += masque[0]*val ;
35 val = input[baseIdx++] ;
36 outval4 += masque[2]*val ;
37 outval5 += masque[1]*val ;
38 outval6 += masque[0]*val ;
39 val = input[baseIdx++] ;
40 outval5 += masque[2]*val ;
41 outval6 += masque[1]*val ;
42 outval7 += masque[0]*val ;
43 val = input[baseIdx++] ; // sample 9: 2 multiply-adds
44 outval6 += masque[2]*val ;
45 outval7 += masque[1]*val ;
46 val = input[baseIdx++] ; // sample 10: 1 multiply-add; this reads input[base + 8], one element past the 8 positions written below. NOTE(review): confirm the input buffer extends that far for the last thread
47 outval7 += masque[2]*val ;
49 baseIdx = __mul24(i , j_dim) + j ; // recompute the base index, since baseIdx was advanced by the reads above
50 output[ baseIdx++ ] = outval0 ; // the 8 outputs; each float is implicitly converted to unsigned char on store (no explicit rounding or clamping in these lines)
51 output[ baseIdx++ ] = outval1 ;
52 output[ baseIdx++ ] = outval2 ;
53 output[ baseIdx++ ] = outval3 ;
54 output[ baseIdx++ ] = outval4 ;
55 output[ baseIdx++ ] = outval5 ;
56 output[ baseIdx++ ] = outval6 ;
57 output[ baseIdx++ ] = outval7 ;