// kernel_convoGene8x8pL3: 2D convolution kernel in which each thread produces
// 8 horizontally consecutive output pixels of one image row.
//
// Parameters:
//   output - destination buffer of unsigned char pixels, indexed as
//            output[i * j_dim + column].
//   j_dim  - row stride of `output`, in elements (used as the multiplier of
//            the row index i in the stores at the end of the kernel).
//
// Names used but not declared in this excerpt: `mask` (coefficients, indexed
// as mask[ic*L + m] with m in 0..2), `L` (row-loop trip count and mask row
// stride), `tex_img_inc` (source image read through tex2D), and the locals
// `ic` and `pix`.
//
// For each output k in 0..7 the loop accumulates
//   outval<k> += mask[ic*L + m] * tex2D(tex_img_inc, j + k - 1 + m, i - 1 + ic)
// for m in 0..2 and ic in 0..L-1, i.e. a window 3 columns wide and L rows tall.
// Each of the 10 distinct columns (j-1 .. j+8) is fetched once per row and
// reused by every output it contributes to.
//
// NOTE(review): the leading integers on each line appear to be line numbers
// carried over from a printed listing; the gaps (2-5, 9, 13, 15, ...) suggest
// that the braces and the declarations of `ic` and `pix` were lost — confirm
// against the original file.
// NOTE(review): the "pL3" suffix and the i-1 row offset suggest L == 3 —
// TODO confirm.
// NOTE(review): no bounds check on i / j is visible before the stores; this
// presumably relies on the launch grid exactly covering the image, with a
// width that is a multiple of 8 — verify against the caller.
// NOTE(review): reads at j-1, j+8 and i-1+ic can fall outside the image at
// the borders; the result depends on the texture's addressing mode, which is
// not visible here.
1 __global__ void kernel_convoGene8x8pL3( unsigned char *output, int j_dim )
6 // registers for the 8 computations (one float accumulator per output pixel)
// NOTE(review): the 0.0 initialisers are double literals; 0.0f would match the float type.
7 float outval0=0.0, outval1=0.0, outval2=0.0, outval3=0.0 ;
8 float outval4=0.0, outval5=0.0, outval6=0.0, outval7=0.0 ;
10 // absolute coordinates of the base point, the first of the packet
// j: global x thread index shifted left by 3 (multiplied by 8), so consecutive
//    threads start 8 columns apart; i: global y thread index (image row).
// __umul24 is the 24-bit integer multiply intrinsic: only the low 24 bits of
// each operand take part in the product.
11 int j = ( __umul24( blockIdx.x, blockDim.x) + threadIdx.x)<< 3 ;
12 int i = ( __umul24( blockIdx.y, blockDim.y) + threadIdx.y) ;
14 for (ic=0 ; ic<L ; ic++) // for each row of the region of interest
16 pix = tex2D(tex_img_inc, j+1, i-1+ic) ; // the central columns. Multiplicity 3 (each sample feeds three outputs)
17 outval0 += mask[ __umul24(ic,L) +2 ]*pix ;
18 outval1 += mask[ __umul24(ic,L) +1 ]*pix ;
19 outval2 += mask[ __umul24(ic,L) ]*pix ;
20 pix = tex2D(tex_img_inc, j+2, i-1+ic) ;
21 outval1 += mask[ __umul24(ic,L) +2 ]*pix ;
22 outval2 += mask[ __umul24(ic,L) +1 ]*pix ;
23 outval3 += mask[ __umul24(ic,L) ]*pix ;
24 pix = tex2D(tex_img_inc, j+3, i-1+ic) ;
25 outval2 += mask[ __umul24(ic,L) +2 ]*pix ;
26 outval3 += mask[ __umul24(ic,L) +1 ]*pix ;
27 outval4 += mask[ __umul24(ic,L) ]*pix ;
28 pix = tex2D(tex_img_inc, j+4, i-1+ic) ;
29 outval3 += mask[ __umul24(ic,L) +2 ]*pix ;
30 outval4 += mask[ __umul24(ic,L) +1 ]*pix ;
31 outval5 += mask[ __umul24(ic,L) ]*pix ;
32 pix = tex2D(tex_img_inc, j+5, i-1+ic) ;
33 outval4 += mask[ __umul24(ic,L) +2 ]*pix ;
34 outval5 += mask[ __umul24(ic,L) +1 ]*pix ;
35 outval6 += mask[ __umul24(ic,L) ]*pix ;
36 pix = tex2D(tex_img_inc, j+6, i-1+ic) ;
37 outval5 += mask[ __umul24(ic,L) +2 ]*pix ;
38 outval6 += mask[ __umul24(ic,L) +1 ]*pix ;
39 outval7 += mask[ __umul24(ic,L) ]*pix ;
41 pix = tex2D(tex_img_inc, j, i-1+ic) ; // the outer columns
42 outval0 += mask[ __umul24(ic,L) +1 ]*pix ; // multiplicity 2 (column j feeds outputs 0 and 1)
43 outval1 += mask[ __umul24(ic,L) ]*pix ;
44 pix = tex2D(tex_img_inc, j-1, i-1+ic) ;
45 outval0 += mask[ __umul24(ic,L) ]*pix ; // multiplicity 1 (column j-1 feeds output 0 only)
47 pix = tex2D(tex_img_inc, j+7, i-1+ic) ;
48 outval6 += mask[ __umul24(ic,L) +2 ]*pix ; // multiplicity 2 (column j+7 feeds outputs 6 and 7)
49 outval7 += mask[ __umul24(ic,L) +1 ]*pix ;
50 pix = tex2D(tex_img_inc, j+8, i-1+ic) ;
51 outval7 += mask[ __umul24(ic,L) +2 ]*pix ; // multiplicity 1 (column j+8 feeds output 7 only)
// Store the 8 results at row i, columns j .. j+7. Each float accumulator is
// implicitly converted to unsigned char by the assignment.
// NOTE(review): no clamping or rounding is applied before that conversion;
// values outside 0..255 are not handled here — confirm the mask keeps
// results in range.
54 output[ __umul24(i, j_dim) + j ] = outval0 ; // the 8 outputs
55 output[ __umul24(i, j_dim) + j+1 ] = outval1 ;
56 output[ __umul24(i, j_dim) + j+2 ] = outval2 ;
57 output[ __umul24(i, j_dim) + j+3 ] = outval3 ;
58 output[ __umul24(i, j_dim) + j+4 ] = outval4 ;
59 output[ __umul24(i, j_dim) + j+5 ] = outval5 ;
60 output[ __umul24(i, j_dim) + j+6 ] = outval6 ;
61 output[ __umul24(i, j_dim) + j+7 ] = outval7 ;