1 #include <cuda_runtime.h>
2 #include <cutil_inline.h>
3 #include "fast_kernels.cu"
5 int main(int argc, char **argv){
6 cudaSetDevice( 0 ); // select first GPU
7 char filename[80] = "image.pgm" ;
8 short *h_in, *h_out, *d_out ;
9 int size, bsx=16, bsy=16 ;
10 dim3 dimBlock, dimGrid ;
11 cudaChannelFormatDesc channelD=cudaCreateChannelDesc<short>();
12 cudaArray * array_img_in ;
13 /*....................... load image and cast...........*/
14 unsigned int * h_img = NULL ;
15 unsigned int *h_outui, H, L ;
16 cutilCheckError( cutLoadPGMi(filename, &h_img, &L, &H));
17 size = H * L * sizeof( short );
18 h_in = new short[H*L] ;
19 for (int k=0; k<H*L ; k++)
20 h_in[k] = (short)h_img[k] ;
21 /*....................... end of image load.............*/
22 cudaHostAlloc((void**)&h_out, size, cudaHostAllocDefault) ;
23 cudaMalloc((void**) &d_out, size);
24 cudaMallocArray( &array_img_in, &channelD, W, H );
25 cudaBindTextureToArray( tex_img_in, array_img_in, channelD);
26 cudaMemcpyToArray( array_img_in, 0, 0, h_in, size, cudaMemcpyHostToDevice);
27 dimBlock = dim3(bsx,bsy,1) ;
28 dimGrid = dim3( L/dimBlock.x, H/dimBlock.y, 1) ;
30 kernel_test<<< dimGrid, dimBlock, 0>>>(d_out, W, H) ;
32 cutilSafeCall( cudaMemcpy(h_out , d_out, size, cudaMemcpyDeviceToHost) ) ;
33 /*...............cast and save output image (optional) */
34 h_outui = new unsigned int[H*L] ;
35 for (int k=0; k<H*L ; k++)
36 h_outui[k] = (unsigned int)h_out[k] ;
37 cutilCheckError( cutSavePGMi("image_out.pgm", h_outui, L, H) ) ;