1 #include <cuda_runtime.h>
2 #include <cutil_inline.h>
3 #include "fast_kernels.cu"
5 int main(int argc, char **argv){
7 short *h_in, *h_out, *d_out ;
8 int size, bsx=16, bsy=16 ; ;
9 dim3 dimBlock, dimGrid ;
10 cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<short>();
11 cudaArray * array_img_in ;
13 /* ... load the image into h_in and determine H, W and size (byte count of the image buffer) here ... */
14 cudaHostAlloc((void**)&h_out, size, cudaHostAllocDefault) ;
15 cudaMalloc((void**) &d_out, size);
16 cudaMallocArray( &array_img_in, &channelDesc, W, H );
17 cudaBindTextureToArray( tex_img_in, array_img_in, channelDesc);
18 cudaMemcpyToArray( array_img_in, 0, 0, h_in, size, cudaMemcpyHostToDevice);
19 dimBlock = dim3(bsx,bsy,1) ;
20 dimGrid = dim3( L/dimBlock.x, H/dimBlock.y, 1) ;
22 kernel_test<<< dimGrid, dimBlock, 0>>>(d_out, W, H) ;
24 cutilSafeCall( cudaMemcpy(h_out , d_out, size, cudaMemcpyDeviceToHost) ) ;
26 /* ... use results here ... */