// NOTE(review): the targets of the two angle-bracket #include directives were
// missing from this file. The headers below are the ones this translation unit
// itself uses; re-add any further header that fast_kernels.cu relies on.
#include <stdio.h>
#include <stdlib.h>
#include <cuda_runtime.h>

#include "fast_kernels.cu"

// Abort with file/line and the CUDA error string when a runtime call fails.
// Guarded so that a definition coming from an included file takes precedence.
#ifndef CUDA_CHECK
#define CUDA_CHECK(call)                                                    \
    do {                                                                    \
        cudaError_t cuda_check_err_ = (call);                               \
        if (cuda_check_err_ != cudaSuccess) {                               \
            fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__,   \
                    cudaGetErrorString(cuda_check_err_));                   \
            exit(EXIT_FAILURE);                                             \
        }                                                                   \
    } while (0)
#endif

/*
 * Host driver: uploads a 16-bit image into a CUDA array bound to the texture
 * reference tex_img_in, launches kernel_test over a 2D grid of 16x16 blocks,
 * and copies the result back into pinned host memory.
 *
 * tex_img_in and kernel_test are not declared in this file; presumably they
 * come from fast_kernels.cu -- verify.
 *
 * Returns 0 on success; any failing CUDA call terminates the process through
 * CUDA_CHECK.
 */
int main(int argc, char **argv)
{
    (void)argc;
    (void)argv;

    CUDA_CHECK(cudaSetDevice(0));

    short *h_in = NULL;   // host input image, filled by the loading step below
    short *h_out = NULL;  // pinned host buffer receiving the result
    short *d_out = NULL;  // device output buffer
    int size;             // byte size of the image buffers, set by the loading step
    const int bsx = 16;   // block width in threads
    const int bsy = 16;   // block height in threads
    dim3 dimBlock, dimGrid;

    // Texel format matches the short element type of h_in / d_out.
    cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<short>();
    cudaArray *array_img_in = NULL;

    /* ... load image in h_in and determine H and W here ...*/

    CUDA_CHECK(cudaHostAlloc((void **)&h_out, size, cudaHostAllocDefault));
    CUDA_CHECK(cudaMalloc((void **)&d_out, size));

    // Input image lives in a W x H CUDA array read through the texture.
    CUDA_CHECK(cudaMallocArray(&array_img_in, &channelDesc, W, H));
    CUDA_CHECK(cudaBindTextureToArray(tex_img_in, array_img_in, channelDesc));
    CUDA_CHECK(cudaMemcpyToArray(array_img_in, 0, 0, h_in, size,
                                 cudaMemcpyHostToDevice));

    dimBlock = dim3(bsx, bsy, 1);
    // Grid width is derived from W, the same width used for the array and
    // passed to the kernel.
    // NOTE(review): integer division truncates, so the grid covers the whole
    // image only when W and H are multiples of the block dimensions. Rounding
    // up is only safe if kernel_test bounds-checks against W and H -- confirm
    // before changing.
    dimGrid = dim3(W / dimBlock.x, H / dimBlock.y, 1);

    kernel_test<<< dimGrid, dimBlock, 0 >>>(d_out, W, H);
    // A launch does not return a status; an invalid configuration is reported
    // through cudaGetLastError().
    CUDA_CHECK(cudaGetLastError());

    // Blocking copy back to the host; checked like every other runtime call.
    CUDA_CHECK(cudaMemcpy(h_out, d_out, size, cudaMemcpyDeviceToHost));

    /* ... use results here ... */

    // Release every resource acquired above, including the texture binding
    // and the CUDA array.
    // NOTE(review): h_in is allocated by the loading step that is not shown;
    // release it there with the matching deallocator.
    CUDA_CHECK(cudaUnbindTexture(tex_img_in));
    CUDA_CHECK(cudaFreeArray(array_img_in));
    CUDA_CHECK(cudaFreeHost(h_out));
    CUDA_CHECK(cudaFree(d_out));

    return 0;
}