X-Git-Url: https://bilbo.iut-bm.univ-fcomte.fr/and/gitweb/book_gpu.git/blobdiff_plain/0cd5e33087ba2395b349cfca863c4c73948029ab..a79be71669f1b6fa38df1cc606be8e2b39fc4428:/BookGPU/Chapters/chapter19/code.cu?ds=sidebyside diff --git a/BookGPU/Chapters/chapter19/code.cu b/BookGPU/Chapters/chapter19/code.cu index 3a95e55..c7bad40 100644 --- a/BookGPU/Chapters/chapter19/code.cu +++ b/BookGPU/Chapters/chapter19/code.cu @@ -1,4 +1,5 @@ -// compute y = B*x (B is stored in SCOO formats [ cols, rows, values, offsets, numPacks, numRows ]) +// compute y = B*x (B is stored in SCOO formats [ cols, rows, values, +//offsets, numPacks, numRows ]) // LANE_SIZE = 2^k // NUM_ROWS_PER_SLICE is computed based on sparsity template @@ -13,7 +14,9 @@ sliced_coo_kernel( const float * x, float * y) { - const int thread_lane = threadIdx.x & (LANE_SIZE-1); // ~ threadIdx.x % LANE_SIZE + // ~ threadIdx.x % LANE_SIZE + const int thread_lane = threadIdx.x & (LANE_SIZE-1); + const int row_lane = threadIdx.x/(LANE_SIZE); __shared__ float sdata[NUM_ROWS_PER_SLICE][LANE_SIZE];