correct

[book_gpu.git] / BookGPU / Chapters / chapter19 / code.cu
diff --git a/BookGPU/Chapters/chapter19/code.cu b/BookGPU/Chapters/chapter19/code.cu

index 3a95e5529817208c3cd70355db5279ba951ab78c..c7bad4048509371481eb3433170e60dc6d53a99c 100644 (file)
--- a/BookGPU/Chapters/chapter19/code.cu
+++ b/BookGPU/Chapters/chapter19/code.cu
@@ -1,4 +1,5 @@
-// compute y = B*x (B is stored in SCOO formats [ cols, rows, values, offsets, numPacks, numRows ])
+// compute y = B*x (B is stored in SCOO format: [ cols, rows, values,
+// offsets, numPacks, numRows ])
  // LANE_SIZE = 2^k
  // NUM_ROWS_PER_SLICE is computed based on sparsity
  template <const uint32_t THREADS_PER_BLOCK, const uint32_t NUM_ROWS_PER_SLICE, const uint32_t LANE_SIZE>
  // LANE_SIZE = 2^k
  // NUM_ROWS_PER_SLICE is computed based on sparsity
  template <const uint32_t THREADS_PER_BLOCK, const uint32_t NUM_ROWS_PER_SLICE, const uint32_t LANE_SIZE>
@@ -13,7 +14,9 @@ sliced_coo_kernel(
                  const float * x,
                        float * y)
  {
                  const float * x,
                        float * y)
  {
-    const int thread_lane = threadIdx.x & (LANE_SIZE-1); // ~ threadIdx.x % LANE_SIZE
+    // ~ threadIdx.x % LANE_SIZE
+        const int thread_lane = threadIdx.x & (LANE_SIZE-1); 
+               
      const int row_lane = threadIdx.x/(LANE_SIZE);
  
      __shared__ float sdata[NUM_ROWS_PER_SLICE][LANE_SIZE];
      const int row_lane = threadIdx.x/(LANE_SIZE);
  
      __shared__ float sdata[NUM_ROWS_PER_SLICE][LANE_SIZE];