BookGPU/Chapters/chapter11/code4.cu

   1 template<typename Tx>
   2 __device__ Tx Aver(Tx z,int i,int j, Tx *z)
   3          { return (z-z[j+1])/(j-i+1); }
   4
   5 template<typename Tx>
   6 __global__ void monotonizekernel(Tx *y, Tx *z, Tx *u, int *key, int n)
   7 {
   8    int i = threadIdx.x + blockIdx.x * blockDim.x;
   9    if(i<n) {
  10       int smallestJ = i;
  11       Tx curP, smallestP, curz=z[i];
  12       smallestP=Aver(curz,i,i,z);
  13       for(int j = i+1; j < n; j++) {
  14           curP=Aver(curz,i,j,z);
  15           if(smallestP>curP) {
  16                smallestJ = j;
  17                smallestP = curP;
  18           }
  19       }
  20       curP=y[i];
  21       if(curP > smallestP)
  22         t=smallestP;
  23       else
  24                                 smallestJ=i;
  25       key[i]=smallestJ;
  26       u[i]=t;
  27    }
  28 }
  29
  30 template< typename Tx >
  31 void MonotonizeData(Tx *y, int n, Tx *u)
  32 {
  33     thrust::less_equal<int> binary_pred;
  34     thrust::maximum<Tx>     binary_op2;
  35     thrust::device_vector<Tx> z_d(n+1);
  36     thrust::device_vector<int> keys_d(n);
  37     thrust::device_ptr<Tx> y_d(y), u_d(u);
  38     thrust::fill(u_d, u_d+n, -1e100);
  39     thrust::fill(keys_d.begin(), keys_d.end(), 0);
  40
  41     thrust::reverse_iterator< typename thrust::device_vector<Tx>::iterator>  y_reverse_b(y_d+n), y_reverse_end(y_d), z_reverse_b(z_d.end());
  42
  43     thrust::inclusive_scan(y_reverse_b,y_reverse_end,z_reverse_b+1);
  44
  45     monotonizekernel<<<grid, block>>>(y, thrust::raw_pointer_cast(&z_d[0]), u, thrust::raw_pointer_cast(&keys_d[0]), n );
  46
  47     thrust::sort(keys_d.begin(), keys_d.end());
  48     thrust::inclusive_scan_by_key(keys_d.begin(), keys_d.end(), u_d, u_d, binary_pred, binary_op2);
  49 }