author     Jeff Johnson <jhj@fb.com>    2016-06-08 22:54:27 +0300
committer  Jeff Johnson <jhj@fb.com>    2016-06-11 00:49:55 +0300
commit     50107baf5fa7127990ad0f8cfc70a001127194aa (patch)
tree       090af582715a645bd84d744bcb99f1a3d2bac0c7 /lib/THC/THCReduceApplyUtils.cuh
parent     f545703cf2ba602b4f9c7d4c7d53e3ef4f159dbb (diff)
template work
Diffstat (limited to 'lib/THC/THCReduceApplyUtils.cuh')
-rw-r--r--  lib/THC/THCReduceApplyUtils.cuh  283
1 file changed, 1 insertion(+), 282 deletions(-)
diff --git a/lib/THC/THCReduceApplyUtils.cuh b/lib/THC/THCReduceApplyUtils.cuh
index d5ac2b5..06f969f 100644
--- a/lib/THC/THCReduceApplyUtils.cuh
+++ b/lib/THC/THCReduceApplyUtils.cuh
@@ -6,284 +6,12 @@
 #include "THCGeneral.h"
 #include "THCTensor.h"
 #include "THCDeviceUtils.cuh"
-
-// Maximum number of dimensions allowed for cutorch
-#define MAX_CUTORCH_DIMS 25
-
-// Warning string for tensor arguments that are too large or have too
-// many dimensions
-#define CUTORCH_STR(X) #X
-#define CUTORCH_DIM_WARNING "tensor too large or too many (>" \
-  CUTORCH_STR(MAX_CUTORCH_DIMS) ") dimensions"
+#include "THCTensorInfo.cuh"
 
 // Enum that indicates whether tensor arguments are read/write or
 // read-only
 enum TensorArgType { ReadWrite, ReadOnly };
 
-// Copy operator for the pointwise apply kernel
-template <typename T>
-struct CopyOp {
-  __device__ __forceinline__ void operator()(T* dst, T* src) {
-#if __CUDA_ARCH__ >= 350
-    *dst = __ldg(src);
-#else
-    *dst = *src;
-#endif
-  }
-};
-
-// CUDA kernel argument that defines tensor layout
-template <typename IndexType>
-struct TensorInfo {
-  // Extracts size/stride information for the kernel.
-  // The optional `reduceDim` indicates a reduction dimension for the
-  // given tensor, so that the resulting size for this dimension will be 1.
-  TensorInfo(THCState* state, THCudaTensor* t, int reduceDim = -1);
-
-  // Collapses all runs of successive dimensions if the size/strides
-  // match up within the run and there are no holes between the
-  // dimensions.
-  // If excludeDim is set (not -1), then excludeDim will not be
-  // collapsed with any other dimension.
-  // Function returns the new dimension index that excludeDim maps to,
-  // since the collapsed dimensions are <= the input dimensions.
-  int collapseDims(int excludeDim = -1);
-
-  // Contiguous tensors of more than one dimension are collapsed down
-  // to one tensor
-  __host__ __device__ inline bool isContiguous() const {
-    return (dims == 1 && strides[0] == 1);
-  }
-
-  float* data;
-  IndexType sizes[MAX_CUTORCH_DIMS];
-  IndexType strides[MAX_CUTORCH_DIMS];
-  int dims;
-};
-
-template <typename IndexType>
-TensorInfo<IndexType>::TensorInfo(THCState* state,
-                                  THCudaTensor* t,
-                                  int reduceDim) {
-  data = THCudaTensor_data(state, t);
-  dims = THCudaTensor_nDimension(state, t);
-  assert(dims <= MAX_CUTORCH_DIMS);
-
-  for (int i = 0; i < dims; ++i) {
-    sizes[i] = THCudaTensor_size(state, t, i);
-    strides[i] = THCudaTensor_stride(state, t, i);
-  }
-
-  assert(reduceDim == -1 || (reduceDim < dims && reduceDim >= 0));
-
-  if (reduceDim != -1) {
-    sizes[reduceDim] = 1;
-  }
-}
-
-template <typename IndexType>
-int
-TensorInfo<IndexType>::collapseDims(int excludeDim) {
-  // Find the innermost dimension not of size 1, since dimensions of size 1 are
-  // collapsible.
-  int firstNonOneDim = -1;
-
-  for (int i = dims - 1; i >= 0; --i) {
-    if (i == excludeDim) {
-      // We cannot collapse this dimension, even if it is size 1
-      firstNonOneDim = i;
-      break;
-    }
-
-    if (sizes[i] != 1) {
-      firstNonOneDim = i;
-      break;
-    }
-  }
-
-  // Special case: if all dimensions are of size 1, then this is a
-  // single-point tensor that we still have to operate on. Reduce to a
-  // single point.
-  if (firstNonOneDim == -1) {
-    assert(excludeDim == -1);
-
-    dims = 1;
-    sizes[0] = 1;
-    strides[0] = 1;
-
-    // Everything effectively got collapsed into this dimension
-    return 0;
-  }
-
-  // Count the number of successive dimensions that can be collapsed, from
-  // innermost to outermost.
-  int numCollapsed = 0;
-
-  // Skip the leading size 1 dims
-  numCollapsed += dims - 1 - firstNonOneDim;
-
-  // We perform one pass through to determine how many dimensions we
-  // can collapse, before calculating the actual size of the collapsed
-  // dimensions.
-  // size/strideInner are the size/strides of the previous inner
-  // non-collapsible dim we encounter.
-  long sizeInner = sizes[firstNonOneDim];
-  long strideInner = strides[firstNonOneDim];
-
-  for (int i = firstNonOneDim - 1; i >= 0; --i) {
-    long sizeOuter = sizes[i];
-    long strideOuter = strides[i];
-
-    // Don't collapse this dimension if we want to exclude it from
-    // collapsing.
-    // Since this code is attempting to collapse a subsequent
-    // dimension (i) with the preceding dimension (i + 1), we can only
-    // perform collapsing if the preceding dimension can be collapsed
-    // (i.e., not excludeDim)
-    if ((excludeDim != i) && (excludeDim != i + 1)) {
-      // The next outermost dimension can be skipped if size 1
-      if (sizeOuter == 1) {
-        ++numCollapsed;
-        continue;
-      }
-
-      // If the next outermost dimension is contiguous with the
-      // previous non-collapsed one, collapse it
-      if (strideOuter == strideInner * sizeInner) {
-        ++numCollapsed;
-
-        // This is the run of collapsed dimensions' size
-        sizeInner = sizeInner * sizeOuter;
-        continue;
-      }
-    }
-
-    // Otherwise, this new outer dimension at `i` cannot be collapsed
-    // because it is excluded from collapsing, or it is not contiguous
-    // with the previous inner dimension.
-    sizeInner = sizeOuter;
-    strideInner = strideOuter;
-  }
-
-  // This will be our new size/stride and dimension.
-  IndexType newSizes[MAX_CUTORCH_DIMS];
-  IndexType newStrides[MAX_CUTORCH_DIMS];
-
-  assert(numCollapsed < dims);
-  int newDims = dims - numCollapsed;
-
-  // We return the index of the excluded dimension that is excluded
-  // from being collapsed here.
-  int returnDim = -1;
-
-  // We perform a second pass through the dimensions to actually
-  // calculate the size of the collapsed dimensions.
-  int collapsedIndex = dims - numCollapsed - 1;
-  newSizes[collapsedIndex] = sizes[firstNonOneDim];
-  newStrides[collapsedIndex] = strides[firstNonOneDim];
-
-  if (firstNonOneDim == excludeDim) {
-    returnDim = collapsedIndex;
-  }
-
-  for (int i = firstNonOneDim - 1; i >= 0; --i) {
-    IndexType sizeOuter = sizes[i];
-    IndexType strideOuter = strides[i];
-
-    if ((excludeDim != i) && (excludeDim != i + 1)) {
-      if (sizeOuter == 1) {
-        // skip
-        continue;
-      }
-
-      if (strideOuter == newSizes[collapsedIndex] * newStrides[collapsedIndex]) {
-        // collapse
-        newSizes[collapsedIndex] *= sizeOuter;
-        continue;
-      }
-    }
-
-    // Otherwise, strides don't match, or dim `i` is excluded from
-    // collapsing.
-    --collapsedIndex;
-    assert(collapsedIndex >= 0);
-    assert(collapsedIndex < newDims);
-    newSizes[collapsedIndex] = sizeOuter;
-    newStrides[collapsedIndex] = strideOuter;
-
-    if (excludeDim == i) {
-      returnDim = collapsedIndex;
-    }
-  }
-
-  // We must have filled all the dimensions we're looking for
-  assert(collapsedIndex == 0);
-  assert((excludeDim == -1) || (returnDim != -1));
-
-  dims = newDims;
-
-  for (int i = 0; i < dims; ++i) {
-    sizes[i] = newSizes[i];
-    strides[i] = newStrides[i];
-  }
-
-  // After collapsing, the original `excludeDim` may have been
-  // renumbered to this new `returnDim`, since some dimensions could
-  // have been collapsed.
-  return returnDim;
-}
-
-// Translate a linear index for the apply to a float* offset;
-// specialized on `Dims` to reduce nvcc compilation time
-template <typename IndexType, int Dims>
-struct IndexToOffset {
-  static __host__ __device__ IndexType get(
-    IndexType linearId,
-    const TensorInfo<IndexType>& info) {
-    IndexType offset = 0;
-
-    // Use static dims
-    for (int i = Dims - 1; i >= 0; --i) {
-      IndexType curDimIndex = linearId % info.sizes[i];
-      IndexType curDimOffset = curDimIndex * info.strides[i];
-      offset += curDimOffset;
-
-      if (i > 0) {
-        linearId /= info.sizes[i];
-      }
-    }
-
-    return offset;
-  }
-};
-
-template <typename IndexType>
-struct IndexToOffset<IndexType, -2> {
-  static __forceinline__ __host__ __device__ IndexType
-    get(IndexType linearId, const TensorInfo<IndexType>& info) {
-    return linearId;
-  }
-};
-
-template <typename IndexType>
-struct IndexToOffset<IndexType, -1> {
-  static __forceinline__ __host__ __device__ IndexType
-    get(IndexType linearId, const TensorInfo<IndexType>& info) {
-    IndexType offset = 0;
-
-    // Use dynamic dims
-    for (int i = info.dims - 1; i >= 0; --i) {
-      IndexType curDimIndex = linearId % info.sizes[i];
-      IndexType curDimOffset = curDimIndex * info.strides[i];
-      offset += curDimOffset;
-
-      linearId /= info.sizes[i];
-    }
-
-    return offset;
-  }
-};
-
 template <typename IndexType>
 __device__ __forceinline__ IndexType getLinearBlockId() {
   return blockIdx.z * gridDim.y * gridDim.x +
@@ -347,16 +75,7 @@ __device__ T reduceBlock(T* smem,
 // Make sure the given tensor doesn't have too many dimensions
 void THCCheckTensorDims(THCState* state, THCudaTensor* tensor, int arg);
 
-// Returns true if all linear ID -> offset math can be performed using 32 bit
-// unsigned math, which is faster than 64 bit math
-THC_API bool THC_canUse32BitIndexMath(THCState* state, THCudaTensor* t);
-
 // Produces a grid with at least one point per tile
 THC_API bool THC_getGridFromTiles(long gridTiles, dim3& grid);
 
-// Determines if the given tensor has overlapping data points (i.e.,
-// is there more than one index into the tensor that references the
-// same piece of data)?
-THC_API bool THC_overlappingIndices(THCState* state, THCudaTensor* t);
-
 #endif // THC_REDUCE_APPLY_UTILS_INC
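Most of what this commit deletes is the TensorInfo machinery, which moves into the new THCTensorInfo.cuh header. Its core trick is dimension collapsing: an outer dimension i can be fused with the inner dimension i + 1 whenever strideOuter == strideInner * sizeInner, so a fully contiguous tensor reaches the kernel as a single flat dimension and the per-element index math gets cheaper. Below is a minimal host-side sketch of that rule; the names and layout are mine, not the moved THC code, and it omits the excludeDim handling:

```cpp
#include <cstdio>

// Illustrative re-implementation of the collapsing rule from
// TensorInfo::collapseDims: fuse outer dim i into the current inner run
// when strides[i] equals the run's stride * size (no hole between them).
int collapseDims(long sizes[], long strides[], int dims) {
  int w = dims - 1;                        // write cursor: current collapsed dim
  for (int i = dims - 2; i >= 0; --i) {    // walk from innermost outward
    if (sizes[i] == 1) continue;           // size-1 dims vanish entirely
    if (strides[i] == strides[w] * sizes[w]) {
      sizes[w] *= sizes[i];                // contiguous with the run: fuse
    } else {
      --w;                                 // hole or mismatch: start a new dim
      sizes[w] = sizes[i];
      strides[w] = strides[i];
    }
  }
  int newDims = dims - w;                  // shift survivors to the front
  for (int i = 0; i < newDims; ++i) {
    sizes[i] = sizes[w + i];
    strides[i] = strides[w + i];
  }
  return newDims;
}

int main() {
  // A contiguous 4 x 3 x 2 tensor collapses to one dim of 24 elements.
  long sizes[] = {4, 3, 2};
  long strides[] = {6, 2, 1};
  int d = collapseDims(sizes, strides, 3);
  printf("%d dim(s): size %ld, stride %ld\n", d, sizes[0], strides[0]);

  // A transposed (column-major) 3 x 4 view has a stride mismatch and
  // cannot be collapsed.
  long s2[] = {3, 4};
  long st2[] = {1, 3};
  printf("%d dim(s)\n", collapseDims(s2, st2, 2));
  return 0;
}
```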
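The removed IndexToOffset turns a linear element index into a memory offset by peeling off one coordinate per dimension with mod/div (innermost first) and weighting each coordinate by that dimension's stride; the -2 specialization shortcuts contiguous tensors, where the offset is the linear index itself. A standalone sketch of the dynamic-dims (-1) variant, with TensorInfo replaced by raw arrays for illustration:

```cpp
#include <cstdio>

// Mirror of the dynamic-dims IndexToOffset logic: decompose linearId into
// per-dimension coordinates and accumulate coordinate * stride.
template <typename IndexType>
IndexType indexToOffset(IndexType linearId,
                        const IndexType* sizes,
                        const IndexType* strides,
                        int dims) {
  IndexType offset = 0;
  for (int i = dims - 1; i >= 0; --i) {
    IndexType curDimIndex = linearId % sizes[i];  // coordinate along dim i
    offset += curDimIndex * strides[i];
    linearId /= sizes[i];                         // move to the next dim out
  }
  return offset;
}

int main() {
  // In a transposed 3 x 4 view (strides {1, 3}), element (row 2, col 1)
  // has linear index 2 * 4 + 1 = 9 but lives at offset 2 * 1 + 1 * 3 = 5.
  unsigned sizes[] = {3, 4};
  unsigned strides[] = {1, 3};
  printf("offset = %u\n", indexToOffset(9u, sizes, strides, 2));  // prints 5
  return 0;
}
```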
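THC_canUse32BitIndexMath, whose declaration also leaves this header, exists because 32-bit unsigned index arithmetic is faster on the GPU than 64-bit. The declaration alone does not show the test; a plausible sketch of the idea is to check that the largest reachable offset fits in 32 bits (my guess at the criterion, not the actual THC implementation):

```cpp
#include <cstdint>
#include <cassert>

// Hypothetical check in the spirit of THC_canUse32BitIndexMath: 32-bit
// index math is safe when no element of the view lies at an offset beyond
// what an unsigned 32-bit integer can address. Assumes 64-bit long and
// non-negative strides, as THC does.
bool canUse32BitIndexMath(const long* sizes, const long* strides, int dims) {
  long maxOffset = 0;
  for (int i = 0; i < dims; ++i) {
    assert(sizes[i] >= 1);
    maxOffset += (sizes[i] - 1) * strides[i];  // farthest element along dim i
  }
  return maxOffset <= (long)UINT32_MAX;
}
```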