#include "THCTensorTypeUtils.cuh" #include "THCTensor.h" #include "THCTensorCopy.h" #include "THCHalf.h" #include namespace { struct SizeAndStride { long size; long stride; }; int compareSizeAndStride(const void* a, const void* b) { const SizeAndStride* aS = (const SizeAndStride*) a; const SizeAndStride* bS = (const SizeAndStride*) b; return aS->stride < bS->stride; } } #define IMPL_TENSOR_UTILS(TENSOR_TYPE, DATA_TYPE) \ \ TENSOR_TYPE* \ TensorUtils::newTensor(THCState* state) { \ return TENSOR_TYPE##_new(state); \ } \ \ TENSOR_TYPE* \ TensorUtils::newContiguous(THCState* state, \ TENSOR_TYPE* t) { \ return TENSOR_TYPE##_newContiguous(state, t); \ } \ \ THLongStorage* \ TensorUtils::newSizeOf(THCState* state, \ TENSOR_TYPE* t) { \ return TENSOR_TYPE##_newSizeOf(state, t); \ } \ \ void \ TensorUtils::retain(THCState* state, \ TENSOR_TYPE* t) { \ TENSOR_TYPE##_retain(state, t); \ } \ \ void \ TensorUtils::free(THCState* state, \ TENSOR_TYPE* t) { \ TENSOR_TYPE##_free(state, t); \ } \ \ void \ TensorUtils::freeCopyTo(THCState* state, \ TENSOR_TYPE* src, \ TENSOR_TYPE* dst) { \ TENSOR_TYPE##_freeCopyTo(state, src, dst); \ } \ \ void \ TensorUtils::resize(THCState* state, \ TENSOR_TYPE* out, \ THLongStorage* sizes, \ THLongStorage* strides) { \ TENSOR_TYPE##_resize(state, out, sizes, strides); \ } \ \ void \ TensorUtils::resizeAs(THCState* state, \ TENSOR_TYPE* dst, \ TENSOR_TYPE* src) { \ TENSOR_TYPE##_resizeAs(state, dst, src); \ } \ \ DATA_TYPE* \ TensorUtils::getData(THCState* state, \ TENSOR_TYPE* t) { \ /* FIXME: no cast is required except for THCudaHalfTensor */ \ return (DATA_TYPE*) TENSOR_TYPE##_data(state, t); \ } \ \ long \ TensorUtils::getNumElements(THCState* state, \ TENSOR_TYPE* t) { \ return TENSOR_TYPE##_nElement(state, t); \ } \ \ long \ TensorUtils::getSize(THCState* state, \ TENSOR_TYPE* t, \ int dim) { \ return TENSOR_TYPE##_size(state, t, dim); \ } \ \ long \ TensorUtils::getStride(THCState* state, \ TENSOR_TYPE* t, \ int dim) { \ return TENSOR_TYPE##_stride(state, t, dim); \ } \ \ int \ TensorUtils::getDims(THCState* state, \ TENSOR_TYPE* t) { \ return TENSOR_TYPE##_nDimension(state, t); \ } \ \ bool \ TensorUtils::isContiguous(THCState* state, \ TENSOR_TYPE* t) { \ return TENSOR_TYPE##_isContiguous(state, t); \ } \ \ int \ TensorUtils::getDevice(THCState* state, \ TENSOR_TYPE* t) { \ return TENSOR_TYPE##_getDevice(state, t); \ } \ \ void \ TensorUtils::copyIgnoringOverlaps(THCState* state, \ TENSOR_TYPE* dst, \ TENSOR_TYPE* src) { \ return TENSOR_TYPE##_copyIgnoringOverlaps(state, dst, src); \ } \ \ bool \ TensorUtils::overlappingIndices(THCState* state, \ TENSOR_TYPE* t) { \ /* In this function, we don't care about permutations of the */ \ /* size/stride arrays (transpositions). */ \ /* We order the size/stride arrays by stride, skipping dimensions of */ \ /* size 1. Strides of dimensions of size 1 don't matter, since there */ \ /* is only one addressing point in them. */ \ /* In this reordered view, the tensor is contiguous if */ \ /* stride[dim] == size[dim + 1] * stride[dim + 1] for all `dim`. */ \ /* The tensor has holes if */ \ /* stride[dim] > size[dim + 1] * stride[dim + 1] for one or more */ \ /* `dim`. */ \ /* The tensor has overlaps if */ \ /* stride[dim] < size[dim + 1] * stride[dim + 1] for one or more */ \ /* `dim`, or the innermost stride is 0. */ \ \ /* Extract size/stride arrays; only consider size >1 dims. 
  SizeAndStride info[MAX_CUTORCH_DIMS]; \
 \
  int dims = TensorUtils<TENSOR_TYPE>::getDims(state, t); \
  int nonSize1Dims = 0; \
  for (int i = 0; i < dims; ++i) { \
    long size = TensorUtils<TENSOR_TYPE>::getSize(state, t, i); \
    if (size > 1) { \
      info[nonSize1Dims].size = size; \
      info[nonSize1Dims].stride = \
        TensorUtils<TENSOR_TYPE>::getStride(state, t, i); \
      ++nonSize1Dims; \
    } \
  } \
 \
  if (nonSize1Dims == 0) { \
    /* no overlap */ \
    return false; \
  } \
 \
  /* Sort by decreasing stride; the innermost (smallest-stride) dimension */ \
  /* of the sorted view is at [nonSize1Dims - 1]. */ \
  qsort(info, nonSize1Dims, sizeof(SizeAndStride), compareSizeAndStride); \
 \
  /* Base case: innermost dimension must have stride >= 1 */ \
  if (info[nonSize1Dims - 1].stride < 1) { \
    return true; \
  } \
 \
  /* Subsequent dimensions, if any */ \
  for (int i = nonSize1Dims - 2; i >= 0; --i) { \
    if (info[i].stride < info[i + 1].size * info[i + 1].stride) { \
      /* There are overlaps */ \
      return true; \
    } \
  } \
 \
  /* Tensor has holes or is contiguous */ \
  return false; \
} \
 \
bool \
TensorUtils<TENSOR_TYPE>::canUse32BitIndexMath(THCState* state, \
                                               TENSOR_TYPE* t) { \
  long elements = TensorUtils<TENSOR_TYPE>::getNumElements(state, t); \
  if (elements >= UINT_MAX) { \
    return false; \
  } \
 \
  /* Compute the linear offset of the last element (linear index */ \
  /* elements - 1); if that fits in 32 bits, every offset does. */ \
  long offset = 0; \
  long linearId = elements - 1; \
 \
  for (int i = TensorUtils<TENSOR_TYPE>::getDims(state, t) - 1; i >= 0; --i) { \
    long curDimIndex = \
      linearId % TensorUtils<TENSOR_TYPE>::getSize(state, t, i); \
    long curDimOffset = curDimIndex * \
      TensorUtils<TENSOR_TYPE>::getStride(state, t, i); \
    offset += curDimOffset; \
    linearId /= TensorUtils<TENSOR_TYPE>::getSize(state, t, i); \
  } \
 \
  if (offset >= UINT_MAX) { \
    return false; \
  } \
 \
  return true; \
}

IMPL_TENSOR_UTILS(THCudaByteTensor, unsigned char)
IMPL_TENSOR_UTILS(THCudaCharTensor, char)
IMPL_TENSOR_UTILS(THCudaShortTensor, short)
IMPL_TENSOR_UTILS(THCudaIntTensor, int)
IMPL_TENSOR_UTILS(THCudaLongTensor, long)
IMPL_TENSOR_UTILS(THCudaTensor, float)
IMPL_TENSOR_UTILS(THCudaDoubleTensor, double)

#ifdef CUDA_HALF_TENSOR
IMPL_TENSOR_UTILS(THCudaHalfTensor, half)
#endif

#undef IMPL_TENSOR_UTILS
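
/* Illustrative sketch (excluded from the build): a minimal host-side
   walk-through of the stride-ordering argument used by overlappingIndices
   above, applied to plain size/stride arrays. The helper name
   `hasOverlappingIndices`, the fixed 16-dimension cap, and the example
   tensors are hypothetical and exist only for illustration; the real entry
   point is TensorUtils<T>::overlappingIndices. */
#if 0
#include <stdio.h>
#include <stdlib.h>

struct SizeAndStride { long size; long stride; };

/* Descending-stride comparator, mirroring compareSizeAndStride above. */
static int cmpDesc(const void* a, const void* b) {
  const SizeAndStride* aS = (const SizeAndStride*) a;
  const SizeAndStride* bS = (const SizeAndStride*) b;
  if (aS->stride < bS->stride) return 1;
  if (aS->stride > bS->stride) return -1;
  return 0;
}

/* Same criterion as overlappingIndices, on raw arrays: drop size-1 dims,
   sort by decreasing stride, then flag an overlap whenever an outer stride
   is smaller than the extent (size * stride) of the next inner dimension. */
static bool hasOverlappingIndices(const long* size, const long* stride, int dims) {
  SizeAndStride info[16];
  int n = 0;
  for (int i = 0; i < dims; ++i) {
    if (size[i] > 1) {
      info[n].size = size[i];
      info[n].stride = stride[i];
      ++n;
    }
  }
  if (n == 0) return false;
  qsort(info, n, sizeof(SizeAndStride), cmpDesc);
  if (info[n - 1].stride < 1) return true;
  for (int i = n - 2; i >= 0; --i) {
    if (info[i].stride < info[i + 1].size * info[i + 1].stride) return true;
  }
  return false;
}

int main() {
  /* 2x3 tensor, strides (3, 1): contiguous, no two indices share an address. */
  long sizeA[] = {2, 3}, strideA[] = {3, 1};
  /* 2x3 tensor, strides (1, 1): rows alias; (0,1) and (1,0) hit offset 1. */
  long sizeB[] = {2, 3}, strideB[] = {1, 1};
  /* 2x3 tensor, strides (4, 1): a hole after each row, but no overlap. */
  long sizeC[] = {2, 3}, strideC[] = {4, 1};
  printf("%d %d %d\n",
         (int) hasOverlappingIndices(sizeA, strideA, 2),   /* 0 */
         (int) hasOverlappingIndices(sizeB, strideB, 2),   /* 1 */
         (int) hasOverlappingIndices(sizeC, strideC, 2));  /* 0 */
  return 0;
}
#endif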