Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/torch/cutorch.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTrevor Killeen <killeentm@gmail.com>2016-10-10 20:09:43 +0300
committerTrevor Killeen <killeentm@gmail.com>2016-10-10 20:09:43 +0300
commite4ebbd3a2d668ae51d49d9deaf2c62fc6de81a8e (patch)
treec0c3cbf24139a1504ff232476dd7b64987605821
parent7860a76e1cc50e5c679a965c95cdca2501cac9bc (diff)
parent9efd392d4974e3fe3b1469809971d5d4f360ec7b (diff)
Merge remote-tracking branch 'upstream/master' into more-generic-functions
-rw-r--r--FFI.lua4
-rw-r--r--TensorMath.lua54
-rw-r--r--generic/CTensor.c14
-rw-r--r--lib/THC/CMakeLists.txt1
-rw-r--r--lib/THC/THCAllocator.c4
-rw-r--r--lib/THC/THCApply.cuh10
-rw-r--r--lib/THC/THCAtomics.cuh131
-rw-r--r--lib/THC/THCDeviceTensor-inl.cuh4
-rw-r--r--lib/THC/THCDeviceTensor.cuh2
-rw-r--r--lib/THC/THCDeviceTensorUtils-inl.cuh2
-rw-r--r--lib/THC/THCGeneral.c25
-rw-r--r--lib/THC/THCGeneral.h.in2
-rw-r--r--lib/THC/THCHalf.cu4
-rw-r--r--lib/THC/THCHalf.h4
-rw-r--r--lib/THC/THCReduce.cuh12
-rw-r--r--lib/THC/THCReduceAll.cuh18
-rw-r--r--lib/THC/THCReduceApplyUtils.cu6
-rw-r--r--lib/THC/THCReduceApplyUtils.cuh2
-rw-r--r--lib/THC/THCTensorConv.cu10
-rw-r--r--lib/THC/THCTensorCopy.cu2
-rw-r--r--lib/THC/THCTensorIndex.cu126
-rw-r--r--lib/THC/THCTensorMath2.cu3
-rw-r--r--lib/THC/THCTensorMathPairwise.cu20
-rw-r--r--lib/THC/THCTensorMathReduce.cuh8
-rw-r--r--lib/THC/THCTensorRandom.cu18
-rw-r--r--lib/THC/THCTensorSort.cu4
-rw-r--r--lib/THC/THCTensorTypeUtils.cu12
-rw-r--r--lib/THC/THCTensorTypeUtils.cuh2
-rw-r--r--lib/THC/generic/THCStorage.c12
-rw-r--r--lib/THC/generic/THCStorage.cu2
-rw-r--r--lib/THC/generic/THCStorage.h20
-rw-r--r--lib/THC/generic/THCTensor.c32
-rw-r--r--lib/THC/generic/THCTensor.h26
-rw-r--r--lib/THC/generic/THCTensorIndex.cu56
-rw-r--r--lib/THC/generic/THCTensorMasked.cu10
-rw-r--r--lib/THC/generic/THCTensorMath.cu2
-rw-r--r--lib/THC/generic/THCTensorMath.h2
-rw-r--r--lib/THC/generic/THCTensorMathPointwise.cu2
-rw-r--r--lib/THC/generic/THCTensorMathReduce.cu4
-rw-r--r--lib/THC/generic/THCTensorScatterGather.cu6
-rw-r--r--lib/THC/generic/THCTensorSort.cu6
-rw-r--r--torch/generic/Storage.c27
-rw-r--r--torch/generic/Tensor.c31
43 files changed, 404 insertions, 338 deletions
diff --git a/FFI.lua b/FFI.lua
index bd3ad1c..9417deb 100644
--- a/FFI.lua
+++ b/FFI.lua
@@ -50,7 +50,7 @@ cudaStream_t THCState_getCurrentStream(THCState *state);
typedef struct THCStorage
{
real *data;
- long size;
+ ptrdiff_t size;
int refcount;
char flag;
THAllocator *allocator;
@@ -65,7 +65,7 @@ typedef struct THCTensor
int nDimension;
THCStorage *storage;
- long storageOffset;
+ ptrdiff_t storageOffset;
int refcount;
char flag;
diff --git a/TensorMath.lua b/TensorMath.lua
index abdda0b..e917f8c 100644
--- a/TensorMath.lua
+++ b/TensorMath.lua
@@ -2,6 +2,58 @@ local wrap = require 'cwrap'
local interface = wrap.CInterface.new()
local method = wrap.CInterface.new()
+local argtypes = wrap.CInterface.argtypes
+
+argtypes['ptrdiff_t'] = {
+
+ helpname = function(arg)
+ return 'ptrdiff_t'
+ end,
+
+ declare = function(arg)
+ -- if it is a number we initialize here
+ local default = tonumber(tostring(arg.default)) or 0
+ return string.format("%s arg%d = %g;", 'ptrdiff_t', arg.i, default)
+ end,
+
+ check = function(arg, idx)
+ return string.format("lua_isinteger(L, %d)", idx)
+ end,
+
+ read = function(arg, idx)
+ return string.format("arg%d = (%s)lua_tointeger(L, %d);", arg.i, 'ptrdiff_t', idx)
+ end,
+
+ init = function(arg)
+ -- otherwise do it here
+ if arg.default then
+ local default = tostring(arg.default)
+ if not tonumber(default) then
+ return string.format("arg%d = %s;", arg.i, default)
+ end
+ end
+ end,
+
+ carg = function(arg)
+ return string.format('arg%d', arg.i)
+ end,
+
+ creturn = function(arg)
+ return string.format('arg%d', arg.i)
+ end,
+
+ precall = function(arg)
+ if arg.returned then
+ return string.format('lua_pushinteger(L, (lua_Integer)arg%d);', arg.i)
+ end
+ end,
+
+ postcall = function(arg)
+ if arg.creturned then
+ return string.format('lua_pushinteger(L, (lua_Integer)arg%d);', arg.i)
+ end
+ end
+}
interface:print('/* WARNING: autogenerated file */')
interface:print('')
@@ -559,7 +611,7 @@ for k, Tensor_ in pairs(handledTypenames) do
wrap("numel",
cname("numel"),
{{name=Tensor},
- {name="long", creturned=true}})
+ {name="ptrdiff_t", creturned=true}})
wrap("add",
cname("add"),
diff --git a/generic/CTensor.c b/generic/CTensor.c
index 5569c02..a9663ff 100644
--- a/generic/CTensor.c
+++ b/generic/CTensor.c
@@ -173,16 +173,16 @@ void THFloatTensor_kernel_copy(float *dst,
long *dst_sz, long *dst_st, int dst_dim,
float *src,
long *src_sz, long *src_st, int src_dim,
- long n_elem)
+ ptrdiff_t n_elem)
{
- long k;
+ ptrdiff_t k;
for(k = 0; k < n_elem; k++)
{
- long src_idx = 0;
- long src_rest = k;
- long dst_idx = 0;
- long dst_rest = k;
+ ptrdiff_t src_idx = 0;
+ ptrdiff_t src_rest = k;
+ ptrdiff_t dst_idx = 0;
+ ptrdiff_t dst_rest = k;
int dim;
for(dim = 0; dim < dst_dim; dim++)
@@ -206,7 +206,7 @@ static int cuda_FloatTensor_fakecopy(lua_State *L)
THFloatTensor *self = luaT_checkudata(L, 1, "torch.FloatTensor");
THFloatTensor *src = luaT_checkudata(L, 2, "torch.FloatTensor");
long *d_self_sz, *d_self_st, *d_src_sz, *d_src_st;
- long nElement = THFloatTensor_nElement(self);
+ ptrdiff_t nElement = THFloatTensor_nElement(self);
THArgCheck(THFloatTensor_nElement(self) == THFloatTensor_nElement(src), 2, "sizes do not match");
diff --git a/lib/THC/CMakeLists.txt b/lib/THC/CMakeLists.txt
index 181bc9d..b9ddfbe 100644
--- a/lib/THC/CMakeLists.txt
+++ b/lib/THC/CMakeLists.txt
@@ -211,6 +211,7 @@ INSTALL(FILES
THCReduceAll.cuh
THCReduceApplyUtils.cuh
THCAsmUtils.cuh
+ THCAtomics.cuh
THCScanUtils.cuh
THCSortUtils.cuh
THCAllocator.h
diff --git a/lib/THC/THCAllocator.c b/lib/THC/THCAllocator.c
index e2cd09a..1bed0fb 100644
--- a/lib/THC/THCAllocator.c
+++ b/lib/THC/THCAllocator.c
@@ -1,6 +1,6 @@
#include "THCAllocator.h"
-static void *THCudaHostAllocator_alloc(void* ctx, long size) {
+static void *THCudaHostAllocator_alloc(void* ctx, ptrdiff_t size) {
void* ptr;
if (size < 0) THError("Invalid memory size: %ld", size);
@@ -18,7 +18,7 @@ static void THCudaHostAllocator_free(void* ctx, void* ptr) {
THCudaCheck(cudaFreeHost(ptr));
}
-static void *THCudaHostAllocator_realloc(void* ctx, void* ptr, long size) {
+static void *THCudaHostAllocator_realloc(void* ctx, void* ptr, ptrdiff_t size) {
if (size < 0) THError("Invalid memory size: %ld", size);
THCudaHostAllocator_free(ctx, ptr);
diff --git a/lib/THC/THCApply.cuh b/lib/THC/THCApply.cuh
index dd6d32a..a47e303 100644
--- a/lib/THC/THCApply.cuh
+++ b/lib/THC/THCApply.cuh
@@ -101,7 +101,7 @@ inline dim3 getApplyBlock() {
return dim3(THC_APPLY_THREADS_PER_BLOCK);
}
-inline bool getApplyGrid(THCState* state, long totalElements, dim3& grid) {
+inline bool getApplyGrid(THCState* state, ptrdiff_t totalElements, dim3& grid) {
int curDevice = -1;
cudaGetDevice(&curDevice);
@@ -116,7 +116,7 @@ inline bool getApplyGrid(THCState* state, long totalElements, dim3& grid) {
// 16 warps per block * 4 per SM gives 64 warps per SM at maximum,
// which seems to be a good sweetspot for latency hiding
grid = dim3(min((long long) THCCeilDiv(totalElements,
- (long) THC_APPLY_THREADS_PER_BLOCK),
+ (ptrdiff_t) THC_APPLY_THREADS_PER_BLOCK),
4LL * numSM));
return true;
}
@@ -139,7 +139,7 @@ bool THC_pointwiseApply1(THCState* state,
const dim3 block = getApplyBlock();
dim3 grid;
- long totalElements = TensorUtils<TensorTypeA>::getNumElements(state, a);
+ ptrdiff_t totalElements = TensorUtils<TensorTypeA>::getNumElements(state, a);
if (!getApplyGrid(state, totalElements, grid)) {
return false;
@@ -253,7 +253,7 @@ bool THC_pointwiseApply2(THCState* state,
const Op& op,
TensorArgType aType = ReadWrite,
TensorArgType bType = ReadOnly) {
- long totalElements = TensorUtils<TensorTypeA>::getNumElements(state, a);
+ ptrdiff_t totalElements = TensorUtils<TensorTypeA>::getNumElements(state, a);
if (totalElements != TensorUtils<TensorTypeB>::getNumElements(state, b)) {
return false;
@@ -431,7 +431,7 @@ bool THC_pointwiseApply3(THCState* state,
TensorArgType aType = ReadWrite,
TensorArgType bType = ReadOnly,
TensorArgType cType = ReadOnly) {
- long totalElements = TensorUtils<TensorTypeA>::getNumElements(state, a);
+ ptrdiff_t totalElements = TensorUtils<TensorTypeA>::getNumElements(state, a);
if (totalElements != TensorUtils<TensorTypeB>::getNumElements(state, b) ||
totalElements != TensorUtils<TensorTypeC>::getNumElements(state, c)) {
diff --git a/lib/THC/THCAtomics.cuh b/lib/THC/THCAtomics.cuh
new file mode 100644
index 0000000..0586edf
--- /dev/null
+++ b/lib/THC/THCAtomics.cuh
@@ -0,0 +1,131 @@
+#ifndef THC_ATOMICS_INC
+#define THC_ATOMICS_INC
+
+#include "THCHalf.h"
+
+template <typename T, size_t n>
+struct AtomicAddIntegerImpl;
+
+template<typename T>
+struct AtomicAddIntegerImpl<T, 1> {
+ __device__ void operator()(T *address, T val) {
+ unsigned int * address_as_ui =
+ (unsigned int *) (address - ((size_t)address & 3));
+ unsigned int old = *address_as_ui;
+ unsigned int shift = (((size_t)address & 3) * 8);
+ unsigned int sum;
+ unsigned int assumed;
+
+ do {
+ assumed = old;
+ sum = val + T((old >> shift) & 0xff);
+ old = (old & ~(0x000000ff << shift)) | (sum << shift);
+ old = atomicCAS(address_as_ui, assumed, old);
+ } while (assumed != old);
+ }
+};
+
+template<typename T>
+struct AtomicAddIntegerImpl<T, 2> {
+ __device__ void operator()(T *address, T val) {
+ unsigned int * address_as_ui =
+ (unsigned int *) ((char *)address - ((size_t)address & 2));
+ unsigned int old = *address_as_ui;
+ unsigned int sum;
+ unsigned int newval;
+ unsigned int assumed;
+
+ do {
+ assumed = old;
+ sum = val + (size_t)address & 2 ? T(old >> 16) : T(old & 0xffff);
+ newval = (size_t)address & 2 ? (old & 0xffff) | (sum << 16) : (old & 0xffff0000) | sum;
+ old = atomicCAS(address_as_ui, assumed, newval);
+ } while (assumed != old);
+ }
+};
+
+template<typename T>
+struct AtomicAddIntegerImpl<T, 4> {
+ __device__ void operator()(T *address, T val) {
+ unsigned int * address_as_ui = (unsigned int *) (address);
+ unsigned int old = *address_as_ui;
+ unsigned int newval;
+ unsigned int assumed;
+
+ do {
+ assumed = old;
+ newval = val + (T)old;
+ old = atomicCAS(address_as_ui, assumed, newval);
+ } while (assumed != old);
+ }
+};
+
+template<typename T>
+struct AtomicAddIntegerImpl<T, 8> {
+ __device__ void operator()(T *address, T val) {
+ unsigned long long * address_as_ui = (unsigned long long *) (address);
+ unsigned long long old = *address_as_ui;
+ unsigned long long newval;
+ unsigned long long assumed;
+
+ do {
+ assumed = old;
+ newval = val + (T)old;
+ old = atomicCAS(address_as_ui, assumed, newval);
+ } while (assumed != old);
+ }
+};
+
+__device__ void atomicAdd(unsigned char *address, unsigned char val) {
+ AtomicAddIntegerImpl<unsigned char, sizeof(unsigned char)>()(address, val);
+}
+
+__device__ void atomicAdd(char *address, char val) {
+ AtomicAddIntegerImpl<char, sizeof(char)>()(address, val);
+}
+
+__device__ void atomicAdd(short *address, short val) {
+ AtomicAddIntegerImpl<short, sizeof(short)>()(address, val);
+}
+
+__device__ void atomicAdd(long *address, long val) {
+ AtomicAddIntegerImpl<long, sizeof(long)>()(address, val);
+}
+
+#ifdef CUDA_HALF_TENSOR
+__device__ void atomicAdd(half *address, half val) {
+ unsigned int * address_as_ui =
+ (unsigned int *) ((char *)address - ((size_t)address & 2));
+ unsigned int old = *address_as_ui;
+ unsigned int assumed;
+
+ do {
+ assumed = old;
+ half hsum;
+ hsum.x = (size_t)address & 2 ? (old >> 16) : (old & 0xffff);
+ hsum = THCNumerics<half>::add(hsum, val);
+ old = (size_t)address & 2 ? (old & 0xffff) | (hsum.x << 16) : (old & 0xffff0000) | hsum.x;
+ old = atomicCAS(address_as_ui, assumed, old);
+ } while (assumed != old);
+}
+#endif
+
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 600
+// from CUDA C Programming Guide
+__device__ void atomicAdd(double *address, double val) {
+ unsigned long long int* address_as_ull = (unsigned long long int*)address;
+ unsigned long long int old = *address_as_ull;
+ unsigned long long int assumed;
+
+ do {
+ assumed = old;
+ old = atomicCAS(address_as_ull, assumed,
+ __double_as_longlong(val +
+ __longlong_as_double(assumed)));
+
+ // Note: uses integer comparison to avoid hang in case of NaN (since NaN != NaN)
+ } while (assumed != old);
+}
+#endif
+
+#endif // THC_ATOMICS_INC
diff --git a/lib/THC/THCDeviceTensor-inl.cuh b/lib/THC/THCDeviceTensor-inl.cuh
index a0058ad..9237d07 100644
--- a/lib/THC/THCDeviceTensor-inl.cuh
+++ b/lib/THC/THCDeviceTensor-inl.cuh
@@ -103,9 +103,9 @@ THCDeviceTensor<T, Dim, IndexT, PtrTraits>::cast() const {
template <typename T, int Dim,
typename IndexT, template <typename U> class PtrTraits>
-__host__ __device__ long
+__host__ __device__ ptrdiff_t
THCDeviceTensor<T, Dim, IndexT, PtrTraits>::numElements() const {
- long size = getSize(0);
+ ptrdiff_t size = getSize(0);
for (int i = 1; i < Dim; ++i) {
size *= getSize(i);
diff --git a/lib/THC/THCDeviceTensor.cuh b/lib/THC/THCDeviceTensor.cuh
index 14d56c6..c6b7899 100644
--- a/lib/THC/THCDeviceTensor.cuh
+++ b/lib/THC/THCDeviceTensor.cuh
@@ -142,7 +142,7 @@ class THCDeviceTensor {
/// Returns the total number of elements contained within our data
/// (product of `getSize(i)`)
- __host__ __device__ long numElements() const;
+ __host__ __device__ ptrdiff_t numElements() const;
/// Returns the size array.
__host__ __device__ __forceinline__ const IndexT* sizes() const {
diff --git a/lib/THC/THCDeviceTensorUtils-inl.cuh b/lib/THC/THCDeviceTensorUtils-inl.cuh
index f954772..26c1bb8 100644
--- a/lib/THC/THCDeviceTensorUtils-inl.cuh
+++ b/lib/THC/THCDeviceTensorUtils-inl.cuh
@@ -10,7 +10,7 @@ toDeviceTensor(THCState* state, THCudaTensor* t) {
// Determine the maximum offset into the tensor achievable; `IndexT`
// must be smaller than this type in order to use it.
- long maxOffset = 0;
+ ptrdiff_t maxOffset = 0;
IndexT sizes[Dim];
IndexT strides[Dim];
diff --git a/lib/THC/THCGeneral.c b/lib/THC/THCGeneral.c
index 07dbf2c..5bcce19 100644
--- a/lib/THC/THCGeneral.c
+++ b/lib/THC/THCGeneral.c
@@ -60,8 +60,8 @@ struct THCState {
void (*cutorchGCFunction)(void *data);
void *cutorchGCData;
- long heapSoftmax;
- long heapDelta;
+ ptrdiff_t heapSoftmax;
+ ptrdiff_t heapDelta;
};
THCCudaResourcesPerDevice* THCState_getDeviceResourcePtr(
@@ -640,8 +640,9 @@ void __THCublasCheck(cublasStatus_t status, const char *file, const int line)
}
}
-static long heapSize = 0; // not thread-local
-static const long heapMaxDelta = 1e6;
+static ptrdiff_t heapSize = 0; // not thread-local
+static const ptrdiff_t heapMaxDelta = (ptrdiff_t)1e6;
+static const ptrdiff_t heapMinDelta = (ptrdiff_t)-1e6;
static const double heapSoftmaxGrowthThresh = 0.8; // grow softmax if >80% max after GC
static const double heapSoftmaxGrowthFactor = 1.4; // grow softmax by 40%
@@ -691,8 +692,8 @@ cudaError_t THCudaFree(THCState *state, void *ptr)
return allocator->free(allocator->state, ptr);
}
-static long applyHeapDelta(THCState *state) {
- long newHeapSize = THAtomicAddLong(&heapSize, state->heapDelta) + state->heapDelta;
+static ptrdiff_t applyHeapDelta(THCState *state) {
+ ptrdiff_t newHeapSize = THAtomicAddPtrdiff(&heapSize, state->heapDelta) + state->heapDelta;
state->heapDelta = 0;
return newHeapSize;
}
@@ -701,27 +702,27 @@ static long applyHeapDelta(THCState *state) {
// When THC heap size goes above this softmax, the GC hook is triggered.
// If heap size is above 80% of the softmax after GC, then the softmax is
// increased.
-static void maybeTriggerGC(THCState *state, long curHeapSize) {
+static void maybeTriggerGC(THCState *state, ptrdiff_t curHeapSize) {
if (state->cutorchGCFunction != NULL && curHeapSize > state->heapSoftmax) {
(state->cutorchGCFunction)(state->cutorchGCData);
// ensure heapSize is accurate before updating heapSoftmax
- long newHeapSize = applyHeapDelta(state);
+ ptrdiff_t newHeapSize = applyHeapDelta(state);
if (newHeapSize > state->heapSoftmax * heapSoftmaxGrowthThresh) {
- state->heapSoftmax = state->heapSoftmax * heapSoftmaxGrowthFactor;
+ state->heapSoftmax = (ptrdiff_t)state->heapSoftmax * heapSoftmaxGrowthFactor;
}
}
}
-void THCHeapUpdate(THCState *state, long size) {
+void THCHeapUpdate(THCState *state, ptrdiff_t size) {
state->heapDelta += size;
// batch updates to global heapSize to minimize thread contention
- if (labs(state->heapDelta) < heapMaxDelta) {
+ if (state->heapDelta < heapMaxDelta && state->heapDelta > heapMinDelta) {
return;
}
- long newHeapSize = applyHeapDelta(state);
+ ptrdiff_t newHeapSize = applyHeapDelta(state);
if (size > 0) {
maybeTriggerGC(state, newHeapSize);
}
diff --git a/lib/THC/THCGeneral.h.in b/lib/THC/THCGeneral.h.in
index 8837f42..ce3ffc5 100644
--- a/lib/THC/THCGeneral.h.in
+++ b/lib/THC/THCGeneral.h.in
@@ -114,6 +114,6 @@ THC_API cudaError_t THCudaFree(THCState *state, void *ptr);
THC_API void THCSetGCHandler(THCState *state,
void (*torchGCHandlerFunction)(void *data),
void *data );
-THC_API void THCHeapUpdate(THCState *state, long size);
+THC_API void THCHeapUpdate(THCState *state, ptrdiff_t size);
#endif
diff --git a/lib/THC/THCHalf.cu b/lib/THC/THCHalf.cu
index 7777bf7..5a77293 100644
--- a/lib/THC/THCHalf.cu
+++ b/lib/THC/THCHalf.cu
@@ -10,7 +10,7 @@ struct __float2halfOp {
__device__ half operator()(float v) { return __float2half(v); }
};
-void THCFloat2Half(THCState *state, half *out, float *in, long len) {
+void THCFloat2Half(THCState *state, half *out, float *in, ptrdiff_t len) {
thrust::transform(
#if CUDA_VERSION >= 7000
thrust::cuda::par.on(THCState_getCurrentStream(state)),
@@ -20,7 +20,7 @@ void THCFloat2Half(THCState *state, half *out, float *in, long len) {
in, in + len, out, __float2halfOp());
}
-void THCHalf2Float(THCState *state, float *out, half *in, long len) {
+void THCHalf2Float(THCState *state, float *out, half *in, ptrdiff_t len) {
thrust::transform(
#if CUDA_VERSION >= 7000
thrust::cuda::par.on(THCState_getCurrentStream(state)),
diff --git a/lib/THC/THCHalf.h b/lib/THC/THCHalf.h
index ec6d83f..795874e 100644
--- a/lib/THC/THCHalf.h
+++ b/lib/THC/THCHalf.h
@@ -18,8 +18,8 @@
#include <cuda_fp16.h>
#include <stdint.h>
-THC_EXTERNC void THCFloat2Half(THCState *state, half *out, float *in, long len);
-THC_EXTERNC void THCHalf2Float(THCState *state, float *out, half *in, long len);
+THC_EXTERNC void THCFloat2Half(THCState *state, half *out, float *in, ptrdiff_t len);
+THC_EXTERNC void THCHalf2Float(THCState *state, float *out, half *in, ptrdiff_t len);
THC_API half THC_float2half(float a);
THC_API float THC_half2float(half a);
diff --git a/lib/THC/THCReduce.cuh b/lib/THC/THCReduce.cuh
index a7135de..7f276a2 100644
--- a/lib/THC/THCReduce.cuh
+++ b/lib/THC/THCReduce.cuh
@@ -123,7 +123,7 @@ inline dim3 getNoncontigReduceBlock() {
return dim3(THC_NONCONTIG_REDUCE_BLOCK_SIZE);
}
-inline dim3 getContigReduceBlock(long numSlices, long reductionSize) {
+inline dim3 getContigReduceBlock(ptrdiff_t numSlices, long reductionSize) {
// If the number of slices is low but the reduction dimension size
// is high, then we should increase block size for greater parallelism.
// Aim for at least 32 warps per SM (assume 15 SMs; don't bother
@@ -148,13 +148,13 @@ inline dim3 getContigReduceBlock(long numSlices, long reductionSize) {
return dim3(numWarps * 32);
}
-inline bool getNoncontigReduceGrid(long elements, dim3& grid) {
+inline bool getNoncontigReduceGrid(ptrdiff_t elements, dim3& grid) {
// One output point per thread
return THC_getGridFromTiles(THCCeilDiv(elements,
- (long) THC_NONCONTIG_REDUCE_BLOCK_SIZE), grid);
+ (ptrdiff_t) THC_NONCONTIG_REDUCE_BLOCK_SIZE), grid);
}
-inline bool getContigReduceGrid(long elements, dim3& grid) {
+inline bool getContigReduceGrid(ptrdiff_t elements, dim3& grid) {
// One output point per block
return THC_getGridFromTiles(elements, grid);
}
@@ -169,11 +169,11 @@ bool THC_reduceDim(THCState* state,
const ReduceOp& reduceOp,
typename TensorUtils<TensorType>::DataType init,
int dim) {
- long inElements = TensorUtils<TensorType>::getNumElements(state, in);
+ ptrdiff_t inElements = TensorUtils<TensorType>::getNumElements(state, in);
long reductionSize = TensorUtils<TensorType>::getSize(state, in, dim);
long reductionStride = TensorUtils<TensorType>::getStride(state, in, dim);
- long outElements = inElements / reductionSize;
+ ptrdiff_t outElements = inElements / reductionSize;
if (TensorUtils<TensorType>::getDims(state, out) > MAX_CUTORCH_DIMS ||
TensorUtils<TensorType>::getDims(state, in) > MAX_CUTORCH_DIMS) {
diff --git a/lib/THC/THCReduceAll.cuh b/lib/THC/THCReduceAll.cuh
index 498fb53..a9cea84 100644
--- a/lib/THC/THCReduceAll.cuh
+++ b/lib/THC/THCReduceAll.cuh
@@ -124,16 +124,16 @@ kernelReduceAllPass2(int numPass1Blocks,
// Perform a two-pass reduction if the tensor is large enough to
// warrant it.
-inline bool isTwoPassReductionSize(long elements) {
+inline bool isTwoPassReductionSize(ptrdiff_t elements) {
return (elements > THC_TWO_PASS_REDUCTION_SIZE);
}
template <typename InT, typename AccT>
-inline long getTwoPassBlocks(THCState* state, long elements) {
- long numBlocks = THCCeilDiv(elements, THC_REDUCE_ALL_BLOCK_SIZE);
+inline ptrdiff_t getTwoPassBlocks(THCState* state, ptrdiff_t elements) {
+ ptrdiff_t numBlocks = THCCeilDiv(elements, (ptrdiff_t)THC_REDUCE_ALL_BLOCK_SIZE);
// We can only have as many blocks as there is scratch space
- long scratchSpace =
+ ptrdiff_t scratchSpace =
THCState_getCurrentDeviceScratchSpaceSize(state) / sizeof(AccT);
THAssert(scratchSpace > 0);
@@ -146,14 +146,14 @@ inline long getTwoPassBlocks(THCState* state, long elements) {
// Get the block/grid size that we want
template <typename InT, typename AccT>
-inline void getPass1ReduceBlockGrid(THCState* state, long elements,
+inline void getPass1ReduceBlockGrid(THCState* state, ptrdiff_t elements,
dim3& grid, dim3& block) {
grid = dim3(getTwoPassBlocks<InT, AccT>(state, elements));
block = dim3(THC_REDUCE_ALL_BLOCK_SIZE);
}
template <typename InT, typename AccT>
-inline void getPass2ReduceBlockGrid(THCState* state, long elements,
+inline void getPass2ReduceBlockGrid(THCState* state, ptrdiff_t elements,
dim3& grid, dim3& block) {
grid = dim3(1);
// We only need as many threads as there were blocks originally
@@ -161,7 +161,7 @@ inline void getPass2ReduceBlockGrid(THCState* state, long elements,
}
template <typename InT, typename AccT>
-inline void getSinglePassReduceBlockGrid(long elements,
+inline void getSinglePassReduceBlockGrid(ptrdiff_t elements,
dim3& grid, dim3& block) {
grid = dim3(1);
block = dim3(THC_REDUCE_ALL_BLOCK_SIZE);
@@ -176,7 +176,7 @@ template <typename ModifyOp,
int ADims>
void callReduceAll(THCState* state,
const TensorInfo<InT, IndexType>& in,
- long totalElements,
+ ptrdiff_t totalElements,
AccT init,
const ModifyOp& modifyOp,
const ReduceOp& reduceOp,
@@ -229,7 +229,7 @@ bool THC_reduceAll(THCState* state,
AccT init,
AccT* out,
int outOnDevice) {
- long inElements = TensorUtils<TensorType>::getNumElements(state, in);
+ ptrdiff_t inElements = TensorUtils<TensorType>::getNumElements(state, in);
if (TensorUtils<TensorType>::getDims(state, in) > MAX_CUTORCH_DIMS) {
return false;
diff --git a/lib/THC/THCReduceApplyUtils.cu b/lib/THC/THCReduceApplyUtils.cu
index b3c6f51..6d4c06e 100644
--- a/lib/THC/THCReduceApplyUtils.cu
+++ b/lib/THC/THCReduceApplyUtils.cu
@@ -11,7 +11,7 @@ void THCCheckTensorDims(THCState* state, THCudaTensor* tensor, int arg) {
THArgCheck(dims <= MAX_CUTORCH_DIMS, arg, CUTORCH_DIM_WARNING);
}
-bool THC_getGridFromTiles(long gridTiles, dim3& grid) {
+bool THC_getGridFromTiles(ptrdiff_t gridTiles, dim3& grid) {
if (gridTiles > MAX_GRID_SIZE * MAX_GRID_SIZE * MAX_GRID_SIZE) {
return false;
}
@@ -21,11 +21,11 @@ bool THC_getGridFromTiles(long gridTiles, dim3& grid) {
long gridZ = 1;
if (gridTiles > MAX_GRID_SIZE) {
- gridTiles = THCCeilDiv(gridTiles, (long) MAX_GRID_SIZE);
+ gridTiles = THCCeilDiv(gridTiles, (ptrdiff_t) MAX_GRID_SIZE);
gridY = gridTiles > MAX_GRID_SIZE ? MAX_GRID_SIZE : gridTiles;
if (gridTiles > MAX_GRID_SIZE) {
- gridTiles = THCCeilDiv(gridTiles, (long) MAX_GRID_SIZE);
+ gridTiles = THCCeilDiv(gridTiles, (ptrdiff_t) MAX_GRID_SIZE);
gridZ = gridTiles > MAX_GRID_SIZE ? MAX_GRID_SIZE : gridTiles;
}
}
diff --git a/lib/THC/THCReduceApplyUtils.cuh b/lib/THC/THCReduceApplyUtils.cuh
index 06f969f..e365b3a 100644
--- a/lib/THC/THCReduceApplyUtils.cuh
+++ b/lib/THC/THCReduceApplyUtils.cuh
@@ -76,6 +76,6 @@ __device__ T reduceBlock(T* smem,
void THCCheckTensorDims(THCState* state, THCudaTensor* tensor, int arg);
// Produces a grid with at least one point per tile
-THC_API bool THC_getGridFromTiles(long gridTiles, dim3& grid);
+THC_API bool THC_getGridFromTiles(ptrdiff_t gridTiles, dim3& grid);
#endif // THC_REDUCE_APPLY_UTILS_INC
diff --git a/lib/THC/THCTensorConv.cu b/lib/THC/THCTensorConv.cu
index 2543d26..71aac03 100644
--- a/lib/THC/THCTensorConv.cu
+++ b/lib/THC/THCTensorConv.cu
@@ -354,7 +354,7 @@ THC_API void THCudaTensor_conv2Dmv(THCState *state, THCudaTensor *output, float
nOutputCols = (nInputCols - nKernelCols) / scol + 1;
}
- long nelem = THCudaTensor_nElement(state, output);
+ ptrdiff_t nelem = THCudaTensor_nElement(state, output);
THCudaTensor_resize3d(state, output, nOutputPlane, nOutputRows, nOutputCols);
if (beta == 0 || nelem != THCudaTensor_nElement(state, output)) {
@@ -475,7 +475,7 @@ THC_API void THCudaTensor_conv2Dmm(THCState *state, THCudaTensor *output, float
nOutputCols = (nInputCols - nKernelCols) / scol + 1;
}
- long nelem = THCudaTensor_nElement(state, output);
+ ptrdiff_t nelem = THCudaTensor_nElement(state, output);
THCudaTensor_resize4d(state, output, nbatch, nOutputPlane, nOutputRows, nOutputCols);
if (beta == 0 || nelem != THCudaTensor_nElement(state, output)) {
@@ -576,7 +576,7 @@ THC_API void THCudaTensor_conv2DRevger(THCState *state, THCudaTensor *output, fl
nOutputRows = nInputRows - (nKernelRows - 1) * srow;
nOutputCols = nInputCols - (nKernelCols - 1) * scol;
- long nelem = THCudaTensor_nElement(state, output);
+ ptrdiff_t nelem = THCudaTensor_nElement(state, output);
THCudaTensor_resize4d(state, output, nKernelPlane, nInputPlane, nOutputRows, nOutputCols);
if (nelem == 0 || beta == 0 || nelem != THCudaTensor_nElement(state, output)) {
@@ -649,7 +649,7 @@ THC_API void THCudaTensor_conv2DRevgerm(THCState *state, THCudaTensor *output, f
nOutputRows = nInputRows - (nKernelRows - 1) * srow;
nOutputCols = nInputCols - (nKernelCols - 1) * scol;
- long nelem = THCudaTensor_nElement(state, output);
+ ptrdiff_t nelem = THCudaTensor_nElement(state, output);
THCudaTensor_resize4d(state, output, nKernelPlane, nInputPlane, nOutputRows, nOutputCols);
if (nelem == 0 || beta == 0 || nelem != THCudaTensor_nElement(state, output)) {
@@ -912,7 +912,7 @@ THC_API void THCudaTensor_conv2Dmap(THCState *state, THCudaTensor *output, THCud
nOutputRows = (nInputRows - nKernelRows) / stride_y + 1;
nOutputCols = (nInputCols - nKernelCols) / stride_x + 1;
- // long nelem = THCudaTensor_nElement(state, output);
+ // ptrdiff_t nelem = THCudaTensor_nElement(state, output);
THCudaTensor_resize3d(state, output, nOutputPlane, nOutputRows, nOutputCols);
float *input_data = THCudaTensor_data(state, input);
diff --git a/lib/THC/THCTensorCopy.cu b/lib/THC/THCTensorCopy.cu
index d5aae4f..b0001c5 100644
--- a/lib/THC/THCTensorCopy.cu
+++ b/lib/THC/THCTensorCopy.cu
@@ -24,7 +24,7 @@ struct CopyOp {
template <typename TensorTypeDst, typename TensorTypeSrc>
void
THC_copyTensor(THCState* state, TensorTypeDst* dst, TensorTypeSrc* src) {
- long totalElements = TensorUtils<TensorTypeDst>::getNumElements(state, dst);
+ ptrdiff_t totalElements = TensorUtils<TensorTypeDst>::getNumElements(state, dst);
THArgCheck(totalElements ==
TensorUtils<TensorTypeSrc>::getNumElements(state, src),
diff --git a/lib/THC/THCTensorIndex.cu b/lib/THC/THCTensorIndex.cu
index 8bf1a88..415e625 100644
--- a/lib/THC/THCTensorIndex.cu
+++ b/lib/THC/THCTensorIndex.cu
@@ -9,6 +9,7 @@
#include "THCReduce.cuh"
#include "THCDeviceUtils.cuh"
#include "THCNumerics.cuh"
+#include "THCAtomics.cuh"
#include <algorithm> // for std::min
// We prefer this kernel to avoid reloading index points if the number
@@ -96,131 +97,6 @@ __global__ void indexCopyLargeIndex(TensorInfo<T, IndexType> dst,
}
}
-template <typename T, size_t n>
-struct AtomicAddIntegerImpl;
-
-template<typename T>
-struct AtomicAddIntegerImpl<T, 1> {
- __device__ void operator()(T *address, T val) {
- unsigned int * address_as_ui =
- (unsigned int *) (address - ((size_t)address & 3));
- unsigned int old = *address_as_ui;
- unsigned int shift = (((size_t)address & 3) * 8);
- unsigned int sum;
- unsigned int assumed;
-
- do {
- assumed = old;
- sum = val + T((old >> shift) & 0xff);
- old = (old & ~(0x000000ff << shift)) | (sum << shift);
- old = atomicCAS(address_as_ui, assumed, old);
- } while (assumed != old);
- }
-};
-
-template<typename T>
-struct AtomicAddIntegerImpl<T, 2> {
- __device__ void operator()(T *address, T val) {
- unsigned int * address_as_ui =
- (unsigned int *) ((char *)address - ((size_t)address & 2));
- unsigned int old = *address_as_ui;
- unsigned int sum;
- unsigned int newval;
- unsigned int assumed;
-
- do {
- assumed = old;
- sum = val + (size_t)address & 2 ? T(old >> 16) : T(old & 0xffff);
- newval = (size_t)address & 2 ? (old & 0xffff) | (sum << 16) : (old & 0xffff0000) | sum;
- old = atomicCAS(address_as_ui, assumed, newval);
- } while (assumed != old);
- }
-};
-
-template<typename T>
-struct AtomicAddIntegerImpl<T, 4> {
- __device__ void operator()(T *address, T val) {
- unsigned int * address_as_ui = (unsigned int *) (address);
- unsigned int old = *address_as_ui;
- unsigned int newval;
- unsigned int assumed;
-
- do {
- assumed = old;
- newval = val + (T)old;
- old = atomicCAS(address_as_ui, assumed, newval);
- } while (assumed != old);
- }
-};
-
-template<typename T>
-struct AtomicAddIntegerImpl<T, 8> {
- __device__ void operator()(T *address, T val) {
- unsigned long long * address_as_ui = (unsigned long long *) (address);
- unsigned long long old = *address_as_ui;
- unsigned long long newval;
- unsigned long long assumed;
-
- do {
- assumed = old;
- newval = val + (T)old;
- old = atomicCAS(address_as_ui, assumed, newval);
- } while (assumed != old);
- }
-};
-
-__device__ void atomicAdd(unsigned char *address, unsigned char val) {
- AtomicAddIntegerImpl<unsigned char, sizeof(unsigned char)>()(address, val);
-}
-
-__device__ void atomicAdd(char *address, char val) {
- AtomicAddIntegerImpl<char, sizeof(char)>()(address, val);
-}
-
-__device__ void atomicAdd(short *address, short val) {
- AtomicAddIntegerImpl<short, sizeof(short)>()(address, val);
-}
-
-__device__ void atomicAdd(long *address, long val) {
- AtomicAddIntegerImpl<long, sizeof(long)>()(address, val);
-}
-
-#ifdef CUDA_HALF_TENSOR
-__device__ void atomicAdd(half *address, half val) {
- unsigned int * address_as_ui =
- (unsigned int *) ((char *)address - ((size_t)address & 2));
- unsigned int old = *address_as_ui;
- unsigned int assumed;
-
- do {
- assumed = old;
- half hsum;
- hsum.x = (size_t)address & 2 ? (old >> 16) : (old & 0xffff);
- hsum = THCNumerics<half>::add(hsum, val);
- old = (size_t)address & 2 ? (old & 0xffff) | (hsum.x << 16) : (old & 0xffff0000) | hsum.x;
- old = atomicCAS(address_as_ui, assumed, old);
- } while (assumed != old);
-}
-#endif
-
-#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 600
-// from CUDA C Programmic Guide
-__device__ void atomicAdd(double *address, double val) {
- unsigned long long int* address_as_ull = (unsigned long long int*)address;
- unsigned long long int old = *address_as_ull;
- unsigned long long int assumed;
-
- do {
- assumed = old;
- old = atomicCAS(address_as_ull, assumed,
- __double_as_longlong(val +
- __longlong_as_double(assumed)));
-
- // Note: uses integer comparison to avoid hang in case of NaN (since NaN != NaN)
- } while (assumed != old);
-}
-#endif
-
// We prefer this kernel to avoid reloading index points if the number
// of indices is a small number.
// This kernel in fact works for all choices of problem size, but if
diff --git a/lib/THC/THCTensorMath2.cu b/lib/THC/THCTensorMath2.cu
index afd262d..d1fe328 100644
--- a/lib/THC/THCTensorMath2.cu
+++ b/lib/THC/THCTensorMath2.cu
@@ -72,7 +72,7 @@ float THCudaTensor_dist(THCState *state, THCudaTensor *self, THCudaTensor *src,
{
THAssert(THCudaTensor_checkGPU(state, 2, self, src));
self = THCudaTensor_newContiguous(state, self);
- long size = THCudaTensor_nElement(state, self);
+ ptrdiff_t size = THCudaTensor_nElement(state, self);
src = THCudaTensor_newContiguous(state, src);
thrust::device_ptr<float> self_data(THCudaTensor_data(state, self));
thrust::device_ptr<float> src_data(THCudaTensor_data(state, src));
@@ -103,4 +103,3 @@ void THCudaTensor_randn(THCState *state, THCudaTensor *r_, THLongStorage *size)
THCudaTensor_resize(state, r_, size, NULL);
THCudaTensor_normal(state, r_, 0, 1);
}
-
diff --git a/lib/THC/THCTensorMathPairwise.cu b/lib/THC/THCTensorMathPairwise.cu
index e7d0573..2695f2d 100644
--- a/lib/THC/THCTensorMathPairwise.cu
+++ b/lib/THC/THCTensorMathPairwise.cu
@@ -344,21 +344,21 @@ void THCudaTensor_triu(THCState *state, THCudaTensor *self_, THCudaTensor *src_,
#include "THCGenerateAllTypes.h"
// Copy the kth diagonal of a matrix B to a vector A.
-__global__ void THCudaTensor_copyFromDiagonal(float* a, float* b, long start, long size, long strideSum, long strideA) {
- for (long linearIndex = blockIdx.x * blockDim.x + threadIdx.x;
+__global__ void THCudaTensor_copyFromDiagonal(float* a, float* b, ptrdiff_t start, ptrdiff_t size, ptrdiff_t strideSum, ptrdiff_t strideA) {
+ for (ptrdiff_t linearIndex = blockIdx.x * blockDim.x + threadIdx.x;
linearIndex < size;
linearIndex += gridDim.x * blockDim.x) {
- const long bOffset = start + strideSum * linearIndex;
+ const ptrdiff_t bOffset = start + strideSum * linearIndex;
a[strideA * linearIndex] = b[bOffset];
}
}
// Copy vector B to the kth diagonal of a matrix A
-__global__ void THCudaTensor_copyToDiagonal(float* a, float* b, long start, long size, long strideSum, long strideB) {
- for (long linearIndex = blockIdx.x * blockDim.x + threadIdx.x;
+__global__ void THCudaTensor_copyToDiagonal(float* a, float* b, ptrdiff_t start, ptrdiff_t size, ptrdiff_t strideSum, ptrdiff_t strideB) {
+ for (ptrdiff_t linearIndex = blockIdx.x * blockDim.x + threadIdx.x;
linearIndex < size;
linearIndex += gridDim.x * blockDim.x) {
- const long aOffset = start + strideSum * linearIndex;
+ const ptrdiff_t aOffset = start + strideSum * linearIndex;
a[aOffset] = b[strideB * linearIndex];
}
}
@@ -381,16 +381,16 @@ void THCudaTensor_diag(THCState *state, THCudaTensor *self_, THCudaTensor *src_,
THCudaTensor_copyFromDiagonal<<<grid, threads, 0, THCState_getCurrentStream(state)>>>
(THCudaTensor_data(state, self_), THCudaTensor_data(state, src_), start, size, stride0 + stride1, strideSelf);
} else {
- long totalElements = THCudaTensor_nElement(state, src_);
- long size = (k > 0) ? totalElements + k : totalElements - k;
+ ptrdiff_t totalElements = THCudaTensor_nElement(state, src_);
+ ptrdiff_t size = (k > 0) ? totalElements + k : totalElements - k;
long strideSrc = THCudaTensor_stride(state, src_, 0);
THCudaTensor_resize2d(state, self_, size, size);
THCudaTensor_zero(state, self_);
long stride0 = THCudaTensor_stride(state, self_, 0);
long stride1 = THCudaTensor_stride(state, self_, 1);
const dim3 threads(min((long long)THCState_getCurrentDeviceProperties(state)->maxThreadsPerBlock, (long long)size));
- dim3 grid(min((long long)1024, (long long)THCCeilDiv(size, (long)threads.x)));
- long start = (k >= 0 ? k * stride1 : -k * stride0);
+ dim3 grid(min((long long)1024, (long long)THCCeilDiv(size, (ptrdiff_t)threads.x)));
+ ptrdiff_t start = (k >= 0 ? k * stride1 : -k * stride0);
THCudaTensor_copyToDiagonal<<<grid, threads, 0, THCState_getCurrentStream(state)>>>
(THCudaTensor_data(state, self_), THCudaTensor_data(state, src_), start, totalElements, stride0 + stride1, strideSrc);
}
diff --git a/lib/THC/THCTensorMathReduce.cuh b/lib/THC/THCTensorMathReduce.cuh
index 8e368be..db2e424 100644
--- a/lib/THC/THCTensorMathReduce.cuh
+++ b/lib/THC/THCTensorMathReduce.cuh
@@ -123,7 +123,7 @@ struct LogicalAny {
};
template<typename Real>
-__global__ void THCTensor_kernel_renorm(Real *data, const Real value, const long size, const Real maxnorm)
+__global__ void THCTensor_kernel_renorm(Real *data, const Real value, const ptrdiff_t size, const Real maxnorm)
{
__shared__ Real buffer[32];
long tx = threadIdx.x;
@@ -134,7 +134,7 @@ __global__ void THCTensor_kernel_renorm(Real *data, const Real value, const long
buffer[tx] = ScalarConvert<int, Real>::to(0);
// get norm of axis
- for (long i=tx; i<size; i+=step)
+ for (ptrdiff_t i=tx; i<size; i+=step)
{
buffer[tx] = THCNumerics<Real>::add(
buffer[tx],
@@ -163,7 +163,7 @@ __global__ void THCTensor_kernel_renorm(Real *data, const Real value, const long
)
);
// renormalize
- for (long i=tx; i<size; i+=step)
+ for (ptrdiff_t i=tx; i<size; i+=step)
{
row[i] = THCNumerics<Real>::mul(row[i], norm);
}
@@ -326,7 +326,7 @@ __host__ void THCTensor_varOuterDim(THCState *state, TensorTypeK *tgt, TensorTyp
unsigned ndim = TensorUtils<TensorTypeK>::getDims(state, src);
// Treat all outer dimensions (i.e. dim < dimension) as one.
unsigned num_orows = 1;
- for (unsigned dim = 0; dim < dimension; dim++) {
+ for (long dim = 0; dim < dimension; dim++) {
num_orows *= TensorUtils<TensorTypeK>::getSize(state, src, dim);
}
unsigned row_size = TensorUtils<TensorTypeK>::getSize(state, src, dimension);
diff --git a/lib/THC/THCTensorRandom.cu b/lib/THC/THCTensorRandom.cu
index 769a8ba..05b41b3 100644
--- a/lib/THC/THCTensorRandom.cu
+++ b/lib/THC/THCTensorRandom.cu
@@ -238,13 +238,13 @@ __global__ void generate_log_normal(curandStateMtgp32 *state, int size, float *r
}
}
-#define NUM_BLOCKS min((int)THCCeilDiv(size, (long) BLOCK_SIZE), MAX_NUM_BLOCKS)
+#define NUM_BLOCKS min(THCCeilDiv(size, (ptrdiff_t) BLOCK_SIZE), (ptrdiff_t) MAX_NUM_BLOCKS)
THC_API void THCudaTensor_uniform(THCState* state, THCudaTensor *self_, double a, double b)
{
THAssert(THCudaTensor_checkGPU(state, 1, self_));
Generator* gen = THCRandom_getGenerator(state);
THCudaTensor *self = THCudaTensor_newContiguous(state, self_);
- long size = THCudaTensor_nElement(state, self);
+ ptrdiff_t size = THCudaTensor_nElement(state, self);
float *data = THCudaTensor_data(state, self);
generate_uniform<<<NUM_BLOCKS, BLOCK_SIZE, 0, THCState_getCurrentStream(state)>>>(
@@ -258,7 +258,7 @@ THC_API void THCudaTensor_bernoulli(THCState* state, THCudaTensor *self_, double
THAssert(THCudaTensor_checkGPU(state, 1, self_));
Generator* gen = THCRandom_getGenerator(state);
THCudaTensor *self = THCudaTensor_newContiguous(state, self_);
- long size = THCudaTensor_nElement(state, self);
+ ptrdiff_t size = THCudaTensor_nElement(state, self);
float *data = THCudaTensor_data(state, self);
generate_bernoulli<<<NUM_BLOCKS, BLOCK_SIZE, 0, THCState_getCurrentStream(state)>>>(
@@ -272,7 +272,7 @@ THC_API void THCudaTensor_normal(THCState* state, THCudaTensor *self_, double me
THAssert(THCudaTensor_checkGPU(state, 1, self_));
Generator* gen = THCRandom_getGenerator(state);
THCudaTensor *self = THCudaTensor_newContiguous(state, self_);
- long size = THCudaTensor_nElement(state, self);
+ ptrdiff_t size = THCudaTensor_nElement(state, self);
float *data = THCudaTensor_data(state, self);
generate_normal<<<NUM_BLOCKS, BLOCK_SIZE, 0, THCState_getCurrentStream(state)>>>(
@@ -287,7 +287,7 @@ THC_API void THCudaTensor_logNormal(THCState* state, THCudaTensor *self_, double
Generator* gen = THCRandom_getGenerator(state);
THCudaTensor *self = THCudaTensor_newContiguous(state, self_);
- long size = THCudaTensor_nElement(state, self);
+ ptrdiff_t size = THCudaTensor_nElement(state, self);
float *data = THCudaTensor_data(state, self);
generate_log_normal<<<NUM_BLOCKS, BLOCK_SIZE, 0, THCState_getCurrentStream(state)>>>(
@@ -302,7 +302,7 @@ THC_API void THCudaTensor_geometric(THCState* state, THCudaTensor *self_, double
Generator* gen = THCRandom_getGenerator(state);
THCudaTensor *self = THCudaTensor_newContiguous(state, self_);
- long size = THCudaTensor_nElement(state, self);
+ ptrdiff_t size = THCudaTensor_nElement(state, self);
float *data = THCudaTensor_data(state, self);
generate_geometric<<<NUM_BLOCKS, BLOCK_SIZE, 0, THCState_getCurrentStream(state)>>>(
@@ -317,7 +317,7 @@ THC_API void THCudaTensor_exponential(THCState* state, THCudaTensor *self_, doub
Generator* gen = THCRandom_getGenerator(state);
THCudaTensor *self = THCudaTensor_newContiguous(state, self_);
- long size = THCudaTensor_nElement(state, self);
+ ptrdiff_t size = THCudaTensor_nElement(state, self);
float *data = THCudaTensor_data(state, self);
generate_exponential<<<NUM_BLOCKS, BLOCK_SIZE, 0, THCState_getCurrentStream(state)>>>(
@@ -332,7 +332,7 @@ THC_API void THCudaTensor_cauchy(THCState* state, THCudaTensor *self_, double me
Generator* gen = THCRandom_getGenerator(state);
THCudaTensor *self = THCudaTensor_newContiguous(state, self_);
- long size = THCudaTensor_nElement(state, self);
+ ptrdiff_t size = THCudaTensor_nElement(state, self);
float *data = THCudaTensor_data(state, self);
generate_cauchy<<<NUM_BLOCKS, BLOCK_SIZE, 0, THCState_getCurrentStream(state)>>>(
@@ -704,7 +704,7 @@ THC_API void THCudaTensor_multinomial(struct THCState *state,
// Each warp in a block will generate a sample from a different
// distribution concurrently.
- long numBlocks = THCCeilDiv(numDist, 4L);
+ ptrdiff_t numBlocks = THCCeilDiv(numDist, 4L);
dim3 grid(numBlocks < MAX_NUM_BLOCKS ? numBlocks : MAX_NUM_BLOCKS);
for (int sample = 0; sample < n_sample; ++sample) {
diff --git a/lib/THC/THCTensorSort.cu b/lib/THC/THCTensorSort.cu
index f2dd0e9..589d3e9 100644
--- a/lib/THC/THCTensorSort.cu
+++ b/lib/THC/THCTensorSort.cu
@@ -22,9 +22,9 @@ void THCudaLongTensor_fillSliceWithIndex(THCState* state,
long dims = THCudaLongTensor_nDimension(state, t);
THArgCheck(dims <= MAX_CUTORCH_DIMS, 2, CUTORCH_DIM_WARNING);
- long inElements = THCudaLongTensor_nElement(state, t);
+ ptrdiff_t inElements = THCudaLongTensor_nElement(state, t);
long sliceSize = THCudaLongTensor_size(state, t, dim);
- long numSlices = inElements / sliceSize;
+ ptrdiff_t numSlices = inElements / sliceSize;
dim3 grid;
if (!THC_getGridFromTiles(numSlices, grid)) {
diff --git a/lib/THC/THCTensorTypeUtils.cu b/lib/THC/THCTensorTypeUtils.cu
index 96cd3bc..a273a72 100644
--- a/lib/THC/THCTensorTypeUtils.cu
+++ b/lib/THC/THCTensorTypeUtils.cu
@@ -80,7 +80,7 @@ TensorUtils<TENSOR_TYPE>::getData(THCState* state, \
return (DATA_TYPE*) TENSOR_TYPE##_data(state, t); \
} \
\
-long \
+ptrdiff_t \
TensorUtils<TENSOR_TYPE>::getNumElements(THCState* state, \
TENSOR_TYPE* t) { \
return TENSOR_TYPE##_nElement(state, t); \
@@ -185,18 +185,18 @@ TensorUtils<TENSOR_TYPE>::overlappingIndices(THCState* state, \
bool \
TensorUtils<TENSOR_TYPE>::canUse32BitIndexMath(THCState* state, \
TENSOR_TYPE* t) { \
- long elements = TensorUtils<TENSOR_TYPE>::getNumElements(state, t); \
+ ptrdiff_t elements = TensorUtils<TENSOR_TYPE>::getNumElements(state, t); \
if (elements >= UINT_MAX) { \
return false; \
} \
\
- long offset = 0; \
- long linearId = elements - 1; \
+ ptrdiff_t offset = 0; \
+ ptrdiff_t linearId = elements - 1; \
\
for (int i = TensorUtils<TENSOR_TYPE>::getDims(state, t) - 1; i >= 0; --i) { \
- long curDimIndex = \
+ ptrdiff_t curDimIndex = \
linearId % TensorUtils<TENSOR_TYPE>::getSize(state, t, i); \
- long curDimOffset = curDimIndex * \
+ ptrdiff_t curDimOffset = curDimIndex * \
TensorUtils<TENSOR_TYPE>::getStride(state, t, i); \
offset += curDimOffset; \
linearId /= TensorUtils<TENSOR_TYPE>::getSize(state, t, i); \
diff --git a/lib/THC/THCTensorTypeUtils.cuh b/lib/THC/THCTensorTypeUtils.cuh
index 22a2f92..4f5d516 100644
--- a/lib/THC/THCTensorTypeUtils.cuh
+++ b/lib/THC/THCTensorTypeUtils.cuh
@@ -50,7 +50,7 @@ struct TensorUtils {
static void resizeAs(THCState* state, TENSOR_TYPE* dst, \
TENSOR_TYPE* src); \
static DATA_TYPE* getData(THCState* state, TENSOR_TYPE* t); \
- static long getNumElements(THCState* state, TENSOR_TYPE* t); \
+ static ptrdiff_t getNumElements(THCState* state, TENSOR_TYPE* t); \
static long getSize(THCState* state, TENSOR_TYPE* t, int dim); \
static long getStride(THCState* state, TENSOR_TYPE* t, int dim); \
static int getDims(THCState* state, TENSOR_TYPE* t); \
diff --git a/lib/THC/generic/THCStorage.c b/lib/THC/generic/THCStorage.c
index 61ba125..ad68526 100644
--- a/lib/THC/generic/THCStorage.c
+++ b/lib/THC/generic/THCStorage.c
@@ -7,7 +7,7 @@ real* THCStorage_(data)(THCState *state, const THCStorage *self)
return self->data;
}
-long THCStorage_(size)(THCState *state, const THCStorage *self)
+ptrdiff_t THCStorage_(size)(THCState *state, const THCStorage *self)
{
return self->size;
}
@@ -17,13 +17,13 @@ int THCStorage_(elementSize)(THCState *state)
return sizeof(real);
}
-void THCStorage_(set)(THCState *state, THCStorage *self, long index, real value)
+void THCStorage_(set)(THCState *state, THCStorage *self, ptrdiff_t index, real value)
{
THArgCheck((index >= 0) && (index < self->size), 2, "index out of bounds");
THCudaCheck(cudaMemcpy(self->data + index, &value, sizeof(real), cudaMemcpyHostToDevice));
}
-real THCStorage_(get)(THCState *state, const THCStorage *self, long index)
+real THCStorage_(get)(THCState *state, const THCStorage *self, ptrdiff_t index)
{
THArgCheck((index >= 0) && (index < self->size), 2, "index out of bounds");
real value;
@@ -41,7 +41,7 @@ THCStorage* THCStorage_(new)(THCState *state)
return storage;
}
-THCStorage* THCStorage_(newWithSize)(THCState *state, long size)
+THCStorage* THCStorage_(newWithSize)(THCState *state, ptrdiff_t size)
{
THArgCheck(size >= 0, 2, "invalid size");
@@ -103,13 +103,13 @@ THCStorage* THCStorage_(newWithSize4)(THCState *state, real data0, real data1, r
return self;
}
-THCStorage* THCStorage_(newWithMapping)(THCState *state, const char *fileName, long size, int isShared)
+THCStorage* THCStorage_(newWithMapping)(THCState *state, const char *fileName, ptrdiff_t size, int isShared)
{
THError("not available yet for THCStorage");
return NULL;
}
-THCStorage* THCStorage_(newWithData)(THCState *state, real *data, long size)
+THCStorage* THCStorage_(newWithData)(THCState *state, real *data, ptrdiff_t size)
{
THCStorage *storage = (THCStorage*)THAlloc(sizeof(THCStorage));
storage->data = data;
diff --git a/lib/THC/generic/THCStorage.cu b/lib/THC/generic/THCStorage.cu
index a6bb29a..63bccd7 100644
--- a/lib/THC/generic/THCStorage.cu
+++ b/lib/THC/generic/THCStorage.cu
@@ -12,7 +12,7 @@ void THCStorage_(fill)(THCState *state, THCStorage *self, real value)
self_data, self_data+self->size, value);
}
-void THCStorage_(resize)(THCState *state, THCStorage *self, long size)
+void THCStorage_(resize)(THCState *state, THCStorage *self, ptrdiff_t size)
{
THArgCheck(size >= 0, 2, "invalid size");
diff --git a/lib/THC/generic/THCStorage.h b/lib/THC/generic/THCStorage.h
index 194ad62..a46caad 100644
--- a/lib/THC/generic/THCStorage.h
+++ b/lib/THC/generic/THCStorage.h
@@ -9,7 +9,7 @@
typedef struct THCStorage
{
real *data;
- long size;
+ ptrdiff_t size;
int refcount;
char flag;
THAllocator *allocator;
@@ -19,36 +19,36 @@ typedef struct THCStorage
THC_API real* THCStorage_(data)(THCState *state, const THCStorage*);
-THC_API long THCStorage_(size)(THCState *state, const THCStorage*);
+THC_API ptrdiff_t THCStorage_(size)(THCState *state, const THCStorage*);
THC_API int THCStorage_(elementSize)(THCState *state);
/* slow access -- checks everything */
-THC_API void THCStorage_(set)(THCState *state, THCStorage*, long, real);
-THC_API real THCStorage_(get)(THCState *state, const THCStorage*, long);
+THC_API void THCStorage_(set)(THCState *state, THCStorage*, ptrdiff_t, real);
+THC_API real THCStorage_(get)(THCState *state, const THCStorage*, ptrdiff_t);
THC_API THCStorage* THCStorage_(new)(THCState *state);
-THC_API THCStorage* THCStorage_(newWithSize)(THCState *state, long size);
+THC_API THCStorage* THCStorage_(newWithSize)(THCState *state, ptrdiff_t size);
THC_API THCStorage* THCStorage_(newWithSize1)(THCState *state, real);
THC_API THCStorage* THCStorage_(newWithSize2)(THCState *state, real, real);
THC_API THCStorage* THCStorage_(newWithSize3)(THCState *state, real, real, real);
THC_API THCStorage* THCStorage_(newWithSize4)(THCState *state, real, real, real, real);
-THC_API THCStorage* THCStorage_(newWithMapping)(THCState *state, const char *filename, long size, int shared);
+THC_API THCStorage* THCStorage_(newWithMapping)(THCState *state, const char *filename, ptrdiff_t size, int shared);
/* takes ownership of data */
-THC_API THCStorage* THCStorage_(newWithData)(THCState *state, real *data, long size);
+THC_API THCStorage* THCStorage_(newWithData)(THCState *state, real *data, ptrdiff_t size);
-THC_API THCStorage* THCStorage_(newWithAllocator)(THCState *state, long size,
+THC_API THCStorage* THCStorage_(newWithAllocator)(THCState *state, ptrdiff_t size,
THAllocator* allocator,
void *allocatorContext);
THC_API THCStorage* THCStorage_(newWithDataAndAllocator)(
- THCState *state, real* data, long size, THAllocator* allocator, void *allocatorContext);
+ THCState *state, real* data, ptrdiff_t size, THAllocator* allocator, void *allocatorContext);
THC_API void THCStorage_(setFlag)(THCState *state, THCStorage *storage, const char flag);
THC_API void THCStorage_(clearFlag)(THCState *state, THCStorage *storage, const char flag);
THC_API void THCStorage_(retain)(THCState *state, THCStorage *storage);
THC_API void THCStorage_(free)(THCState *state, THCStorage *storage);
-THC_API void THCStorage_(resize)(THCState *state, THCStorage *storage, long size);
+THC_API void THCStorage_(resize)(THCState *state, THCStorage *storage, ptrdiff_t size);
THC_API void THCStorage_(fill)(THCState *state, THCStorage *storage, real value);
THC_API int THCStorage_(getDevice)(THCState* state, const THCStorage* storage);
diff --git a/lib/THC/generic/THCTensor.c b/lib/THC/generic/THCTensor.c
index e18044d..f6c82b5 100644
--- a/lib/THC/generic/THCTensor.c
+++ b/lib/THC/generic/THCTensor.c
@@ -8,7 +8,7 @@ THCStorage *THCTensor_(storage)(THCState *state, const THCTensor *self)
return self->storage;
}
-long THCTensor_(storageOffset)(THCState *state, const THCTensor *self)
+ptrdiff_t THCTensor_(storageOffset)(THCState *state, const THCTensor *self)
{
return self->storageOffset;
}
@@ -65,7 +65,7 @@ void THCTensor_(clearFlag)(THCState *state, THCTensor *self, const char flag)
/**** creation methods ****/
static void THCTensor_(rawInit)(THCState *state, THCTensor *self);
-static void THCTensor_(rawSet)(THCState *state, THCTensor *self, THCStorage *storage, long storageOffset, int nDimension, long *size, long *stride);
+static void THCTensor_(rawSet)(THCState *state, THCTensor *self, THCStorage *storage, ptrdiff_t storageOffset, int nDimension, long *size, long *stride);
/* Empty init */
@@ -92,7 +92,7 @@ THCTensor *THCTensor_(newWithTensor)(THCState *state, THCTensor *tensor)
}
/* Storage init */
-THCTensor *THCTensor_(newWithStorage)(THCState *state, THCStorage *storage, long storageOffset, THLongStorage *size, THLongStorage *stride)
+THCTensor *THCTensor_(newWithStorage)(THCState *state, THCStorage *storage, ptrdiff_t storageOffset, THLongStorage *size, THLongStorage *stride)
{
THCTensor *self = (THCTensor*)THAlloc(sizeof(THCTensor));
if(size && stride)
@@ -109,20 +109,20 @@ THCTensor *THCTensor_(newWithStorage)(THCState *state, THCStorage *storage, long
return self;
}
-THCTensor *THCTensor_(newWithStorage1d)(THCState *state, THCStorage *storage, long storageOffset,
+THCTensor *THCTensor_(newWithStorage1d)(THCState *state, THCStorage *storage, ptrdiff_t storageOffset,
long size0, long stride0)
{
return THCTensor_(newWithStorage4d)(state, storage, storageOffset, size0, stride0, -1, -1, -1, -1, -1, -1);
}
-THCTensor *THCTensor_(newWithStorage2d)(THCState *state, THCStorage *storage, long storageOffset,
+THCTensor *THCTensor_(newWithStorage2d)(THCState *state, THCStorage *storage, ptrdiff_t storageOffset,
long size0, long stride0,
long size1, long stride1)
{
return THCTensor_(newWithStorage4d)(state, storage, storageOffset, size0, stride0, size1, stride1, -1, -1, -1, -1);
}
-THCTensor *THCTensor_(newWithStorage3d)(THCState *state, THCStorage *storage, long storageOffset,
+THCTensor *THCTensor_(newWithStorage3d)(THCState *state, THCStorage *storage, ptrdiff_t storageOffset,
long size0, long stride0,
long size1, long stride1,
long size2, long stride2)
@@ -130,7 +130,7 @@ THCTensor *THCTensor_(newWithStorage3d)(THCState *state, THCStorage *storage, lo
return THCTensor_(newWithStorage4d)(state, storage, storageOffset, size0, stride0, size1, stride1, size2, stride2, -1, -1);
}
-THCTensor *THCTensor_(newWithStorage4d)(THCState *state, THCStorage *storage, long storageOffset,
+THCTensor *THCTensor_(newWithStorage4d)(THCState *state, THCStorage *storage, ptrdiff_t storageOffset,
long size0, long stride0,
long size1, long stride1,
long size2, long stride2,
@@ -296,7 +296,7 @@ void THCTensor_(set)(THCState *state, THCTensor *self, THCTensor *src)
src->stride);
}
-void THCTensor_(setStorage)(THCState *state, THCTensor *self, THCStorage *storage_, long storageOffset_, THLongStorage *size_, THLongStorage *stride_)
+void THCTensor_(setStorage)(THCState *state, THCTensor *self, THCStorage *storage_, ptrdiff_t storageOffset_, THLongStorage *size_, THLongStorage *stride_)
{
if(size_ && stride_)
THArgCheck(size_->size == stride_->size, 5, "inconsistent size/stride sizes");
@@ -310,7 +310,7 @@ void THCTensor_(setStorage)(THCState *state, THCTensor *self, THCStorage *storag
(stride_ ? stride_->data : NULL));
}
-void THCTensor_(setStorage1d)(THCState *state, THCTensor *self, THCStorage *storage_, long storageOffset_,
+void THCTensor_(setStorage1d)(THCState *state, THCTensor *self, THCStorage *storage_, ptrdiff_t storageOffset_,
long size0_, long stride0_)
{
THCTensor_(setStorage4d)(state, self, storage_, storageOffset_,
@@ -320,7 +320,7 @@ void THCTensor_(setStorage1d)(THCState *state, THCTensor *self, THCStorage *stor
-1, -1);
}
-void THCTensor_(setStorage2d)(THCState *state, THCTensor *self, THCStorage *storage_, long storageOffset_,
+void THCTensor_(setStorage2d)(THCState *state, THCTensor *self, THCStorage *storage_, ptrdiff_t storageOffset_,
long size0_, long stride0_,
long size1_, long stride1_)
{
@@ -331,7 +331,7 @@ void THCTensor_(setStorage2d)(THCState *state, THCTensor *self, THCStorage *stor
-1, -1);
}
-void THCTensor_(setStorage3d)(THCState *state, THCTensor *self, THCStorage *storage_, long storageOffset_,
+void THCTensor_(setStorage3d)(THCState *state, THCTensor *self, THCStorage *storage_, ptrdiff_t storageOffset_,
long size0_, long stride0_,
long size1_, long stride1_,
long size2_, long stride2_)
@@ -343,7 +343,7 @@ void THCTensor_(setStorage3d)(THCState *state, THCTensor *self, THCStorage *stor
-1, -1);
}
-void THCTensor_(setStorage4d)(THCState *state, THCTensor *self, THCStorage *storage_, long storageOffset_,
+void THCTensor_(setStorage4d)(THCState *state, THCTensor *self, THCStorage *storage_, ptrdiff_t storageOffset_,
long size0_, long stride0_,
long size1_, long stride1_,
long size2_, long stride2_,
@@ -578,13 +578,13 @@ int THCTensor_(isSameSizeAs)(THCState *state, const THCTensor *self, const THCTe
return 1;
}
-long THCTensor_(nElement)(THCState *state, const THCTensor *self)
+ptrdiff_t THCTensor_(nElement)(THCState *state, const THCTensor *self)
{
if(self->nDimension == 0)
return 0;
else
{
- long nElement = 1;
+ ptrdiff_t nElement = 1;
int d;
for(d = 0; d < self->nDimension; d++)
nElement *= self->size[d];
@@ -637,7 +637,7 @@ static void THCTensor_(rawInit)(THCState *state, THCTensor *self)
self->flag = TH_TENSOR_REFCOUNTED;
}
-static void THCTensor_(rawSet)(THCState *state, THCTensor *self, THCStorage *storage, long storageOffset, int nDimension, long *size, long *stride)
+static void THCTensor_(rawSet)(THCState *state, THCTensor *self, THCStorage *storage, ptrdiff_t storageOffset, int nDimension, long *size, long *stride)
{
/* storage */
if(self->storage != storage)
@@ -667,7 +667,7 @@ void THCTensor_(rawResize)(THCState *state, THCTensor *self, int nDimension, lon
{
int d;
int nDimension_;
- long totalSize;
+ ptrdiff_t totalSize;
int hascorrectsize = 1;
nDimension_ = 0;
diff --git a/lib/THC/generic/THCTensor.h b/lib/THC/generic/THCTensor.h
index 175eaee..3c5291e 100644
--- a/lib/THC/generic/THCTensor.h
+++ b/lib/THC/generic/THCTensor.h
@@ -11,7 +11,7 @@ typedef struct THCTensor
int nDimension;
THCStorage *storage;
- long storageOffset;
+ ptrdiff_t storageOffset;
int refcount;
char flag;
@@ -21,7 +21,7 @@ typedef struct THCTensor
/**** access methods ****/
THC_API THCStorage* THCTensor_(storage)(THCState *state, const THCTensor *self);
-THC_API long THCTensor_(storageOffset)(THCState *state, const THCTensor *self);
+THC_API ptrdiff_t THCTensor_(storageOffset)(THCState *state, const THCTensor *self);
THC_API int THCTensor_(nDimension)(THCState *state, const THCTensor *self);
THC_API long THCTensor_(size)(THCState *state, const THCTensor *self, int dim);
THC_API long THCTensor_(stride)(THCState *state, const THCTensor *self, int dim);
@@ -37,17 +37,17 @@ THC_API void THCTensor_(clearFlag)(THCState *state, THCTensor *self, const char
THC_API THCTensor *THCTensor_(new)(THCState *state);
THC_API THCTensor *THCTensor_(newWithTensor)(THCState *state, THCTensor *tensor);
/* stride might be NULL */
-THC_API THCTensor *THCTensor_(newWithStorage)(THCState *state, THCStorage *storage_, long storageOffset_, THLongStorage *size_, THLongStorage *stride_);
-THC_API THCTensor *THCTensor_(newWithStorage1d)(THCState *state, THCStorage *storage_, long storageOffset_,
+THC_API THCTensor *THCTensor_(newWithStorage)(THCState *state, THCStorage *storage_, ptrdiff_t storageOffset_, THLongStorage *size_, THLongStorage *stride_);
+THC_API THCTensor *THCTensor_(newWithStorage1d)(THCState *state, THCStorage *storage_, ptrdiff_t storageOffset_,
long size0_, long stride0_);
-THC_API THCTensor *THCTensor_(newWithStorage2d)(THCState *state, THCStorage *storage_, long storageOffset_,
+THC_API THCTensor *THCTensor_(newWithStorage2d)(THCState *state, THCStorage *storage_, ptrdiff_t storageOffset_,
long size0_, long stride0_,
long size1_, long stride1_);
-THC_API THCTensor *THCTensor_(newWithStorage3d)(THCState *state, THCStorage *storage_, long storageOffset_,
+THC_API THCTensor *THCTensor_(newWithStorage3d)(THCState *state, THCStorage *storage_, ptrdiff_t storageOffset_,
long size0_, long stride0_,
long size1_, long stride1_,
long size2_, long stride2_);
-THC_API THCTensor *THCTensor_(newWithStorage4d)(THCState *state, THCStorage *storage_, long storageOffset_,
+THC_API THCTensor *THCTensor_(newWithStorage4d)(THCState *state, THCStorage *storage_, ptrdiff_t storageOffset_,
long size0_, long stride0_,
long size1_, long stride1_,
long size2_, long stride2_,
@@ -77,17 +77,17 @@ THC_API void THCTensor_(resize5d)(THCState *state, THCTensor *tensor, long size0
THC_API void THCTensor_(rawResize)(THCState *state, THCTensor *self, int nDimension, long *size, long *stride);
THC_API void THCTensor_(set)(THCState *state, THCTensor *self, THCTensor *src);
-THC_API void THCTensor_(setStorage)(THCState *state, THCTensor *self, THCStorage *storage_, long storageOffset_, THLongStorage *size_, THLongStorage *stride_);
-THC_API void THCTensor_(setStorage1d)(THCState *state, THCTensor *self, THCStorage *storage_, long storageOffset_,
+THC_API void THCTensor_(setStorage)(THCState *state, THCTensor *self, THCStorage *storage_, ptrdiff_t storageOffset_, THLongStorage *size_, THLongStorage *stride_);
+THC_API void THCTensor_(setStorage1d)(THCState *state, THCTensor *self, THCStorage *storage_, ptrdiff_t storageOffset_,
long size0_, long stride0_);
-THC_API void THCTensor_(setStorage2d)(THCState *state, THCTensor *self, THCStorage *storage_, long storageOffset_,
+THC_API void THCTensor_(setStorage2d)(THCState *state, THCTensor *self, THCStorage *storage_, ptrdiff_t storageOffset_,
long size0_, long stride0_,
long size1_, long stride1_);
-THC_API void THCTensor_(setStorage3d)(THCState *state, THCTensor *self, THCStorage *storage_, long storageOffset_,
+THC_API void THCTensor_(setStorage3d)(THCState *state, THCTensor *self, THCStorage *storage_, ptrdiff_t storageOffset_,
long size0_, long stride0_,
long size1_, long stride1_,
long size2_, long stride2_);
-THC_API void THCTensor_(setStorage4d)(THCState *state, THCTensor *self, THCStorage *storage_, long storageOffset_,
+THC_API void THCTensor_(setStorage4d)(THCState *state, THCTensor *self, THCStorage *storage_, ptrdiff_t storageOffset_,
long size0_, long stride0_,
long size1_, long stride1_,
long size2_, long stride2_,
@@ -105,7 +105,7 @@ THC_API int THCTensor_(isContiguous)(THCState *state, const THCTensor *self);
THC_API int THCTensor_(isSameSizeAs)(THCState *state, const THCTensor *self, const THCTensor *src);
THC_API int THCTensor_(isSetTo)(THCState *state, const THCTensor *self, const THCTensor *src);
THC_API int THCTensor_(isSize)(THCState *state, const THCTensor *self, const THLongStorage *dims);
-THC_API long THCTensor_(nElement)(THCState *state, const THCTensor *self);
+THC_API ptrdiff_t THCTensor_(nElement)(THCState *state, const THCTensor *self);
THC_API void THCTensor_(retain)(THCState *state, THCTensor *self);
THC_API void THCTensor_(free)(THCState *state, THCTensor *self);
diff --git a/lib/THC/generic/THCTensorIndex.cu b/lib/THC/generic/THCTensorIndex.cu
index e17cebf..ce4c790 100644
--- a/lib/THC/generic/THCTensorIndex.cu
+++ b/lib/THC/generic/THCTensorIndex.cu
@@ -26,7 +26,7 @@ void THCTensor_(indexCopy)(THCState *state, THCTensor *dst, int dim, THCudaLongT
dims = THCudaLongTensor_nDimension(state, indices);
THArgCheck(dims <= MAX_CUTORCH_DIMS, 4, CUTORCH_DIM_WARNING);
- long numIndices = THCudaLongTensor_nElement(state, indices);
+ ptrdiff_t numIndices = THCudaLongTensor_nElement(state, indices);
long srcDims = THCTensor_(nDimension)(state, src);
cudaStream_t stream = THCState_getCurrentStream(state);
@@ -44,9 +44,9 @@ void THCTensor_(indexCopy)(THCState *state, THCTensor *dst, int dim, THCudaLongT
// total size of the tensor ignoring dimension `dim`;
// -the number of indices we are choosing, which is the total size
// of the tensor `indices`.
- long srcTotalSize = THCTensor_(nElement)(state, src);
+ ptrdiff_t srcTotalSize = THCTensor_(nElement)(state, src);
long dstCopyDimSize = THCTensor_(size)(state, dst, dim);
- long sliceSize = srcTotalSize / numIndices;
+ ptrdiff_t sliceSize = srcTotalSize / numIndices;
int mpc = THCState_getCurrentDeviceProperties(state)->multiProcessorCount;
@@ -62,11 +62,11 @@ void THCTensor_(indexCopy)(THCState *state, THCTensor *dst, int dim, THCudaLongT
dstInfo, srcInfo, indicesInfo, \
dstCopyDim, srcCopyDim, sliceSize, dstCopyDimSize);
- dim3 smallIndexGrid(std::min(THCCeilDiv(sliceSize, 128L), (long)(mpc * 8)));
- dim3 smallIndexBlock(std::min(sliceSize, 128L));
+ dim3 smallIndexGrid(std::min(THCCeilDiv(sliceSize, (ptrdiff_t)128), (ptrdiff_t)(mpc * 8)));
+ dim3 smallIndexBlock(std::min(sliceSize, (ptrdiff_t)128));
- dim3 largeIndexGrid(std::min(THCCeilDiv(srcTotalSize, 128L), (long)(mpc * 8)));
- dim3 largeIndexBlock(std::min(srcTotalSize, 128L));
+ dim3 largeIndexGrid(std::min(THCCeilDiv(srcTotalSize, (ptrdiff_t)128), (ptrdiff_t)(mpc * 8)));
+ dim3 largeIndexBlock(std::min(srcTotalSize, (ptrdiff_t)128));
if (TensorUtils<THCTensor>::canUse32BitIndexMath(state, dst) &&
TensorUtils<THCTensor>::canUse32BitIndexMath(state, src) &&
@@ -154,7 +154,7 @@ void THCTensor_(indexAdd)(THCState *state, THCTensor *dst, int dim, THCudaLongTe
dims = THCudaLongTensor_nDimension(state, indices);
THArgCheck(dims <= MAX_CUTORCH_DIMS, 4, CUTORCH_DIM_WARNING);
- long numIndices = THCudaLongTensor_nElement(state, indices);
+ ptrdiff_t numIndices = THCudaLongTensor_nElement(state, indices);
long srcDims = THCTensor_(nDimension)(state, src);
cudaStream_t stream = THCState_getCurrentStream(state);
@@ -172,9 +172,9 @@ void THCTensor_(indexAdd)(THCState *state, THCTensor *dst, int dim, THCudaLongTe
// total size of the tensor ignoring dimension `dim`;
// -the number of indices we are choosing, which is the total size
// of the tensor `indices`.
- long srcTotalSize = THCTensor_(nElement)(state, src);
+ ptrdiff_t srcTotalSize = THCTensor_(nElement)(state, src);
long dstAddDimSize = THCTensor_(size)(state, dst, dim);
- long sliceSize = srcTotalSize / numIndices;
+ ptrdiff_t sliceSize = srcTotalSize / numIndices;
int mpc = THCState_getCurrentDeviceProperties(state)->multiProcessorCount;
@@ -190,11 +190,11 @@ void THCTensor_(indexAdd)(THCState *state, THCTensor *dst, int dim, THCudaLongTe
dstInfo, srcInfo, indicesInfo, \
dstAddDim, srcAddDim, sliceSize, dstAddDimSize);
- dim3 smallIndexGrid(std::min(THCCeilDiv(sliceSize, 128L), (long)(mpc * 8)));
- dim3 smallIndexBlock(std::min(sliceSize, 128L));
+ dim3 smallIndexGrid(std::min(THCCeilDiv(sliceSize, (ptrdiff_t)128), (ptrdiff_t)(mpc * 8)));
+ dim3 smallIndexBlock(std::min(sliceSize, (ptrdiff_t)128));
- dim3 largeIndexGrid(std::min(THCCeilDiv(srcTotalSize, 128L), (long)(mpc * 8)));
- dim3 largeIndexBlock(std::min(srcTotalSize, 128L));
+ dim3 largeIndexGrid(std::min(THCCeilDiv(srcTotalSize, (ptrdiff_t)128), (ptrdiff_t)(mpc * 8)));
+ dim3 largeIndexBlock(std::min(srcTotalSize, (ptrdiff_t)128));
if (TensorUtils<THCTensor>::canUse32BitIndexMath(state, dst) &&
TensorUtils<THCTensor>::canUse32BitIndexMath(state, src) &&
@@ -279,7 +279,7 @@ void THCTensor_(indexFill)(THCState *state, THCTensor *dst, int dim, THCudaLongT
dims = THCudaLongTensor_nDimension(state, indices);
THArgCheck(dims <= MAX_CUTORCH_DIMS, 4, CUTORCH_DIM_WARNING);
- long numIndices = THCudaLongTensor_nElement(state, indices);
+ ptrdiff_t numIndices = THCudaLongTensor_nElement(state, indices);
long srcDims = THCTensor_(nDimension)(state, dst);
cudaStream_t stream = THCState_getCurrentStream(state);
@@ -296,9 +296,9 @@ void THCTensor_(indexFill)(THCState *state, THCTensor *dst, int dim, THCudaLongT
// total size of the tensor ignoring dimension `dim`;
// -the number of indices we are choosing, which is the total size
// of the tensor `indices`.
- long dstTotalSize = THCTensor_(nElement)(state, dst);
+ ptrdiff_t dstTotalSize = THCTensor_(nElement)(state, dst);
long dstFillDimSize = THCTensor_(size)(state, dst, dim);
- long sliceSize = dstTotalSize / dstFillDimSize;
+ ptrdiff_t sliceSize = dstTotalSize / dstFillDimSize;
int mpc = THCState_getCurrentDeviceProperties(state)->multiProcessorCount;
@@ -314,11 +314,11 @@ void THCTensor_(indexFill)(THCState *state, THCTensor *dst, int dim, THCudaLongT
dstInfo, indicesInfo, \
dstFillDim, sliceSize, dstFillDimSize, val);
- dim3 smallIndexGrid(std::min(THCCeilDiv(sliceSize, 128L), (long)(mpc * 8)));
- dim3 smallIndexBlock(std::min(sliceSize, 128L));
+ dim3 smallIndexGrid(std::min(THCCeilDiv(sliceSize, (ptrdiff_t)128), (ptrdiff_t)(mpc * 8)));
+ dim3 smallIndexBlock(std::min(sliceSize, (ptrdiff_t)128));
- dim3 largeIndexGrid(std::min(THCCeilDiv(dstTotalSize, 128L), (long)(mpc * 8)));
- dim3 largeIndexBlock(std::min(dstTotalSize, 128L));
+ dim3 largeIndexGrid(std::min(THCCeilDiv(dstTotalSize, (ptrdiff_t)128), (ptrdiff_t)(mpc * 8)));
+ dim3 largeIndexBlock(std::min(dstTotalSize, (ptrdiff_t)128));
if (TensorUtils<THCTensor>::canUse32BitIndexMath(state, dst) &&
TensorUtils<THCudaLongTensor>::canUse32BitIndexMath(state, indices)) {
@@ -396,7 +396,7 @@ void THCTensor_(indexSelect)(THCState *state, THCTensor *dst, THCTensor *src, in
dims = THCudaLongTensor_nDimension(state, indices);
THArgCheck(dims <= MAX_CUTORCH_DIMS, 5, CUTORCH_DIM_WARNING);
- long numIndices = THCudaLongTensor_nElement(state, indices);
+ ptrdiff_t numIndices = THCudaLongTensor_nElement(state, indices);
long srcDims = THCTensor_(nDimension)(state, src);
cudaStream_t stream = THCState_getCurrentStream(state);
@@ -418,9 +418,9 @@ void THCTensor_(indexSelect)(THCState *state, THCTensor *dst, THCTensor *src, in
// total size of the tensor ignoring dimension `dim`;
// -the number of indices we are choosing, which is the total size
// of the tensor `indices`.
- long dstTotalSize = THCTensor_(nElement)(state, dst);
+ ptrdiff_t dstTotalSize = THCTensor_(nElement)(state, dst);
long srcSelectDimSize = THCTensor_(size)(state, src, dim);
- long sliceSize = dstTotalSize / numIndices;
+ ptrdiff_t sliceSize = dstTotalSize / numIndices;
int mpc = THCState_getCurrentDeviceProperties(state)->multiProcessorCount;
@@ -436,11 +436,11 @@ void THCTensor_(indexSelect)(THCState *state, THCTensor *dst, THCTensor *src, in
dstInfo, srcInfo, indicesInfo, \
dstSelectDim, srcSelectDim, dstTotalSize, sliceSize, srcSelectDimSize);
- dim3 smallIndexGrid(std::min(THCCeilDiv(sliceSize, 128L), (long)(mpc * 8)));
- dim3 smallIndexBlock(std::min(sliceSize, 128L));
+ dim3 smallIndexGrid(std::min(THCCeilDiv(sliceSize, (ptrdiff_t)128), (ptrdiff_t)(mpc * 8)));
+ dim3 smallIndexBlock(std::min(sliceSize, (ptrdiff_t)128));
- dim3 largeIndexGrid(std::min(THCCeilDiv(dstTotalSize, 128L), (long)(mpc * 8)));
- dim3 largeIndexBlock(std::min(dstTotalSize, 128L));
+ dim3 largeIndexGrid(std::min(THCCeilDiv(dstTotalSize, (ptrdiff_t)128), (ptrdiff_t)(mpc * 8)));
+ dim3 largeIndexBlock(std::min(dstTotalSize, (ptrdiff_t)128));
if (TensorUtils<THCTensor>::canUse32BitIndexMath(state, dst) &&
TensorUtils<THCTensor>::canUse32BitIndexMath(state, src) &&
diff --git a/lib/THC/generic/THCTensorMasked.cu b/lib/THC/generic/THCTensorMasked.cu
index e6a5704..333af29 100644
--- a/lib/THC/generic/THCTensorMasked.cu
+++ b/lib/THC/generic/THCTensorMasked.cu
@@ -38,16 +38,16 @@ THCTensor_(maskedCopy)(THCState* state,
THCTensor *tensor, THCudaByteTensor *mask, THCTensor *src)
{
THAssert(THCTensor_(checkGPU)(state, 3, tensor, src, mask));
- long maskSize = THCudaByteTensor_nElement(state, mask);
- long tensorSize = THCTensor_(nElement)(state, tensor);
- long srcSize = THCTensor_(nElement)(state, src);
+ ptrdiff_t maskSize = THCudaByteTensor_nElement(state, mask);
+ ptrdiff_t tensorSize = THCTensor_(nElement)(state, tensor);
+ ptrdiff_t srcSize = THCTensor_(nElement)(state, src);
// `mask` and `tensor` must have the same number of elements
THArgCheck(maskSize == tensorSize, 2,
"mask and tensor must have the same number of elements");
// Determine our output size
- long totalElements = THCudaByteTensor_sumall(state, mask);
+ ptrdiff_t totalElements = THCudaByteTensor_sumall(state, mask);
// The number of `1` elements present in the mask must be <= the
// number of elements available in `src`
@@ -121,7 +121,7 @@ THCTensor_(maskedSelect)(THCState* state,
2, "sizes do not match");
// Determine our output size
- long totalElements = THCudaByteTensor_sumall(state, mask);
+ ptrdiff_t totalElements = THCudaByteTensor_sumall(state, mask);
THCTensor* tensorContig = THCTensor_(newContiguous)(state, tensor);
THCTensor_(resize1d)(state, tensorContig, totalElements);
diff --git a/lib/THC/generic/THCTensorMath.cu b/lib/THC/generic/THCTensorMath.cu
index a0e550a..557f8f5 100644
--- a/lib/THC/generic/THCTensorMath.cu
+++ b/lib/THC/generic/THCTensorMath.cu
@@ -59,7 +59,7 @@ THCTensor_(reshape)(THCState *state, THCTensor *r_, THCTensor *t, THLongStorage
THCTensor_(copy)(state, r_, t);
}
-long
+ptrdiff_t
THCTensor_(numel)(THCState *state, THCTensor *t)
{
return THCTensor_(nElement)(state, t);
diff --git a/lib/THC/generic/THCTensorMath.h b/lib/THC/generic/THCTensorMath.h
index 5c9e66d..cfc706a 100644
--- a/lib/THC/generic/THCTensorMath.h
+++ b/lib/THC/generic/THCTensorMath.h
@@ -8,6 +8,6 @@ THC_API void THCTensor_(zero)(THCState *state, THCTensor *self);
THC_API void THCTensor_(zeros)(THCState *state, THCTensor *r_, THLongStorage *size);
THC_API void THCTensor_(ones)(THCState *state, THCTensor *r_, THLongStorage *size);
THC_API void THCTensor_(reshape)(THCState *state, THCTensor *r_, THCTensor *t, THLongStorage *size);
-THC_API long THCTensor_(numel)(THCState *state, THCTensor *t);
+THC_API ptrdiff_t THCTensor_(numel)(THCState *state, THCTensor *t);
#endif
diff --git a/lib/THC/generic/THCTensorMathPointwise.cu b/lib/THC/generic/THCTensorMathPointwise.cu
index 90ddfbc..2638504 100644
--- a/lib/THC/generic/THCTensorMathPointwise.cu
+++ b/lib/THC/generic/THCTensorMathPointwise.cu
@@ -108,7 +108,7 @@ THCTensor_(cross)(THCState *state, THCTensor *self, THCTensor *x, THCTensor *y,
int i;
long nd = THCTensor_(nDimension)(state, x);
- long nelem = THCTensor_(nElement)(state, x);
+ ptrdiff_t nelem = THCTensor_(nElement)(state, x);
THArgCheck(nd == THCTensor_(nDimension)(state, y), 1, "tensors must have same number of dimensions");
for (i = 0; i < nd; i++) {
THArgCheck(THCTensor_(size)(state, x, i) == THCTensor_(size)(state, y, i), 1, "dimension %i of x and y does not match", i);
diff --git a/lib/THC/generic/THCTensorMathReduce.cu b/lib/THC/generic/THCTensorMathReduce.cu
index 502fa75..1e21d03 100644
--- a/lib/THC/generic/THCTensorMathReduce.cu
+++ b/lib/THC/generic/THCTensorMathReduce.cu
@@ -47,7 +47,7 @@ THCTensor_(renorm)(THCState *state, THCTensor* self, THCTensor* src, real value,
THCTensor *self_;
THCTensor *src_ = THCTensor_(newTranspose)(state, src, dimension, 0);
THCTensor *data = THCTensor_(newClone)(state, src_);
- long size = THCTensor_(nElement)(state, data)/data->size[0];
+ ptrdiff_t size = THCTensor_(nElement)(state, data)/data->size[0];
THArgCheck(dimension >= 0 && dimension < THCTensor_(nDimension)(state, src), 3, "invalid dimension");
THArgCheck(THCNumerics<real>::gt(value, ScalarConvert<int, real>::to(0)), 2, "non-positive-norm not supported");
@@ -138,7 +138,7 @@ THCTensor_(varall)(THCState *state, THCTensor *self)
val = THCNumerics<accreal>::div(
val,
- ScalarConvert<int, accreal>::to(THCTensor_(nElement)(state, self) - 1)
+ ScalarConvert<ptrdiff_t, accreal>::to(THCTensor_(nElement)(state, self) - 1)
);
THCudaCheck(cudaGetLastError());
diff --git a/lib/THC/generic/THCTensorScatterGather.cu b/lib/THC/generic/THCTensorScatterGather.cu
index 7842d3d..c120f88 100644
--- a/lib/THC/generic/THCTensorScatterGather.cu
+++ b/lib/THC/generic/THCTensorScatterGather.cu
@@ -34,7 +34,7 @@ void THCTensor_(gather)(THCState* state, THCTensor *tensor,
1, CUTORCH_DIM_WARNING);
- const long totalElements = THCudaLongTensor_nElement(state, index);
+ const ptrdiff_t totalElements = THCudaLongTensor_nElement(state, index);
const dim3 block = getApplyBlock();
dim3 grid;
THArgCheck(getApplyGrid(state, totalElements, grid), 1, CUTORCH_DIM_WARNING);
@@ -126,7 +126,7 @@ void THCTensor_(scatter)(THCState* state, THCTensor *tensor, int dim, THCudaLong
THArgCheck(THCTensor_(nDimension)(state, tensor) <= MAX_CUTORCH_DIMS,
1, CUTORCH_DIM_WARNING);
- const long totalElements = THCudaLongTensor_nElement(state, index);
+ const ptrdiff_t totalElements = THCudaLongTensor_nElement(state, index);
const dim3 block = getApplyBlock();
dim3 grid;
THArgCheck(getApplyGrid(state, totalElements, grid), 1, CUTORCH_DIM_WARNING);
@@ -211,7 +211,7 @@ THCTensor_(scatterFill)(THCState* state, THCTensor *tensor,
THArgCheck(THCTensor_(nDimension)(state, tensor) <= MAX_CUTORCH_DIMS,
1, CUTORCH_DIM_WARNING);
- const long totalElements = THCudaLongTensor_nElement(state, index);
+ const ptrdiff_t totalElements = THCudaLongTensor_nElement(state, index);
const dim3 block = getApplyBlock();
dim3 grid;
THArgCheck(getApplyGrid(state, totalElements, grid), 1, CUTORCH_DIM_WARNING);
diff --git a/lib/THC/generic/THCTensorSort.cu b/lib/THC/generic/THCTensorSort.cu
index 0116522..046c62b 100644
--- a/lib/THC/generic/THCTensorSort.cu
+++ b/lib/THC/generic/THCTensorSort.cu
@@ -18,9 +18,9 @@ THC_API void THCTensor_(sortKeyValueInplace)(THCState* state,
dims = THCTensor_(nDimension)(state, key);
THArgCheck(dims <= MAX_CUTORCH_DIMS, 2, CUTORCH_DIM_WARNING);
- long inElements = THCTensor_(nElement)(state, key);
+ ptrdiff_t inElements = THCTensor_(nElement)(state, key);
long keySliceSize = THCTensor_(size)(state, key, dim);
- long keySlices = inElements / keySliceSize;
+ ptrdiff_t keySlices = inElements / keySliceSize;
if (THCTensor_(nDimension)(state, key) == 0) {
// Zero-dim tensor; do nothing
@@ -160,7 +160,7 @@ void sortViaThrust(THCState* state,
int dim, bool dir) {
long nDims = THCTensor_(nDimension)(state, input);
- long totalElements = THCTensor_(nElement)(state, input);
+ ptrdiff_t totalElements = THCTensor_(nElement)(state, input);
long sliceSize = THCTensor_(size)(state, input, dim);
long sliceStride = THCTensor_(stride)(state, input, dim);
diff --git a/torch/generic/Storage.c b/torch/generic/Storage.c
index c57c416..e5c0836 100644
--- a/torch/generic/Storage.c
+++ b/torch/generic/Storage.c
@@ -10,13 +10,13 @@ static int torch_Storage_(new)(lua_State *L)
{
const char *fileName = luaL_checkstring(L, 1);
int isShared = luaT_optboolean(L, 2, 0);
- long size = luaL_optlong(L, 3, 0);
+ ptrdiff_t size = luaL_optinteger(L, 3, 0);
storage = THCStorage_(newWithMapping)(state, fileName, size, isShared);
}
else if(lua_type(L, 1) == LUA_TTABLE)
{
- long size = lua_objlen(L, 1);
- long i;
+ ptrdiff_t size = lua_objlen(L, 1);
+ ptrdiff_t i;
storage = THCStorage_(newWithSize)(state, size);
for(i = 1; i <= size; i++)
{
@@ -39,11 +39,11 @@ static int torch_Storage_(new)(lua_State *L)
{
THCStorage *src = luaT_checkudata(L, 1, torch_Storage);
real *ptr = src->data;
- long offset = luaL_optlong(L, 2, 1) - 1;
+ ptrdiff_t offset = luaL_optinteger(L, 2, 1) - 1;
if (offset < 0 || offset >= src->size) {
luaL_error(L, "offset out of bounds");
}
- long size = luaL_optlong(L, 3, src->size - offset);
+ ptrdiff_t size = luaL_optinteger(L, 3, src->size - offset);
if (size < 1 || size > (src->size - offset)) {
luaL_error(L, "size out of bounds");
}
@@ -54,14 +54,14 @@ static int torch_Storage_(new)(lua_State *L)
}
else if(lua_type(L, 2) == LUA_TNUMBER)
{
- long size = luaL_optlong(L, 1, 0);
+ ptrdiff_t size = luaL_optinteger(L, 1, 0);
real *ptr = (real *)luaL_optinteger(L, 2, 0);
storage = THCStorage_(newWithData)(state, ptr, size);
storage->flag = TH_STORAGE_REFCOUNTED;
}
else
{
- long size = luaL_optlong(L, 1, 0);
+ ptrdiff_t size = luaL_optinteger(L, 1, 0);
storage = THCStorage_(newWithSize)(state, size);
}
luaT_pushudata(L, storage, torch_Storage);
@@ -85,7 +85,7 @@ static int torch_Storage_(free)(lua_State *L)
static int torch_Storage_(resize)(lua_State *L)
{
THCStorage *storage = luaT_checkudata(L, 1, torch_Storage);
- long size = luaL_checklong(L, 2);
+ ptrdiff_t size = luaL_checkinteger(L, 2);
/* int keepContent = luaT_optboolean(L, 3, 0); */
THCStorage_(resize)(cutorch_getstate(L), storage, size);/*, keepContent); */
lua_settop(L, 1);
@@ -141,7 +141,7 @@ static int torch_Storage_(elementSize)(lua_State *L)
static int torch_Storage_(__len__)(lua_State *L)
{
THCStorage *storage = luaT_checkudata(L, 1, torch_Storage);
- lua_pushnumber(L, storage->size);
+ lua_pushinteger(L, storage->size);
return 1;
}
@@ -150,7 +150,7 @@ static int torch_Storage_(__newindex__)(lua_State *L)
if(lua_isnumber(L, 2))
{
THCStorage *storage = luaT_checkudata(L, 1, torch_Storage);
- long index = luaL_checklong(L, 2) - 1;
+ ptrdiff_t index = luaL_checkinteger(L, 2) - 1;
double number = luaL_checknumber(L, 3);
#ifdef THC_REAL_IS_HALF
@@ -172,7 +172,7 @@ static int torch_Storage_(__index__)(lua_State *L)
if(lua_isnumber(L, 2))
{
THCStorage *storage = luaT_checkudata(L, 1, torch_Storage);
- long index = luaL_checklong(L, 2) - 1;
+ ptrdiff_t index = luaL_checkinteger(L, 2) - 1;
real v = THCStorage_(get)(cutorch_getstate(L), storage, index);
#ifdef THC_REAL_IS_HALF
@@ -196,7 +196,7 @@ static int torch_Storage_(totable)(lua_State *L)
{
THCState *state = cutorch_getstate(L);
THCStorage *storage = luaT_checkudata(L, 1, torch_Storage);
- long i;
+ ptrdiff_t i;
/* Copy storage from device to host. */
#ifndef THC_REAL_IS_HALF
@@ -235,6 +235,9 @@ static int torch_Storage_(write)(lua_State *L)
THCStorage *storage = luaT_checkudata(L, 1, torch_Storage);
THFile *file = luaT_checkudata(L, 2, "torch.File");
+#ifdef _MSC_VER
+ THAssert(storage->size < LONG_MAX);
+#endif
THFile_writeLongScalar(file, storage->size);
THFile_writeRealRaw(file, storage->data, storage->size);
diff --git a/torch/generic/Tensor.c b/torch/generic/Tensor.c
index b3c85fe..d7dcd53 100644
--- a/torch/generic/Tensor.c
+++ b/torch/generic/Tensor.c
@@ -5,7 +5,7 @@
#include "THCHalf.h"
static void torch_Tensor_(c_readTensorStorageSizeStride)(lua_State *L, int index, int allowNone, int allowTensor, int allowStorage, int allowStride,
- THCStorage **storage_, long *storageOffset_, THLongStorage **size_, THLongStorage **stride_);
+ THCStorage **storage_, ptrdiff_t *storageOffset_, THLongStorage **size_, THLongStorage **stride_);
static void torch_Tensor_(c_readSizeStride)(lua_State *L, int index, int allowStride, THLongStorage **size_, THLongStorage **stride_);
@@ -75,7 +75,7 @@ static int torch_Tensor_(storage)(lua_State *L)
static int torch_Tensor_(storageOffset)(lua_State *L)
{
THCTensor *tensor = luaT_checkudata(L, 1, torch_Tensor);
- lua_pushnumber(L, tensor->storageOffset+1);
+ lua_pushinteger(L, tensor->storageOffset+1);
return 1;
}
@@ -83,14 +83,14 @@ static int torch_Tensor_(new)(lua_State *L)
{
THCState *state = cutorch_getstate(L);
THCTensor *tensor;
- long storageOffset;
+ ptrdiff_t storageOffset;
THLongStorage *size, *stride;
if(lua_type(L, 1) == LUA_TTABLE)
{
- long i, j;
+ ptrdiff_t i, j;
THLongStorage *counter;
- long si = 0;
+ ptrdiff_t si = 0;
int dimension = 0;
int is_finished = 0;
@@ -221,7 +221,7 @@ static int torch_Tensor_(set)(lua_State *L)
{
THCTensor *self = luaT_checkudata(L, 1, torch_Tensor);
THCStorage *storage;
- long storageOffset;
+ ptrdiff_t storageOffset;
THLongStorage *size, *stride;
torch_Tensor_(c_readTensorStorageSizeStride)(L, 2, 1, 1, 1, 1,
@@ -691,7 +691,7 @@ static int torch_Tensor_(isSameSizeAs)(lua_State *L)
static int torch_Tensor_(nElement)(lua_State *L)
{
THCTensor *tensor = luaT_checkudata(L, 1, torch_Tensor);
- lua_pushnumber(L, THCTensor_(nElement)(cutorch_getstate(L), tensor));
+ lua_pushinteger(L, THCTensor_(nElement)(cutorch_getstate(L), tensor));
return 1;
}
@@ -801,7 +801,7 @@ static int torch_Tensor_(__newindex__)(lua_State *L)
}
else if((idx = luaT_toudata(L, 2, "torch.LongStorage")))
{
- long index = THCTensor_(storageOffset)(state, tensor);
+ ptrdiff_t index = THCTensor_(storageOffset)(state, tensor);
#ifdef THC_REAL_IS_HALF
real value = THC_float2half((float) luaL_checknumber(L,3));
@@ -809,7 +809,7 @@ static int torch_Tensor_(__newindex__)(lua_State *L)
real value = (real)luaL_checknumber(L,3);
#endif
- int dim;
+ ptrdiff_t dim;
luaL_argcheck(L, idx->size == tensor->nDimension, 2, "invalid size");
@@ -1014,7 +1014,7 @@ static int torch_Tensor_(__index__)(lua_State *L)
if(lua_isnumber(L, 2))
{
- long index = luaL_checklong(L,2)-1;
+ ptrdiff_t index = luaL_checkinteger(L,2)-1;
luaL_argcheck(L, tensor->nDimension > 0, 1, "empty tensor");
if (index < 0) index = tensor->size[0] + index + 1;
@@ -1045,8 +1045,8 @@ static int torch_Tensor_(__index__)(lua_State *L)
}
else if((idx = luaT_toudata(L, 2, "torch.LongStorage")))
{
- long index = THCTensor_(storageOffset)(state, tensor);
- int dim;
+ ptrdiff_t index = THCTensor_(storageOffset)(state, tensor);
+ ptrdiff_t dim;
luaL_argcheck(L, idx->size == tensor->nDimension, 2, "invalid size");
@@ -1258,7 +1258,7 @@ static void torch_Tensor_(c_readSizeStride)(lua_State *L, int index, int allowSt
}
static void torch_Tensor_(c_readTensorStorageSizeStride)(lua_State *L, int index, int allowNone, int allowTensor, int allowStorage, int allowStride,
- THCStorage **storage_, long *storageOffset_, THLongStorage **size_, THLongStorage **stride_)
+ THCStorage **storage_, ptrdiff_t *storageOffset_, THLongStorage **size_, THLongStorage **stride_)
{
THCState *state = cutorch_getstate(L);
THCTensor *src = NULL;
@@ -1293,7 +1293,7 @@ static void torch_Tensor_(c_readTensorStorageSizeStride)(lua_State *L, int index
}
else
{
- *storageOffset_ = luaL_checklong(L, index+1)-1;
+ *storageOffset_ = luaL_checkinteger(L, index+1)-1;
torch_Tensor_(c_readSizeStride)(L, index+2, allowStride, size_, stride_);
}
return;
@@ -1335,6 +1335,9 @@ static int torch_Tensor_(write)(lua_State *L)
THFile_writeIntScalar(file, tensor->nDimension);
THFile_writeLongRaw(file, tensor->size, tensor->nDimension);
THFile_writeLongRaw(file, tensor->stride, tensor->nDimension);
+#ifdef _MSC_VER
+ THAssert(tensor->storageOffset+1 < LONG_MAX);
+#endif
THFile_writeLongScalar(file, tensor->storageOffset+1); /* to respect Lua convention */
lua_getfield(L, 2, "writeObject"); /* the method */