diff options
Diffstat (limited to 'lib')
26 files changed, 817 insertions, 488 deletions
diff --git a/lib/THC/CMakeLists.txt b/lib/THC/CMakeLists.txt index a422789..c4591b8 100644 --- a/lib/THC/CMakeLists.txt +++ b/lib/THC/CMakeLists.txt @@ -135,3 +135,18 @@ INSTALL(FILES THCDeviceTensorUtils.cuh THCDeviceTensorUtils-inl.cuh DESTINATION "${THC_INSTALL_INCLUDE_SUBDIR}/THC") + +INSTALL(FILES + generic/THCStorage.c + generic/THCStorage.cu + generic/THCStorage.h + generic/THCTensor.c + generic/THCTensor.cu + generic/THCTensor.h + generic/THCStorageCopy.c + generic/THCStorageCopy.cu + generic/THCStorageCopy.h + generic/THCTensorCopy.c + generic/THCTensorCopy.cu + generic/THCTensorCopy.h + DESTINATION "${THC_INSTALL_INCLUDE_SUBDIR}/THC/generic") diff --git a/lib/THC/THCGenerateAllTypes.h b/lib/THC/THCGenerateAllTypes.h new file mode 100644 index 0000000..4793d06 --- /dev/null +++ b/lib/THC/THCGenerateAllTypes.h @@ -0,0 +1,114 @@ +#ifndef THC_GENERIC_FILE +#error "You must define THC_GENERIC_FILE before including THGenerateAllTypes.h" +#endif + +#define THCTypeIdxByte 1 +#define THCTypeIdxChar 2 +#define THCTypeIdxShort 3 +#define THCTypeIdxInt 4 +#define THCTypeIdxLong 5 +#define THCTypeIdxFloat 6 +#define THCTypeIdxDouble 7 +#define THCTypeIdx_(T) TH_CONCAT_2(THCTypeIdx,T) + +#define real unsigned char +#define accreal long +#define Real Byte +#define CReal CudaByte +#define THC_REAL_IS_BYTE +#line 1 THC_GENERIC_FILE +/*#line 1 "THByteStorage.h"*/ +#include THC_GENERIC_FILE +#undef real +#undef accreal +#undef Real +#undef CReal +#undef THC_REAL_IS_BYTE + +#define real char +#define accreal long +#define Real Char +#define CReal CudaChar +#define THC_REAL_IS_CHAR +#line 1 THC_GENERIC_FILE +#include THC_GENERIC_FILE +#undef real +#undef accreal +#undef Real +#undef CReal +#undef THC_REAL_IS_CHAR + +#define real short +#define accreal long +#define Real Short +#define CReal CudaShort +#define THC_REAL_IS_SHORT +#line 1 THC_GENERIC_FILE +#include THC_GENERIC_FILE +#undef real +#undef accreal +#undef Real +#undef CReal +#undef THC_REAL_IS_SHORT + +#define real int +#define accreal long +#define Real Int +#define CReal CudaInt +#define THC_REAL_IS_INT +#line 1 THC_GENERIC_FILE +#include THC_GENERIC_FILE +#undef real +#undef accreal +#undef Real +#undef CReal +#undef THC_REAL_IS_INT + +#define real long +#define accreal long +#define Real Long +#define CReal CudaLong +#define THC_REAL_IS_LONG +#line 1 THC_GENERIC_FILE +#include THC_GENERIC_FILE +#undef real +#undef accreal +#undef Real +#undef CReal +#undef THC_REAL_IS_LONG + +#define real float +#define accreal double +#define Real Float +#define CReal Cuda +#define THC_REAL_IS_FLOAT +#line 1 THC_GENERIC_FILE +#include THC_GENERIC_FILE +#undef real +#undef accreal +#undef Real +#undef CReal +#undef THC_REAL_IS_FLOAT + +#define real double +#define accreal double +#define Real Double +#define CReal CudaDouble +#define THC_REAL_IS_DOUBLE +#line 1 THC_GENERIC_FILE +#include THC_GENERIC_FILE +#undef real +#undef accreal +#undef Real +#undef CReal +#undef THC_REAL_IS_DOUBLE + +#undef THCTypeIdxByte +#undef THCTypeIdxChar +#undef THCTypeIdxShort +#undef THCTypeIdxInt +#undef THCTypeIdxLong +#undef THCTypeIdxFloat +#undef THCTypeIdxDouble + +#undef THC_GENERIC_FILE diff --git a/lib/THC/THCStorage.c b/lib/THC/THCStorage.c new file mode 100644 index 0000000..6fc9574 --- /dev/null +++ b/lib/THC/THCStorage.c @@ -0,0 +1,6 @@ +#include "THCStorage.h" +#include "THCGeneral.h" +#include "THAtomic.h" + +#include "generic/THCStorage.c" +#include "THCGenerateAllTypes.h" diff --git a/lib/THC/THCStorage.cu b/lib/THC/THCStorage.cu new file mode 100644 index 0000000..2ceb0c7 --- /dev/null +++ b/lib/THC/THCStorage.cu @@ -0,0 +1,10 @@ +#include "THCStorage.h" + +#include <thrust/device_ptr.h> +#include <thrust/fill.h> +#if CUDA_VERSION >= 7000 +#include <thrust/system/cuda/execution_policy.h> +#endif + +#include "generic/THCStorage.cu" +#include "THCGenerateAllTypes.h" diff --git a/lib/THC/THCStorage.h b/lib/THC/THCStorage.h new file mode 100644 index 0000000..ac1cd70 --- /dev/null +++ b/lib/THC/THCStorage.h @@ -0,0 +1,17 @@ +#ifndef THC_STORAGE_INC +#define THC_STORAGE_INC + +#include "THStorage.h" +#include "THCGeneral.h" + +#define THCStorage TH_CONCAT_3(TH,CReal,Storage) +#define THCStorage_(NAME) TH_CONCAT_4(TH,CReal,Storage_,NAME) + +/* fast access methods */ +#define THC_STORAGE_GET(storage, idx) ((storage)->data[(idx)]) +#define THC_STORAGE_SET(storage, idx, value) ((storage)->data[(idx)] = (value)) + +#include "generic/THCStorage.h" +#include "THCGenerateAllTypes.h" + +#endif diff --git a/lib/THC/THCStorageCopy.c b/lib/THC/THCStorageCopy.c new file mode 100644 index 0000000..76a8076 --- /dev/null +++ b/lib/THC/THCStorageCopy.c @@ -0,0 +1,5 @@ +#include "THCStorageCopy.h" +#include "THCGeneral.h" + +#include "generic/THCStorageCopy.c" +#include "THCGenerateAllTypes.h" diff --git a/lib/THC/THCStorageCopy.cu b/lib/THC/THCStorageCopy.cu new file mode 100644 index 0000000..b8b0417 --- /dev/null +++ b/lib/THC/THCStorageCopy.cu @@ -0,0 +1,5 @@ +#include "THCStorageCopy.h" +#include "THCGeneral.h" + +#include "generic/THCStorageCopy.cu" +#include "THCGenerateAllTypes.h" diff --git a/lib/THC/THCStorageCopy.h b/lib/THC/THCStorageCopy.h new file mode 100644 index 0000000..ec8011d --- /dev/null +++ b/lib/THC/THCStorageCopy.h @@ -0,0 +1,10 @@ +#ifndef THC_STORAGE_COPY_INC +#define THC_STORAGE_COPY_INC + +#include "THCStorage.h" +#include "THCGeneral.h" + +#include "generic/THCStorageCopy.h" +#include "THCGenerateAllTypes.h" + +#endif diff --git a/lib/THC/THCTensor.c b/lib/THC/THCTensor.c new file mode 100644 index 0000000..3bcf69d --- /dev/null +++ b/lib/THC/THCTensor.c @@ -0,0 +1,7 @@ +#include "THCGeneral.h" +#include "THCTensor.h" +#include "THCTensorCopy.h" +#include "THAtomic.h" + +#include "generic/THCTensor.c" +#include "THCGenerateAllTypes.h" diff --git a/lib/THC/THCTensor.cu b/lib/THC/THCTensor.cu new file mode 100644 index 0000000..1e6fc20 --- /dev/null +++ b/lib/THC/THCTensor.cu @@ -0,0 +1,4 @@ +#include "THCTensor.h" + +#include "generic/THCTensor.cu" +#include "THCGenerateAllTypes.h" diff --git a/lib/THC/THCTensor.h b/lib/THC/THCTensor.h new file mode 100644 index 0000000..d4eb49a --- /dev/null +++ b/lib/THC/THCTensor.h @@ -0,0 +1,14 @@ +#ifndef THC_TENSOR_INC +#define THC_TENSOR_INC + +#include "THTensor.h" +#include "THCStorage.h" +#include "THCGeneral.h" + +#define THCTensor TH_CONCAT_3(TH,CReal,Tensor) +#define THCTensor_(NAME) TH_CONCAT_4(TH,CReal,Tensor_,NAME) + +#include "generic/THCTensor.h" +#include "THCGenerateAllTypes.h" + +#endif diff --git a/lib/THC/THCTensorCopy.c b/lib/THC/THCTensorCopy.c new file mode 100644 index 0000000..6813530 --- /dev/null +++ b/lib/THC/THCTensorCopy.c @@ -0,0 +1,6 @@ +#include "THCTensorCopy.h" +#include "THCGeneral.h" +#include "THCTensor.h" + +#include "generic/THCTensorCopy.c" +#include "THCGenerateAllTypes.h" diff --git a/lib/THC/THCTensorCopy.cu b/lib/THC/THCTensorCopy.cu new file mode 100644 index 0000000..9e59dd8 --- /dev/null +++ b/lib/THC/THCTensorCopy.cu @@ -0,0 +1,10 @@ +#include "THCApply.cuh" + +inline int curGPU() { + int curDev; + THCudaCheck(cudaGetDevice(&curDev)); + return curDev; +} + +#include "generic/THCTensorCopy.cu" +#include "THCGenerateAllTypes.h" diff --git a/lib/THC/THCTensorCopy.h b/lib/THC/THCTensorCopy.h new file mode 100644 index 0000000..fc206cb --- /dev/null +++ b/lib/THC/THCTensorCopy.h @@ -0,0 +1,10 @@ +#ifndef TH_CUDA_TENSOR_COPY_INC +#define TH_CUDA_TENSOR_COPY_INC + +#include "THCTensor.h" +#include "THCGeneral.h" + +#include "generic/THCTensorCopy.h" +#include "THCGenerateAllTypes.h" + +#endif diff --git a/lib/THC/generic/THCStorage.c b/lib/THC/generic/THCStorage.c index e7c529c..946ebc1 100644 --- a/lib/THC/generic/THCStorage.c +++ b/lib/THC/generic/THCStorage.c @@ -1,39 +1,39 @@ -#include "THCStorage.h" -#include "THCGeneral.h" -#include "THAtomic.h" +#ifndef THC_GENERIC_FILE +#define THC_GENERIC_FILE "generic/THCStorage.c" +#else -float* THCudaStorage_data(THCState *state, const THCudaStorage *self) +real* THCStorage_(data)(THCState *state, const THCStorage *self) { return self->data; } -long THCudaStorage_size(THCState *state, const THCudaStorage *self) +long THCStorage_(size)(THCState *state, const THCStorage *self) { return self->size; } -int THCudaStorage_elementSize(THCState *state) +int THCStorage_(elementSize)(THCState *state) { - return sizeof(float); + return sizeof(real); } -void THCudaStorage_set(THCState *state, THCudaStorage *self, long index, float value) +void THCStorage_(set)(THCState *state, THCStorage *self, long index, real value) { THArgCheck((index >= 0) && (index < self->size), 2, "index out of bounds"); - THCudaCheck(cudaMemcpy(self->data + index, &value, sizeof(float), cudaMemcpyHostToDevice)); + THCudaCheck(cudaMemcpy(self->data + index, &value, sizeof(real), cudaMemcpyHostToDevice)); } -float THCudaStorage_get(THCState *state, const THCudaStorage *self, long index) +real THCStorage_(get)(THCState *state, const THCStorage *self, long index) { - float value; + real value; THArgCheck((index >= 0) && (index < self->size), 2, "index out of bounds"); - THCudaCheck(cudaMemcpy(&value, self->data + index, sizeof(float), cudaMemcpyDeviceToHost)); + THCudaCheck(cudaMemcpy(&value, self->data + index, sizeof(real), cudaMemcpyDeviceToHost)); return value; } -THCudaStorage* THCudaStorage_new(THCState *state) +THCStorage* THCStorage_(new)(THCState *state) { - THCudaStorage *storage = (THCudaStorage*)THAlloc(sizeof(THCudaStorage)); + THCStorage *storage = (THCStorage*)THAlloc(sizeof(THCStorage)); storage->data = NULL; storage->size = 0; storage->refcount = 1; @@ -41,20 +41,20 @@ THCudaStorage* THCudaStorage_new(THCState *state) return storage; } -THCudaStorage* THCudaStorage_newWithSize(THCState *state, long size) +THCStorage* THCStorage_(newWithSize)(THCState *state, long size) { THArgCheck(size >= 0, 2, "invalid size"); if(size > 0) { - THCudaStorage *storage = (THCudaStorage*)THAlloc(sizeof(THCudaStorage)); + THCStorage *storage = (THCStorage*)THAlloc(sizeof(THCStorage)); // update heap *before* attempting malloc, to free space for the malloc - THCHeapUpdate(state, size * sizeof(float)); + THCHeapUpdate(state, size * sizeof(real)); cudaError_t err = - THCudaMalloc(state, (void**)&(storage->data), size * sizeof(float)); + THCudaMalloc(state, (void**)&(storage->data), size * sizeof(real)); if(err != cudaSuccess){ - THCHeapUpdate(state, -size * sizeof(float)); + THCHeapUpdate(state, -size * sizeof(real)); } THCudaCheck(err); @@ -65,53 +65,53 @@ THCudaStorage* THCudaStorage_newWithSize(THCState *state, long size) } else { - return THCudaStorage_new(state); + return THCStorage_(new)(state); } } -THCudaStorage* THCudaStorage_newWithSize1(THCState *state, float data0) +THCStorage* THCStorage_(newWithSize1)(THCState *state, real data0) { - THCudaStorage *self = THCudaStorage_newWithSize(state, 1); - THCudaStorage_set(state, self, 0, data0); + THCStorage *self = THCStorage_(newWithSize)(state, 1); + THCStorage_(set)(state, self, 0, data0); return self; } -THCudaStorage* THCudaStorage_newWithSize2(THCState *state, float data0, float data1) +THCStorage* THCStorage_(newWithSize2)(THCState *state, real data0, real data1) { - THCudaStorage *self = THCudaStorage_newWithSize(state, 2); - THCudaStorage_set(state, self, 0, data0); - THCudaStorage_set(state, self, 1, data1); + THCStorage *self = THCStorage_(newWithSize)(state, 2); + THCStorage_(set)(state, self, 0, data0); + THCStorage_(set)(state, self, 1, data1); return self; } -THCudaStorage* THCudaStorage_newWithSize3(THCState *state, float data0, float data1, float data2) +THCStorage* THCStorage_(newWithSize3)(THCState *state, real data0, real data1, real data2) { - THCudaStorage *self = THCudaStorage_newWithSize(state, 3); - THCudaStorage_set(state, self, 0, data0); - THCudaStorage_set(state, self, 1, data1); - THCudaStorage_set(state, self, 2, data2); + THCStorage *self = THCStorage_(newWithSize)(state, 3); + THCStorage_(set)(state, self, 0, data0); + THCStorage_(set)(state, self, 1, data1); + THCStorage_(set)(state, self, 2, data2); return self; } -THCudaStorage* THCudaStorage_newWithSize4(THCState *state, float data0, float data1, float data2, float data3) +THCStorage* THCStorage_(newWithSize4)(THCState *state, real data0, real data1, real data2, real data3) { - THCudaStorage *self = THCudaStorage_newWithSize(state, 4); - THCudaStorage_set(state, self, 0, data0); - THCudaStorage_set(state, self, 1, data1); - THCudaStorage_set(state, self, 2, data2); - THCudaStorage_set(state, self, 3, data3); + THCStorage *self = THCStorage_(newWithSize)(state, 4); + THCStorage_(set)(state, self, 0, data0); + THCStorage_(set)(state, self, 1, data1); + THCStorage_(set)(state, self, 2, data2); + THCStorage_(set)(state, self, 3, data3); return self; } -THCudaStorage* THCudaStorage_newWithMapping(THCState *state, const char *fileName, long size, int isShared) +THCStorage* THCStorage_(newWithMapping)(THCState *state, const char *fileName, long size, int isShared) { - THError("not available yet for THCudaStorage"); + THError("not available yet for THCStorage"); return NULL; } -THCudaStorage* THCudaStorage_newWithData(THCState *state, float *data, long size) +THCStorage* THCStorage_(newWithData)(THCState *state, real *data, long size) { - THCudaStorage *storage = (THCudaStorage*)THAlloc(sizeof(THCudaStorage)); + THCStorage *storage = (THCStorage*)THAlloc(sizeof(THCStorage)); storage->data = data; storage->size = size; storage->refcount = 1; @@ -119,23 +119,23 @@ THCudaStorage* THCudaStorage_newWithData(THCState *state, float *data, long size return storage; } -void THCudaStorage_setFlag(THCState *state, THCudaStorage *storage, const char flag) +void THCStorage_(setFlag)(THCState *state, THCStorage *storage, const char flag) { storage->flag |= flag; } -void THCudaStorage_clearFlag(THCState *state, THCudaStorage *storage, const char flag) +void THCStorage_(clearFlag)(THCState *state, THCStorage *storage, const char flag) { storage->flag &= ~flag; } -void THCudaStorage_retain(THCState *state, THCudaStorage *self) +void THCStorage_(retain)(THCState *state, THCStorage *self) { if(self && (self->flag & TH_STORAGE_REFCOUNTED)) THAtomicIncrementRef(&self->refcount); } -void THCudaStorage_free(THCState *state, THCudaStorage *self) +void THCStorage_(free)(THCState *state, THCStorage *self) { if(!(self->flag & TH_STORAGE_REFCOUNTED)) return; @@ -143,9 +143,10 @@ void THCudaStorage_free(THCState *state, THCudaStorage *self) if (THAtomicDecrementRef(&self->refcount)) { if(self->flag & TH_STORAGE_FREEMEM) { - THCHeapUpdate(state, -self->size * sizeof(float)); + THCHeapUpdate(state, -self->size * sizeof(real)); THCudaCheck(THCudaFree(state, self->data)); } THFree(self); } } +#endif diff --git a/lib/THC/generic/THCStorage.cu b/lib/THC/generic/THCStorage.cu index c61f08e..740b2bd 100644 --- a/lib/THC/generic/THCStorage.cu +++ b/lib/THC/generic/THCStorage.cu @@ -1,14 +1,10 @@ -#include "THCStorage.h" +#ifndef THC_GENERIC_FILE +#define THC_GENERIC_FILE "generic/THCStorage.cu" +#else -#include <thrust/device_ptr.h> -#include <thrust/fill.h> -#if CUDA_VERSION >= 7000 -#include <thrust/system/cuda/execution_policy.h> -#endif - -void THCudaStorage_fill(THCState *state, THCudaStorage *self, float value) +void THCStorage_(fill)(THCState *state, THCStorage *self, real value) { - thrust::device_ptr<float> self_data(self->data); + thrust::device_ptr<real> self_data(self->data); thrust::fill( #if CUDA_VERSION >= 7000 thrust::cuda::par.on(THCState_getCurrentStream(state)), @@ -16,7 +12,7 @@ void THCudaStorage_fill(THCState *state, THCudaStorage *self, float value) self_data, self_data+self->size, value); } -void THCudaStorage_resize(THCState *state, THCudaStorage *self, long size) +void THCStorage_(resize)(THCState *state, THCStorage *self, long size) { THArgCheck(size >= 0, 2, "invalid size"); @@ -27,33 +23,34 @@ void THCudaStorage_resize(THCState *state, THCudaStorage *self, long size) { if(self->flag & TH_STORAGE_FREEMEM) { THCudaCheck(THCudaFree(state, self->data)); - THCHeapUpdate(state, -self->size * sizeof(float)); + THCHeapUpdate(state, -self->size * sizeof(real)); } self->data = NULL; self->size = 0; } else { - float *data = NULL; + real *data = NULL; // update heap *before* attempting malloc, to free space for the malloc - THCHeapUpdate(state, size * sizeof(float)); - cudaError_t err = THCudaMalloc(state, (void**)(&data), size * sizeof(float)); + THCHeapUpdate(state, size * sizeof(real)); + cudaError_t err = THCudaMalloc(state, (void**)(&data), size * sizeof(real)); if(err != cudaSuccess) { - THCHeapUpdate(state, -size * sizeof(float)); + THCHeapUpdate(state, -size * sizeof(real)); } THCudaCheck(err); if (self->data) { THCudaCheck(cudaMemcpyAsync(data, self->data, - THMin(self->size, size) * sizeof(float), + THMin(self->size, size) * sizeof(real), cudaMemcpyDeviceToDevice, THCState_getCurrentStream(state))); THCudaCheck(THCudaFree(state, self->data)); - THCHeapUpdate(state, -self->size * sizeof(float)); + THCHeapUpdate(state, -self->size * sizeof(real)); } self->data = data; self->size = size; } } +#endif diff --git a/lib/THC/generic/THCStorage.h b/lib/THC/generic/THCStorage.h index 6549e5c..a8c5f5f 100644 --- a/lib/THC/generic/THCStorage.h +++ b/lib/THC/generic/THCStorage.h @@ -1,57 +1,54 @@ -#ifndef THC_STORAGE_INC -#define THC_STORAGE_INC - -#include "THStorage.h" -#include "THCGeneral.h" +#ifndef THC_GENERIC_FILE +#define THC_GENERIC_FILE "generic/THCStorage.h" +#else #define TH_STORAGE_REFCOUNTED 1 #define TH_STORAGE_RESIZABLE 2 #define TH_STORAGE_FREEMEM 4 - -typedef struct THCudaStorage +typedef struct THCStorage { - float *data; + real *data; long size; int refcount; char flag; THAllocator *allocator; void *allocatorContext; - struct THCudaStorage *view; -} THCudaStorage; + struct THCStorage *view; +} THCStorage; -THC_API float* THCudaStorage_data(THCState *state, const THCudaStorage*); -THC_API long THCudaStorage_size(THCState *state, const THCudaStorage*); -THC_API int THCudaStorage_elementSize(THCState *state); +THC_API real* THCStorage_(data)(THCState *state, const THCStorage*); +THC_API long THCStorage_(size)(THCState *state, const THCStorage*); +THC_API int THCStorage_(elementSize)(THCState *state); /* slow access -- checks everything */ -THC_API void THCudaStorage_set(THCState *state, THCudaStorage*, long, float); -THC_API float THCudaStorage_get(THCState *state, const THCudaStorage*, long); +THC_API void THCStorage_(set)(THCState *state, THCStorage*, long, real); +THC_API real THCStorage_(get)(THCState *state, const THCStorage*, long); -THC_API THCudaStorage* THCudaStorage_new(THCState *state); -THC_API THCudaStorage* THCudaStorage_newWithSize(THCState *state, long size); -THC_API THCudaStorage* THCudaStorage_newWithSize1(THCState *state, float); -THC_API THCudaStorage* THCudaStorage_newWithSize2(THCState *state, float, float); -THC_API THCudaStorage* THCudaStorage_newWithSize3(THCState *state, float, float, float); -THC_API THCudaStorage* THCudaStorage_newWithSize4(THCState *state, float, float, float, float); -THC_API THCudaStorage* THCudaStorage_newWithMapping(THCState *state, const char *filename, long size, int shared); +THC_API THCStorage* THCStorage_(new)(THCState *state); +THC_API THCStorage* THCStorage_(newWithSize)(THCState *state, long size); +THC_API THCStorage* THCStorage_(newWithSize1)(THCState *state, real); +THC_API THCStorage* THCStorage_(newWithSize2)(THCState *state, real, real); +THC_API THCStorage* THCStorage_(newWithSize3)(THCState *state, real, real, real); +THC_API THCStorage* THCStorage_(newWithSize4)(THCState *state, real, real, real, real); +THC_API THCStorage* THCStorage_(newWithMapping)(THCState *state, const char *filename, long size, int shared); /* takes ownership of data */ -THC_API THCudaStorage* THCudaStorage_newWithData(THCState *state, float *data, long size); +THC_API THCStorage* THCStorage_(newWithData)(THCState *state, real *data, long size); -THC_API THCudaStorage* THCudaStorage_newWithAllocator(THCState *state, long size, +THC_API THCStorage* THCStorage_(newWithAllocator)(THCState *state, long size, THAllocator* allocator, void *allocatorContext); -THC_API THCudaStorage* THCudaStorage_newWithDataAndAllocator( - THCState *state, float* data, long size, THAllocator* allocator, void *allocatorContext); +THC_API THCStorage* THCStorage_(newWithDataAndAllocator)( + THCState *state, real* data, long size, THAllocator* allocator, void *allocatorContext); -THC_API void THCudaStorage_setFlag(THCState *state, THCudaStorage *storage, const char flag); -THC_API void THCudaStorage_clearFlag(THCState *state, THCudaStorage *storage, const char flag); -THC_API void THCudaStorage_retain(THCState *state, THCudaStorage *storage); +THC_API void THCStorage_(setFlag)(THCState *state, THCStorage *storage, const char flag); +THC_API void THCStorage_(clearFlag)(THCState *state, THCStorage *storage, const char flag); +THC_API void THCStorage_(retain)(THCState *state, THCStorage *storage); -THC_API void THCudaStorage_free(THCState *state, THCudaStorage *storage); -THC_API void THCudaStorage_resize(THCState *state, THCudaStorage *storage, long size); -THC_API void THCudaStorage_fill(THCState *state, THCudaStorage *storage, float value); +THC_API void THCStorage_(free)(THCState *state, THCStorage *storage); +THC_API void THCStorage_(resize)(THCState *state, THCStorage *storage, long size); +THC_API void THCStorage_(fill)(THCState *state, THCStorage *storage, real value); #endif diff --git a/lib/THC/generic/THCStorageCopy.c b/lib/THC/generic/THCStorageCopy.c index 3517b2c..1696307 100644 --- a/lib/THC/generic/THCStorageCopy.c +++ b/lib/THC/generic/THCStorageCopy.c @@ -1,21 +1,26 @@ -#include "THCStorageCopy.h" -#include "THCGeneral.h" +#ifndef THC_GENERIC_FILE +#define THC_GENERIC_FILE "generic/THCStorageCopy.c" +#else -void THCudaStorage_copyFloat(THCState *state, THCudaStorage *self, struct THFloatStorage *src) +void THCStorage_(copyCPU)(THCState *state, THCStorage *self, struct THStorage *src) { THArgCheck(self->size == src->size, 2, "size does not match"); - THCudaCheck(cudaMemcpy(self->data, src->data, self->size * sizeof(float), cudaMemcpyHostToDevice)); + THCudaCheck(cudaMemcpy(self->data, src->data, self->size * sizeof(real), cudaMemcpyHostToDevice)); } -#define TH_CUDA_STORAGE_IMPLEMENT_COPY(TYPEC) \ - void THCudaStorage_copy##TYPEC(THCState *state, THCudaStorage *self, struct TH##TYPEC##Storage *src) \ - { \ - THFloatStorage *buffer; \ - THArgCheck(self->size == src->size, 2, "size does not match"); \ - buffer = THFloatStorage_newWithSize(src->size); \ - THFloatStorage_copy##TYPEC(buffer, src); \ - THCudaStorage_copyFloat(state, self, buffer); \ - THFloatStorage_free(buffer); \ +#define TH_CUDA_STORAGE_IMPLEMENT_COPY(TYPEC) \ + void THCStorage_(copy##TYPEC)(THCState *state, THCStorage *self, struct TH##TYPEC##Storage *src) \ + { \ + if(THCTypeIdx_(Real) == THCTypeIdx_(TYPEC)) { \ + THCStorage_(copyCPU)(state, self, (THStorage*) src); /* cast just removes compiler warning */ \ + } else { \ + THStorage *buffer; \ + THArgCheck(self->size == src->size, 2, "size does not match"); \ + buffer = THStorage_(newWithSize)(src->size); \ + THStorage_(copy##TYPEC)(buffer, src); \ + THCStorage_(copyCPU)(state, self, buffer); \ + THStorage_(free)(buffer); \ + } \ } TH_CUDA_STORAGE_IMPLEMENT_COPY(Byte) @@ -23,23 +28,28 @@ TH_CUDA_STORAGE_IMPLEMENT_COPY(Char) TH_CUDA_STORAGE_IMPLEMENT_COPY(Short) TH_CUDA_STORAGE_IMPLEMENT_COPY(Int) TH_CUDA_STORAGE_IMPLEMENT_COPY(Long) +TH_CUDA_STORAGE_IMPLEMENT_COPY(Float) TH_CUDA_STORAGE_IMPLEMENT_COPY(Double) -void THFloatStorage_copyCuda(THCState *state, THFloatStorage *self, struct THCudaStorage *src) +void THStorage_(copyCuda)(THCState *state, THStorage *self, struct THCStorage *src) { THArgCheck(self->size == src->size, 2, "size does not match"); - THCudaCheck(cudaMemcpy(self->data, src->data, self->size * sizeof(float), cudaMemcpyDeviceToHost)); + THCudaCheck(cudaMemcpy(self->data, src->data, self->size * sizeof(real), cudaMemcpyDeviceToHost)); } -#define TH_CUDA_STORAGE_IMPLEMENT_COPYTO(TYPEC) \ - void TH##TYPEC##Storage_copyCuda(THCState *state, TH##TYPEC##Storage *self, struct THCudaStorage *src) \ +#define TH_CUDA_STORAGE_IMPLEMENT_COPYTO(TYPEC) \ + void TH_CONCAT_4(TH,TYPEC,Storage_copyCuda,Real)(THCState *state, TH##TYPEC##Storage *self, struct THCStorage *src) \ { \ - THFloatStorage *buffer; \ - THArgCheck(self->size == src->size, 2, "size does not match"); \ - buffer = THFloatStorage_newWithSize(src->size); \ - THFloatStorage_copyCuda(state, buffer, src); \ - TH##TYPEC##Storage_copyFloat(self, buffer); \ - THFloatStorage_free(buffer); \ + if(THCTypeIdx_(Real) == THCTypeIdx_(TYPEC)) { \ + THStorage_(copyCuda)(state, (THStorage*) self, src); /* cast just removes compiler warnings */ \ + } else { \ + THStorage *buffer; \ + THArgCheck(self->size == src->size, 2, "size does not match"); \ + buffer = THStorage_(newWithSize)(src->size); \ + THStorage_(copyCuda)(state, buffer, src); \ + TH_CONCAT_4(TH,TYPEC,Storage_copy,Real)(self, buffer); \ + THStorage_(free)(buffer); \ + } \ } TH_CUDA_STORAGE_IMPLEMENT_COPYTO(Byte) @@ -47,4 +57,7 @@ TH_CUDA_STORAGE_IMPLEMENT_COPYTO(Char) TH_CUDA_STORAGE_IMPLEMENT_COPYTO(Short) TH_CUDA_STORAGE_IMPLEMENT_COPYTO(Int) TH_CUDA_STORAGE_IMPLEMENT_COPYTO(Long) +TH_CUDA_STORAGE_IMPLEMENT_COPYTO(Float) TH_CUDA_STORAGE_IMPLEMENT_COPYTO(Double) + +#endif diff --git a/lib/THC/generic/THCStorageCopy.cu b/lib/THC/generic/THCStorageCopy.cu index c3205b5..ffe8332 100644 --- a/lib/THC/generic/THCStorageCopy.cu +++ b/lib/THC/generic/THCStorageCopy.cu @@ -1,19 +1,49 @@ -#include "THCStorageCopy.h" -#include "THCGeneral.h" +#ifndef THC_GENERIC_FILE +#define THC_GENERIC_FILE "generic/THCStorageCopy.cu" +#else -void THCudaStorage_rawCopy(THCState *state, THCudaStorage *self, float *src) +void THCStorage_(rawCopy)(THCState *state, THCStorage *self, real *src) { - THCudaCheck(cudaMemcpyAsync(self->data, src, self->size * sizeof(float), cudaMemcpyDeviceToDevice, THCState_getCurrentStream(state))); + THCudaCheck(cudaMemcpyAsync(self->data, src, self->size * sizeof(real), cudaMemcpyDeviceToDevice, THCState_getCurrentStream(state))); } -void THCudaStorage_copy(THCState *state, THCudaStorage *self, THCudaStorage *src) +void THCStorage_(copy)(THCState *state, THCStorage *self, THCStorage *src) { THArgCheck(self->size == src->size, 2, "size does not match"); - THCudaCheck(cudaMemcpyAsync(self->data, src->data, self->size * sizeof(float), cudaMemcpyDeviceToDevice, THCState_getCurrentStream(state))); + THCudaCheck(cudaMemcpyAsync(self->data, src->data, self->size * sizeof(real), cudaMemcpyDeviceToDevice, THCState_getCurrentStream(state))); } -void THCudaStorage_copyCuda(THCState *state, THCudaStorage *self, THCudaStorage *src) +void THCStorage_(copyCuda)(THCState *state, THCStorage *self, THCStorage *src) { THArgCheck(self->size == src->size, 2, "size does not match"); - THCudaCheck(cudaMemcpyAsync(self->data, src->data, self->size * sizeof(float), cudaMemcpyDeviceToDevice, THCState_getCurrentStream(state))); + THCudaCheck(cudaMemcpyAsync(self->data, src->data, self->size * sizeof(real), cudaMemcpyDeviceToDevice, THCState_getCurrentStream(state))); } + +// conversions are mediated by the CPU +// yes, this is slow; feel free to write CUDA kernels for this +#define THC_CUDA_STORAGE_IMPLEMENT_COPY(TYPEC,TYPECUDA) \ + void THCStorage_(copyCuda##TYPEC)(THCState *state, THCStorage *self, struct THCuda##TYPECUDA##Storage *src) \ + { \ + if(THCTypeIdx_(Real) == THCTypeIdx_(TYPEC)) { \ + THCStorage_(copy)(state, self, (THCStorage*) src); /* cast just removes compiler warning */ \ + } else { \ + THArgCheck(self->size == src->size, 2, "size does not match"); \ + TH##TYPEC##Storage *buffer1 = TH##TYPEC##Storage_newWithSize(src->size); \ + THStorage *buffer2 = THStorage_(newWithSize)(src->size); \ + TH##TYPEC##Storage_copyCuda(state, buffer1, src); \ + THStorage_(copy##TYPEC)(buffer2, buffer1); \ + THCStorage_(copyCPU)(state, self, buffer2); \ + TH##TYPEC##Storage_free(buffer1); \ + THStorage_(free)(buffer2); \ + } \ + } + +THC_CUDA_STORAGE_IMPLEMENT_COPY(Byte,Byte) +THC_CUDA_STORAGE_IMPLEMENT_COPY(Char,Char) +THC_CUDA_STORAGE_IMPLEMENT_COPY(Short,Short) +THC_CUDA_STORAGE_IMPLEMENT_COPY(Int,Int) +THC_CUDA_STORAGE_IMPLEMENT_COPY(Long,Long) +THC_CUDA_STORAGE_IMPLEMENT_COPY(Float,) // i.e. float +THC_CUDA_STORAGE_IMPLEMENT_COPY(Double,Double) + +#endif diff --git a/lib/THC/generic/THCStorageCopy.h b/lib/THC/generic/THCStorageCopy.h index 4c2f8df..d80661d 100644 --- a/lib/THC/generic/THCStorageCopy.h +++ b/lib/THC/generic/THCStorageCopy.h @@ -1,28 +1,37 @@ -#ifndef THC_STORAGE_COPY_INC -#define THC_STORAGE_COPY_INC - -#include "THCStorage.h" -#include "THCGeneral.h" +#ifndef THC_GENERIC_FILE +#define THC_GENERIC_FILE "generic/THCStorageCopy.h" +#else /* Support for copy between different Storage types */ -THC_API void THCudaStorage_rawCopy(THCState *state, THCudaStorage *storage, float *src); -THC_API void THCudaStorage_copy(THCState *state, THCudaStorage *storage, THCudaStorage *src); -THC_API void THCudaStorage_copyByte(THCState *state, THCudaStorage *storage, struct THByteStorage *src); -THC_API void THCudaStorage_copyChar(THCState *state, THCudaStorage *storage, struct THCharStorage *src); -THC_API void THCudaStorage_copyShort(THCState *state, THCudaStorage *storage, struct THShortStorage *src); -THC_API void THCudaStorage_copyInt(THCState *state, THCudaStorage *storage, struct THIntStorage *src); -THC_API void THCudaStorage_copyLong(THCState *state, THCudaStorage *storage, struct THLongStorage *src); -THC_API void THCudaStorage_copyFloat(THCState *state, THCudaStorage *storage, struct THFloatStorage *src); -THC_API void THCudaStorage_copyDouble(THCState *state, THCudaStorage *storage, struct THDoubleStorage *src); +THC_API void THCStorage_(rawCopy)(THCState *state, THCStorage *storage, real *src); +THC_API void THCStorage_(copy)(THCState *state, THCStorage *storage, THCStorage *src); +THC_API void THCStorage_(copyByte)(THCState *state, THCStorage *storage, struct THByteStorage *src); +THC_API void THCStorage_(copyChar)(THCState *state, THCStorage *storage, struct THCharStorage *src); +THC_API void THCStorage_(copyShort)(THCState *state, THCStorage *storage, struct THShortStorage *src); +THC_API void THCStorage_(copyInt)(THCState *state, THCStorage *storage, struct THIntStorage *src); +THC_API void THCStorage_(copyLong)(THCState *state, THCStorage *storage, struct THLongStorage *src); +THC_API void THCStorage_(copyFloat)(THCState *state, THCStorage *storage, struct THFloatStorage *src); +THC_API void THCStorage_(copyDouble)(THCState *state, THCStorage *storage, struct THDoubleStorage *src); + +THC_API void THCStorage_(copyCudaByte)(THCState *state, THCStorage *storage, struct THCudaByteStorage *src); +THC_API void THCStorage_(copyCudaChar)(THCState *state, THCStorage *storage, struct THCudaCharStorage *src); +THC_API void THCStorage_(copyCudaShort)(THCState *state, THCStorage *storage, struct THCudaShortStorage *src); +THC_API void THCStorage_(copyCudaInt)(THCState *state, THCStorage *storage, struct THCudaIntStorage *src); +THC_API void THCStorage_(copyCudaLong)(THCState *state, THCStorage *storage, struct THCudaLongStorage *src); +THC_API void THCStorage_(copyCudaFloat)(THCState *state, THCStorage *storage, struct THCudaStorage *src); +THC_API void THCStorage_(copyCudaDouble)(THCState *state, THCStorage *storage, struct THCudaDoubleStorage *src); + -THC_API void THByteStorage_copyCuda(THCState *state, THByteStorage *self, struct THCudaStorage *src); -THC_API void THCharStorage_copyCuda(THCState *state, THCharStorage *self, struct THCudaStorage *src); -THC_API void THShortStorage_copyCuda(THCState *state, THShortStorage *self, struct THCudaStorage *src); -THC_API void THIntStorage_copyCuda(THCState *state, THIntStorage *self, struct THCudaStorage *src); -THC_API void THLongStorage_copyCuda(THCState *state, THLongStorage *self, struct THCudaStorage *src); -THC_API void THFloatStorage_copyCuda(THCState *state, THFloatStorage *self, struct THCudaStorage *src); -THC_API void THDoubleStorage_copyCuda(THCState *state, THDoubleStorage *self, struct THCudaStorage *src); -THC_API void THCudaStorage_copyCuda(THCState *state, THCudaStorage *self, THCudaStorage *src); +THC_API void TH_CONCAT_2(THByteStorage_copyCuda , Real)(THCState *state, THByteStorage *self, struct THCStorage *src); +THC_API void TH_CONCAT_2(THCharStorage_copyCuda , Real)(THCState *state, THCharStorage *self, struct THCStorage *src); +THC_API void TH_CONCAT_2(THShortStorage_copyCuda , Real)(THCState *state, THShortStorage *self, struct THCStorage *src); +THC_API void TH_CONCAT_2(THIntStorage_copyCuda , Real)(THCState *state, THIntStorage *self, struct THCStorage *src); +THC_API void TH_CONCAT_2(THLongStorage_copyCuda , Real)(THCState *state, THLongStorage *self, struct THCStorage *src); +THC_API void TH_CONCAT_2(THFloatStorage_copyCuda , Real)(THCState *state, THFloatStorage *self, struct THCStorage *src); +THC_API void TH_CONCAT_2(THDoubleStorage_copyCuda, Real)(THCState *state, THDoubleStorage *self, struct THCStorage *src); +THC_API void THStorage_(copyCuda)(THCState *state, THStorage *self, THCStorage *src); +THC_API void THCStorage_(copyCuda)(THCState *state, THCStorage *self, THCStorage *src); +THC_API void THCStorage_(copyCPU)(THCState *state, THCStorage *self, THStorage *src); #endif diff --git a/lib/THC/generic/THCTensor.c b/lib/THC/generic/THCTensor.c index fa325e3..6645c0b 100644 --- a/lib/THC/generic/THCTensor.c +++ b/lib/THC/generic/THCTensor.c @@ -1,51 +1,50 @@ -#include "THCGeneral.h" -#include "THCTensor.h" -#include "THCTensorCopy.h" -#include "THAtomic.h" +#ifndef THC_GENERIC_FILE +#define THC_GENERIC_FILE "generic/THCTensor.c" +#else /**** access methods ****/ -THCudaStorage *THCudaTensor_storage(THCState *state, const THCudaTensor *self) +THCStorage *THCTensor_(storage)(THCState *state, const THCTensor *self) { return self->storage; } -long THCudaTensor_storageOffset(THCState *state, const THCudaTensor *self) +long THCTensor_(storageOffset)(THCState *state, const THCTensor *self) { return self->storageOffset; } -int THCudaTensor_nDimension(THCState *state, const THCudaTensor *self) +int THCTensor_(nDimension)(THCState *state, const THCTensor *self) { return self->nDimension; } -long THCudaTensor_size(THCState *state, const THCudaTensor *self, int dim) +long THCTensor_(size)(THCState *state, const THCTensor *self, int dim) { THArgCheck((dim >= 0) && (dim < self->nDimension), 2, "out of range"); return self->size[dim]; } -long THCudaTensor_stride(THCState *state, const THCudaTensor *self, int dim) +long THCTensor_(stride)(THCState *state, const THCTensor *self, int dim) { THArgCheck((dim >= 0) && (dim < self->nDimension), 2, "out of range"); return self->stride[dim]; } -THLongStorage *THCudaTensor_newSizeOf(THCState *state, THCudaTensor *self) +THLongStorage *THCTensor_(newSizeOf)(THCState *state, THCTensor *self) { THLongStorage *size = THLongStorage_newWithSize(self->nDimension); THLongStorage_rawCopy(size, self->size); return size; } -THLongStorage *THCudaTensor_newStrideOf(THCState *state, THCudaTensor *self) +THLongStorage *THCTensor_(newStrideOf)(THCState *state, THCTensor *self) { THLongStorage *stride = THLongStorage_newWithSize(self->nDimension); THLongStorage_rawCopy(stride, self->stride); return stride; } -float *THCudaTensor_data(THCState *state, const THCudaTensor *self) +real *THCTensor_(data)(THCState *state, const THCTensor *self) { if(self->storage) return (self->storage->data+self->storageOffset); @@ -53,36 +52,36 @@ float *THCudaTensor_data(THCState *state, const THCudaTensor *self) return NULL; } -void THCudaTensor_setFlag(THCState *state, THCudaTensor *self, const char flag) +void THCTensor_(setFlag)(THCState *state, THCTensor *self, const char flag) { self->flag |= flag; } -void THCudaTensor_clearFlag(THCState *state, THCudaTensor *self, const char flag) +void THCTensor_(clearFlag)(THCState *state, THCTensor *self, const char flag) { self->flag &= ~flag; } /**** creation methods ****/ -static void THCudaTensor_rawInit(THCState *state, THCudaTensor *self); -static void THCudaTensor_rawSet(THCState *state, THCudaTensor *self, THCudaStorage *storage, long storageOffset, int nDimension, long *size, long *stride); +static void THCTensor_(rawInit)(THCState *state, THCTensor *self); +static void THCTensor_(rawSet)(THCState *state, THCTensor *self, THCStorage *storage, long storageOffset, int nDimension, long *size, long *stride); /* Empty init */ -THCudaTensor *THCudaTensor_new(THCState *state) +THCTensor *THCTensor_(new)(THCState *state) { - THCudaTensor *self = (THCudaTensor*)THAlloc(sizeof(THCudaTensor)); - THCudaTensor_rawInit(state, self); + THCTensor *self = (THCTensor*)THAlloc(sizeof(THCTensor)); + THCTensor_(rawInit)(state, self); return self; } /* Pointer-copy init */ -THCudaTensor *THCudaTensor_newWithTensor(THCState *state, THCudaTensor *tensor) +THCTensor *THCTensor_(newWithTensor)(THCState *state, THCTensor *tensor) { - THCudaTensor *self = (THCudaTensor*)THAlloc(sizeof(THCudaTensor)); - THCudaTensor_rawInit(state, self); - THCudaTensor_rawSet(state, + THCTensor *self = (THCTensor*)THAlloc(sizeof(THCTensor)); + THCTensor_(rawInit)(state, self); + THCTensor_(rawSet)(state, self, tensor->storage, tensor->storageOffset, @@ -93,14 +92,14 @@ THCudaTensor *THCudaTensor_newWithTensor(THCState *state, THCudaTensor *tensor) } /* Storage init */ -THCudaTensor *THCudaTensor_newWithStorage(THCState *state, THCudaStorage *storage, long storageOffset, THLongStorage *size, THLongStorage *stride) +THCTensor *THCTensor_(newWithStorage)(THCState *state, THCStorage *storage, long storageOffset, THLongStorage *size, THLongStorage *stride) { - THCudaTensor *self = (THCudaTensor*)THAlloc(sizeof(THCudaTensor)); + THCTensor *self = (THCTensor*)THAlloc(sizeof(THCTensor)); if(size && stride) THArgCheck(size->size == stride->size, 4, "inconsistent size"); - THCudaTensor_rawInit(state, self); - THCudaTensor_rawSet(state, + THCTensor_(rawInit)(state, self); + THCTensor_(rawSet)(state, self, storage, storageOffset, @@ -110,28 +109,28 @@ THCudaTensor *THCudaTensor_newWithStorage(THCState *state, THCudaStorage *storag return self; } -THCudaTensor *THCudaTensor_newWithStorage1d(THCState *state, THCudaStorage *storage, long storageOffset, +THCTensor *THCTensor_(newWithStorage1d)(THCState *state, THCStorage *storage, long storageOffset, long size0, long stride0) { - return THCudaTensor_newWithStorage4d(state, storage, storageOffset, size0, stride0, -1, -1, -1, -1, -1, -1); + return THCTensor_(newWithStorage4d)(state, storage, storageOffset, size0, stride0, -1, -1, -1, -1, -1, -1); } -THCudaTensor *THCudaTensor_newWithStorage2d(THCState *state, THCudaStorage *storage, long storageOffset, +THCTensor *THCTensor_(newWithStorage2d)(THCState *state, THCStorage *storage, long storageOffset, long size0, long stride0, long size1, long stride1) { - return THCudaTensor_newWithStorage4d(state, storage, storageOffset, size0, stride0, size1, stride1, -1, -1, -1, -1); + return THCTensor_(newWithStorage4d)(state, storage, storageOffset, size0, stride0, size1, stride1, -1, -1, -1, -1); } -THCudaTensor *THCudaTensor_newWithStorage3d(THCState *state, THCudaStorage *storage, long storageOffset, +THCTensor *THCTensor_(newWithStorage3d)(THCState *state, THCStorage *storage, long storageOffset, long size0, long stride0, long size1, long stride1, long size2, long stride2) { - return THCudaTensor_newWithStorage4d(state, storage, storageOffset, size0, stride0, size1, stride1, size2, stride2, -1, -1); + return THCTensor_(newWithStorage4d)(state, storage, storageOffset, size0, stride0, size1, stride1, size2, stride2, -1, -1); } -THCudaTensor *THCudaTensor_newWithStorage4d(THCState *state, THCudaStorage *storage, long storageOffset, +THCTensor *THCTensor_(newWithStorage4d)(THCState *state, THCStorage *storage, long storageOffset, long size0, long stride0, long size1, long stride1, long size2, long stride2, @@ -140,102 +139,102 @@ THCudaTensor *THCudaTensor_newWithStorage4d(THCState *state, THCudaStorage *stor long size[4] = {size0, size1, size2, size3}; long stride[4] = {stride0, stride1, stride2, stride3}; - THCudaTensor *self = (THCudaTensor*)THAlloc(sizeof(THCudaTensor)); - THCudaTensor_rawInit(state, self); - THCudaTensor_rawSet(state, self, storage, storageOffset, 4, size, stride); + THCTensor *self = (THCTensor*)THAlloc(sizeof(THCTensor)); + THCTensor_(rawInit)(state, self); + THCTensor_(rawSet)(state, self, storage, storageOffset, 4, size, stride); return self; } -THCudaTensor *THCudaTensor_newWithSize(THCState *state, THLongStorage *size, THLongStorage *stride) +THCTensor *THCTensor_(newWithSize)(THCState *state, THLongStorage *size, THLongStorage *stride) { - return THCudaTensor_newWithStorage(state, NULL, 0, size, stride); + return THCTensor_(newWithStorage)(state, NULL, 0, size, stride); } -THCudaTensor *THCudaTensor_newWithSize1d(THCState *state, long size0) +THCTensor *THCTensor_(newWithSize1d)(THCState *state, long size0) { - return THCudaTensor_newWithSize4d(state, size0, -1, -1, -1); + return THCTensor_(newWithSize4d)(state, size0, -1, -1, -1); } -THCudaTensor *THCudaTensor_newWithSize2d(THCState *state, long size0, long size1) +THCTensor *THCTensor_(newWithSize2d)(THCState *state, long size0, long size1) { - return THCudaTensor_newWithSize4d(state, size0, size1, -1, -1); + return THCTensor_(newWithSize4d)(state, size0, size1, -1, -1); } -THCudaTensor *THCudaTensor_newWithSize3d(THCState *state, long size0, long size1, long size2) +THCTensor *THCTensor_(newWithSize3d)(THCState *state, long size0, long size1, long size2) { - return THCudaTensor_newWithSize4d(state, size0, size1, size2, -1); + return THCTensor_(newWithSize4d)(state, size0, size1, size2, -1); } -THCudaTensor *THCudaTensor_newWithSize4d(THCState *state, long size0, long size1, long size2, long size3) +THCTensor *THCTensor_(newWithSize4d)(THCState *state, long size0, long size1, long size2, long size3) { long size[4] = {size0, size1, size2, size3}; - THCudaTensor *self = (THCudaTensor*)THAlloc(sizeof(THCudaTensor)); - THCudaTensor_rawInit(state, self); - THCudaTensor_rawResize(state, self, 4, size, NULL); + THCTensor *self = (THCTensor*)THAlloc(sizeof(THCTensor)); + THCTensor_(rawInit)(state, self); + THCTensor_(rawResize)(state, self, 4, size, NULL); return self; } -THCudaTensor *THCudaTensor_newClone(THCState *state, THCudaTensor *self) +THCTensor *THCTensor_(newClone)(THCState *state, THCTensor *self) { - THCudaTensor *tensor = THCudaTensor_new(state); - THCudaTensor_resizeAs(state, tensor, self); - THCudaTensor_copy(state, tensor, self); + THCTensor *tensor = THCTensor_(new)(state); + THCTensor_(resizeAs)(state, tensor, self); + THCTensor_(copy)(state, tensor, self); return tensor; } -THCudaTensor *THCudaTensor_newContiguous(THCState *state, THCudaTensor *self) +THCTensor *THCTensor_(newContiguous)(THCState *state, THCTensor *self) { - if(!THCudaTensor_isContiguous(state, self)) - return THCudaTensor_newClone(state, self); + if(!THCTensor_(isContiguous)(state, self)) + return THCTensor_(newClone)(state, self); else { - THCudaTensor_retain(state, self); + THCTensor_(retain)(state, self); return self; } } -THCudaTensor *THCudaTensor_newSelect(THCState *state, THCudaTensor *tensor, int dimension_, long sliceIndex_) +THCTensor *THCTensor_(newSelect)(THCState *state, THCTensor *tensor, int dimension_, long sliceIndex_) { - THCudaTensor *self = THCudaTensor_newWithTensor(state, tensor); - THCudaTensor_select(state, self, NULL, dimension_, sliceIndex_); + THCTensor *self = THCTensor_(newWithTensor)(state, tensor); + THCTensor_(select)(state, self, NULL, dimension_, sliceIndex_); return self; } -THCudaTensor *THCudaTensor_newNarrow(THCState *state, THCudaTensor *tensor, int dimension_, long firstIndex_, long size_) +THCTensor *THCTensor_(newNarrow)(THCState *state, THCTensor *tensor, int dimension_, long firstIndex_, long size_) { - THCudaTensor *self = THCudaTensor_newWithTensor(state, tensor); - THCudaTensor_narrow(state, self, NULL, dimension_, firstIndex_, size_); + THCTensor *self = THCTensor_(newWithTensor)(state, tensor); + THCTensor_(narrow)(state, self, NULL, dimension_, firstIndex_, size_); return self; } -THCudaTensor *THCudaTensor_newTranspose(THCState *state, THCudaTensor *tensor, int dimension1_, int dimension2_) +THCTensor *THCTensor_(newTranspose)(THCState *state, THCTensor *tensor, int dimension1_, int dimension2_) { - THCudaTensor *self = THCudaTensor_newWithTensor(state, tensor); - THCudaTensor_transpose(state, self, NULL, dimension1_, dimension2_); + THCTensor *self = THCTensor_(newWithTensor)(state, tensor); + THCTensor_(transpose)(state, self, NULL, dimension1_, dimension2_); return self; } -THCudaTensor *THCudaTensor_newUnfold(THCState *state, THCudaTensor *tensor, int dimension_, long size_, long step_) +THCTensor *THCTensor_(newUnfold)(THCState *state, THCTensor *tensor, int dimension_, long size_, long step_) { - THCudaTensor *self = THCudaTensor_newWithTensor(state, tensor); - THCudaTensor_unfold(state, self, NULL, dimension_, size_, step_); + THCTensor *self = THCTensor_(newWithTensor)(state, tensor); + THCTensor_(unfold)(state, self, NULL, dimension_, size_, step_); return self; } /* Resize */ -void THCudaTensor_resize(THCState *state, THCudaTensor *self, THLongStorage *size, THLongStorage *stride) +void THCTensor_(resize)(THCState *state, THCTensor *self, THLongStorage *size, THLongStorage *stride) { THArgCheck(size != NULL, 2, "invalid size"); if(stride) THArgCheck(stride->size == size->size, 3, "invalid stride"); - THCudaTensor_rawResize(state, self, size->size, size->data, (stride ? stride->data : NULL)); + THCTensor_(rawResize)(state, self, size->size, size->data, (stride ? stride->data : NULL)); } -void THCudaTensor_resizeAs(THCState *state, THCudaTensor *self, THCudaTensor *src) +void THCTensor_(resizeAs)(THCState *state, THCTensor *self, THCTensor *src) { int isSame = 0; int d; @@ -253,42 +252,42 @@ void THCudaTensor_resizeAs(THCState *state, THCudaTensor *self, THCudaTensor *sr } if(!isSame) - THCudaTensor_rawResize(state, self, src->nDimension, src->size, NULL); + THCTensor_(rawResize)(state, self, src->nDimension, src->size, NULL); } -void THCudaTensor_resize1d(THCState *state, THCudaTensor *tensor, long size0) +void THCTensor_(resize1d)(THCState *state, THCTensor *tensor, long size0) { - THCudaTensor_resize4d(state, tensor, size0, -1, -1, -1); + THCTensor_(resize4d)(state, tensor, size0, -1, -1, -1); } -void THCudaTensor_resize2d(THCState *state, THCudaTensor *tensor, long size0, long size1) +void THCTensor_(resize2d)(THCState *state, THCTensor *tensor, long size0, long size1) { - THCudaTensor_resize4d(state, tensor, size0, size1, -1, -1); + THCTensor_(resize4d)(state, tensor, size0, size1, -1, -1); } -void THCudaTensor_resize3d(THCState *state, THCudaTensor *tensor, long size0, long size1, long size2) +void THCTensor_(resize3d)(THCState *state, THCTensor *tensor, long size0, long size1, long size2) { - THCudaTensor_resize4d(state, tensor, size0, size1, size2, -1); + THCTensor_(resize4d)(state, tensor, size0, size1, size2, -1); } -void THCudaTensor_resize4d(THCState *state, THCudaTensor *self, long size0, long size1, long size2, long size3) +void THCTensor_(resize4d)(THCState *state, THCTensor *self, long size0, long size1, long size2, long size3) { long size[4] = {size0, size1, size2, size3}; - THCudaTensor_rawResize(state, self, 4, size, NULL); + THCTensor_(rawResize)(state, self, 4, size, NULL); } -void THCudaTensor_resize5d(THCState *state, THCudaTensor *self, long size0, long size1, long size2, long size3, long size4) +void THCTensor_(resize5d)(THCState *state, THCTensor *self, long size0, long size1, long size2, long size3, long size4) { long size[5] = {size0, size1, size2, size3, size4}; - THCudaTensor_rawResize(state, self, 5, size, NULL); + THCTensor_(rawResize)(state, self, 5, size, NULL); } -void THCudaTensor_set(THCState *state, THCudaTensor *self, THCudaTensor *src) +void THCTensor_(set)(THCState *state, THCTensor *self, THCTensor *src) { if(self != src) - THCudaTensor_rawSet(state, + THCTensor_(rawSet)(state, self, src->storage, src->storageOffset, @@ -297,12 +296,12 @@ void THCudaTensor_set(THCState *state, THCudaTensor *self, THCudaTensor *src) src->stride); } -void THCudaTensor_setStorage(THCState *state, THCudaTensor *self, THCudaStorage *storage_, long storageOffset_, THLongStorage *size_, THLongStorage *stride_) +void THCTensor_(setStorage)(THCState *state, THCTensor *self, THCStorage *storage_, long storageOffset_, THLongStorage *size_, THLongStorage *stride_) { if(size_ && stride_) THArgCheck(size_->size == stride_->size, 5, "inconsistent size/stride sizes"); - THCudaTensor_rawSet(state, + THCTensor_(rawSet)(state, self, storage_, storageOffset_, @@ -311,40 +310,40 @@ void THCudaTensor_setStorage(THCState *state, THCudaTensor *self, THCudaStorage (stride_ ? stride_->data : NULL)); } -void THCudaTensor_setStorage1d(THCState *state, THCudaTensor *self, THCudaStorage *storage_, long storageOffset_, +void THCTensor_(setStorage1d)(THCState *state, THCTensor *self, THCStorage *storage_, long storageOffset_, long size0_, long stride0_) { - THCudaTensor_setStorage4d(state, self, storage_, storageOffset_, + THCTensor_(setStorage4d)(state, self, storage_, storageOffset_, size0_, stride0_, -1, -1, -1, -1, -1, -1); } -void THCudaTensor_setStorage2d(THCState *state, THCudaTensor *self, THCudaStorage *storage_, long storageOffset_, +void THCTensor_(setStorage2d)(THCState *state, THCTensor *self, THCStorage *storage_, long storageOffset_, long size0_, long stride0_, long size1_, long stride1_) { - THCudaTensor_setStorage4d(state, self, storage_, storageOffset_, + THCTensor_(setStorage4d)(state, self, storage_, storageOffset_, size0_, stride0_, size1_, stride1_, -1, -1, -1, -1); } -void THCudaTensor_setStorage3d(THCState *state, THCudaTensor *self, THCudaStorage *storage_, long storageOffset_, +void THCTensor_(setStorage3d)(THCState *state, THCTensor *self, THCStorage *storage_, long storageOffset_, long size0_, long stride0_, long size1_, long stride1_, long size2_, long stride2_) { - THCudaTensor_setStorage4d(state, self, storage_, storageOffset_, + THCTensor_(setStorage4d)(state, self, storage_, storageOffset_, size0_, stride0_, size1_, stride1_, size2_, stride2_, -1, -1); } -void THCudaTensor_setStorage4d(THCState *state, THCudaTensor *self, THCudaStorage *storage_, long storageOffset_, +void THCTensor_(setStorage4d)(THCState *state, THCTensor *self, THCStorage *storage_, long storageOffset_, long size0_, long stride0_, long size1_, long stride1_, long size2_, long stride2_, @@ -354,11 +353,11 @@ void THCudaTensor_setStorage4d(THCState *state, THCudaTensor *self, THCudaStorag long size[4] = {size0_, size1_, size2_, size3_}; long stride[4] = {stride0_, stride1_, stride2_, stride3_}; - THCudaTensor_rawSet(state, self, storage_, storageOffset_, 4, size, stride); + THCTensor_(rawSet)(state, self, storage_, storageOffset_, 4, size, stride); } -void THCudaTensor_narrow(THCState *state, THCudaTensor *self, THCudaTensor *src, int dimension, long firstIndex, long size) +void THCTensor_(narrow)(THCState *state, THCTensor *self, THCTensor *src, int dimension, long firstIndex, long size) { if(!src) src = self; @@ -367,7 +366,7 @@ void THCudaTensor_narrow(THCState *state, THCudaTensor *self, THCudaTensor *src, THArgCheck( (firstIndex >= 0) && (firstIndex < src->size[dimension]), 4, "out of range"); THArgCheck( (size > 0) && (firstIndex+size <= src->size[dimension]), 5, "out of range"); - THCudaTensor_set(state, self, src); + THCTensor_(set)(state, self, src); if(firstIndex > 0) self->storageOffset += firstIndex*self->stride[dimension]; @@ -375,7 +374,7 @@ void THCudaTensor_narrow(THCState *state, THCudaTensor *self, THCudaTensor *src, self->size[dimension] = size; } -void THCudaTensor_select(THCState *state, THCudaTensor *self, THCudaTensor *src, int dimension, long sliceIndex) +void THCTensor_(select)(THCState *state, THCTensor *self, THCTensor *src, int dimension, long sliceIndex) { int d; @@ -386,8 +385,8 @@ void THCudaTensor_select(THCState *state, THCudaTensor *self, THCudaTensor *src, THArgCheck((dimension >= 0) && (dimension < src->nDimension), 3, "out of range"); THArgCheck((sliceIndex >= 0) && (sliceIndex < src->size[dimension]), 4, "out of range"); - THCudaTensor_set(state, self, src); - THCudaTensor_narrow(state, self, NULL, dimension, sliceIndex, 1); + THCTensor_(set)(state, self, src); + THCTensor_(narrow)(state, self, NULL, dimension, sliceIndex, 1); for(d = dimension; d < self->nDimension-1; d++) { self->size[d] = self->size[d+1]; @@ -396,7 +395,7 @@ void THCudaTensor_select(THCState *state, THCudaTensor *self, THCudaTensor *src, self->nDimension--; } -void THCudaTensor_transpose(THCState *state, THCudaTensor *self, THCudaTensor *src, int dimension1, int dimension2) +void THCTensor_(transpose)(THCState *state, THCTensor *self, THCTensor *src, int dimension1, int dimension2) { long z; @@ -406,7 +405,7 @@ void THCudaTensor_transpose(THCState *state, THCudaTensor *self, THCudaTensor *s THArgCheck( (dimension1 >= 0) && (dimension1 < src->nDimension), 1, "out of range"); THArgCheck( (dimension2 >= 0) && (dimension2 < src->nDimension), 2, "out of range"); - THCudaTensor_set(state, self, src); + THCTensor_(set)(state, self, src); if(dimension1 == dimension2) return; @@ -419,7 +418,7 @@ void THCudaTensor_transpose(THCState *state, THCudaTensor *self, THCudaTensor *s self->size[dimension2] = z; } -void THCudaTensor_unfold(THCState *state, THCudaTensor *self, THCudaTensor *src, int dimension, long size, long step) +void THCTensor_(unfold)(THCState *state, THCTensor *self, THCTensor *src, int dimension, long size, long step) { long *newSize; long *newStride; @@ -433,7 +432,7 @@ void THCudaTensor_unfold(THCState *state, THCudaTensor *self, THCudaTensor *src, THArgCheck(size <= src->size[dimension], 3, "out of range"); THArgCheck(step > 0, 4, "invalid step"); - THCudaTensor_set(state, self, src); + THCTensor_(set)(state, self, src); newSize = (long*)THAlloc(sizeof(long)*(self->nDimension+1)); newStride = (long*)THAlloc(sizeof(long)*(self->nDimension+1)); @@ -463,7 +462,7 @@ void THCudaTensor_unfold(THCState *state, THCudaTensor *self, THCudaTensor *src, } /* we have to handle the case where the result is a number */ -void THCudaTensor_squeeze(THCState *state, THCudaTensor *self, THCudaTensor *src) +void THCTensor_(squeeze)(THCState *state, THCTensor *self, THCTensor *src) { int ndim = 0; int d; @@ -471,7 +470,7 @@ void THCudaTensor_squeeze(THCState *state, THCudaTensor *self, THCudaTensor *src if(!src) src = self; - THCudaTensor_set(state, self, src); + THCTensor_(set)(state, self, src); for(d = 0; d < src->nDimension; d++) { @@ -496,7 +495,7 @@ void THCudaTensor_squeeze(THCState *state, THCudaTensor *self, THCudaTensor *src self->nDimension = ndim; } -void THCudaTensor_squeeze1d(THCState *state, THCudaTensor *self, THCudaTensor *src, int dimension) +void THCTensor_(squeeze1d)(THCState *state, THCTensor *self, THCTensor *src, int dimension) { int d; @@ -505,7 +504,7 @@ void THCudaTensor_squeeze1d(THCState *state, THCudaTensor *self, THCudaTensor *s THArgCheck(dimension < src->nDimension, 3, "dimension out of range"); - THCudaTensor_set(state, self, src); + THCTensor_(set)(state, self, src); if(src->size[dimension] == 1 && src->nDimension > 1) { @@ -518,7 +517,7 @@ void THCudaTensor_squeeze1d(THCState *state, THCudaTensor *self, THCudaTensor *s } } -int THCudaTensor_isContiguous(THCState *state, const THCudaTensor *self) +int THCTensor_(isContiguous)(THCState *state, const THCTensor *self) { long z = 1; int d; @@ -535,7 +534,7 @@ int THCudaTensor_isContiguous(THCState *state, const THCudaTensor *self) return 1; } -int THCudaTensor_isSize(THCState *state, const THCudaTensor *self, const THLongStorage *dims) +int THCTensor_(isSize)(THCState *state, const THCTensor *self, const THLongStorage *dims) { int d; if (self->nDimension != dims->size) @@ -549,7 +548,7 @@ int THCudaTensor_isSize(THCState *state, const THCudaTensor *self, const THLongS return 1; } -int THCudaTensor_isSetTo(THCState *state, const THCudaTensor *self, const THCudaTensor *src) +int THCTensor_(isSetTo)(THCState *state, const THCTensor *self, const THCTensor *src) { if (self->storage == src->storage && self->storageOffset == src->storageOffset && @@ -566,7 +565,7 @@ int THCudaTensor_isSetTo(THCState *state, const THCudaTensor *self, const THCuda return 0; } -int THCudaTensor_isSameSizeAs(THCState *state, const THCudaTensor *self, const THCudaTensor* src) +int THCTensor_(isSameSizeAs)(THCState *state, const THCTensor *self, const THCTensor* src) { int d; if (self->nDimension != src->nDimension) @@ -579,7 +578,7 @@ int THCudaTensor_isSameSizeAs(THCState *state, const THCudaTensor *self, const T return 1; } -long THCudaTensor_nElement(THCState *state, const THCudaTensor *self) +long THCTensor_(nElement)(THCState *state, const THCTensor *self) { if(self->nDimension == 0) return 0; @@ -593,13 +592,13 @@ long THCudaTensor_nElement(THCState *state, const THCudaTensor *self) } } -void THCudaTensor_retain(THCState *state, THCudaTensor *self) +void THCTensor_(retain)(THCState *state, THCTensor *self) { if(self->flag & TH_TENSOR_REFCOUNTED) THAtomicIncrementRef(&self->refcount); } -void THCudaTensor_free(THCState *state, THCudaTensor *self) +void THCTensor_(free)(THCState *state, THCTensor *self) { if(!self) return; @@ -611,23 +610,23 @@ void THCudaTensor_free(THCState *state, THCudaTensor *self) THFree(self->size); THFree(self->stride); if(self->storage) - THCudaStorage_free(state, self->storage); + THCStorage_(free)(state, self->storage); THFree(self); } } } -void THCudaTensor_freeCopyTo(THCState *state, THCudaTensor *self, THCudaTensor *dst) +void THCTensor_(freeCopyTo)(THCState *state, THCTensor *self, THCTensor *dst) { if(self != dst) - THCudaTensor_copy(state, dst, self); + THCTensor_(copy)(state, dst, self); - THCudaTensor_free(state, self); + THCTensor_(free)(state, self); } /*******************************************************************************/ -static void THCudaTensor_rawInit(THCState *state, THCudaTensor *self) +static void THCTensor_(rawInit)(THCState *state, THCTensor *self) { self->refcount = 1; self->storage = NULL; @@ -638,18 +637,18 @@ static void THCudaTensor_rawInit(THCState *state, THCudaTensor *self) self->flag = TH_TENSOR_REFCOUNTED; } -static void THCudaTensor_rawSet(THCState *state, THCudaTensor *self, THCudaStorage *storage, long storageOffset, int nDimension, long *size, long *stride) +static void THCTensor_(rawSet)(THCState *state, THCTensor *self, THCStorage *storage, long storageOffset, int nDimension, long *size, long *stride) { /* storage */ if(self->storage != storage) { if(self->storage) - THCudaStorage_free(state, self->storage); + THCStorage_(free)(state, self->storage); if(storage) { self->storage = storage; - THCudaStorage_retain(state, self->storage); + THCStorage_(retain)(state, self->storage); } else self->storage = NULL; @@ -661,10 +660,10 @@ static void THCudaTensor_rawSet(THCState *state, THCudaTensor *self, THCudaStora self->storageOffset = storageOffset; /* size and stride */ - THCudaTensor_rawResize(state, self, nDimension, size, stride); + THCTensor_(rawResize)(state, self, nDimension, size, stride); } -void THCudaTensor_rawResize(THCState *state, THCudaTensor *self, int nDimension, long *size, long *stride) +void THCTensor_(rawResize)(THCState *state, THCTensor *self, int nDimension, long *size, long *stride) { int d; int nDimension_; @@ -722,72 +721,72 @@ void THCudaTensor_rawResize(THCState *state, THCudaTensor *self, int nDimension, if(totalSize+self->storageOffset > 0) { if(!self->storage) - self->storage = THCudaStorage_new(state); + self->storage = THCStorage_(new)(state); if(totalSize+self->storageOffset > self->storage->size) - THCudaStorage_resize(state, self->storage, totalSize+self->storageOffset); + THCStorage_(resize)(state, self->storage, totalSize+self->storageOffset); } } else self->nDimension = 0; } -void THCudaTensor_set1d(THCState *state, THCudaTensor *tensor, long x0, float value) +void THCTensor_(set1d)(THCState *state, THCTensor *tensor, long x0, real value) { THArgCheck(tensor->nDimension == 1, 1, "tensor must have one dimension"); THArgCheck( (x0 >= 0) && (x0 < tensor->size[0]), 2, "out of range"); - THCudaStorage_set(state, tensor->storage, tensor->storageOffset+x0*tensor->stride[0], value); + THCStorage_(set)(state, tensor->storage, tensor->storageOffset+x0*tensor->stride[0], value); } -float THCudaTensor_get1d(THCState *state, const THCudaTensor *tensor, long x0) +real THCTensor_(get1d)(THCState *state, const THCTensor *tensor, long x0) { THArgCheck(tensor->nDimension == 1, 1, "tensor must have one dimension"); THArgCheck( (x0 >= 0) && (x0 < tensor->size[0]), 2, "out of range"); - return THCudaStorage_get(state, tensor->storage, tensor->storageOffset+x0*tensor->stride[0]); + return THCStorage_(get)(state, tensor->storage, tensor->storageOffset+x0*tensor->stride[0]); } -void THCudaTensor_set2d(THCState *state, THCudaTensor *tensor, long x0, long x1, float value) +void THCTensor_(set2d)(THCState *state, THCTensor *tensor, long x0, long x1, real value) { THArgCheck(tensor->nDimension == 2, 1, "tensor must have two dimensions"); THArgCheck((x0 >= 0) && (x0 < tensor->size[0]) && (x1 >= 0) && (x1 < tensor->size[1]), 2, "out of range"); - THCudaStorage_set(state, tensor->storage, tensor->storageOffset+x0*tensor->stride[0]+x1*tensor->stride[1], value); + THCStorage_(set)(state, tensor->storage, tensor->storageOffset+x0*tensor->stride[0]+x1*tensor->stride[1], value); } -float THCudaTensor_get2d(THCState *state, const THCudaTensor *tensor, long x0, long x1) +real THCTensor_(get2d)(THCState *state, const THCTensor *tensor, long x0, long x1) { THArgCheck(tensor->nDimension == 2, 1, "tensor must have two dimensions"); THArgCheck((x0 >= 0) && (x0 < tensor->size[0]) && (x1 >= 0) && (x1 < tensor->size[1]), 2, "out of range"); - return THCudaStorage_get(state, tensor->storage, tensor->storageOffset+x0*tensor->stride[0]+x1*tensor->stride[1]); + return THCStorage_(get)(state, tensor->storage, tensor->storageOffset+x0*tensor->stride[0]+x1*tensor->stride[1]); } -void THCudaTensor_set3d(THCState *state, THCudaTensor *tensor, long x0, long x1, long x2, float value) +void THCTensor_(set3d)(THCState *state, THCTensor *tensor, long x0, long x1, long x2, real value) { THArgCheck(tensor->nDimension == 3, 1, "tensor must have three dimensions"); THArgCheck( (x0 >= 0) && (x0 < tensor->size[0]) && (x1 >= 0) && (x1 < tensor->size[1]) && (x2 >= 0) && (x2 < tensor->size[2]), 2, "out of range"); - THCudaStorage_set(state, tensor->storage, tensor->storageOffset+x0*tensor->stride[0]+x1*tensor->stride[1]+x2*tensor->stride[2], value); + THCStorage_(set)(state, tensor->storage, tensor->storageOffset+x0*tensor->stride[0]+x1*tensor->stride[1]+x2*tensor->stride[2], value); } -float THCudaTensor_get3d(THCState *state, const THCudaTensor *tensor, long x0, long x1, long x2) +real THCTensor_(get3d)(THCState *state, const THCTensor *tensor, long x0, long x1, long x2) { THArgCheck(tensor->nDimension == 3, 1, "tensor must have three dimensions"); THArgCheck( (x0 >= 0) && (x0 < tensor->size[0]) && (x1 >= 0) && (x1 < tensor->size[1]) && (x2 >= 0) && (x2 < tensor->size[2]), 2, "out of range"); - return THCudaStorage_get(state, tensor->storage, tensor->storageOffset+x0*tensor->stride[0]+x1*tensor->stride[1]+x2*tensor->stride[2]); + return THCStorage_(get)(state, tensor->storage, tensor->storageOffset+x0*tensor->stride[0]+x1*tensor->stride[1]+x2*tensor->stride[2]); } -void THCudaTensor_set4d(THCState *state, THCudaTensor *tensor, long x0, long x1, long x2, long x3, float value) +void THCTensor_(set4d)(THCState *state, THCTensor *tensor, long x0, long x1, long x2, long x3, real value) { THArgCheck(tensor->nDimension == 4, 1, "tensor must have four dimensions"); THArgCheck((x0 >= 0) && (x0 < tensor->size[0]) && (x1 >= 0) && (x1 < tensor->size[1]) && (x2 >= 0) && (x2 < tensor->size[2]) && (x3 >= 0) && (x3 < tensor->size[3]), 2, "out of range"); - THCudaStorage_set(state, tensor->storage, tensor->storageOffset+x0*tensor->stride[0]+x1*tensor->stride[1]+x2*tensor->stride[2]+x3*tensor->stride[3], value); + THCStorage_(set)(state, tensor->storage, tensor->storageOffset+x0*tensor->stride[0]+x1*tensor->stride[1]+x2*tensor->stride[2]+x3*tensor->stride[3], value); } -float THCudaTensor_get4d(THCState *state, const THCudaTensor *tensor, long x0, long x1, long x2, long x3) +real THCTensor_(get4d)(THCState *state, const THCTensor *tensor, long x0, long x1, long x2, long x3) { THArgCheck(tensor->nDimension == 4, 1, "tensor must have four dimensions"); THArgCheck((x0 >= 0) && (x0 < tensor->size[0]) && (x1 >= 0) && (x1 < tensor->size[1]) && (x2 >= 0) && (x2 < tensor->size[2]) && (x3 >= 0) && (x3 < tensor->size[3]), 2, "out of range"); - return THCudaStorage_get(state, tensor->storage, tensor->storageOffset+x0*tensor->stride[0]+x1*tensor->stride[1]+x2*tensor->stride[2]+x3*tensor->stride[3]); + return THCStorage_(get)(state, tensor->storage, tensor->storageOffset+x0*tensor->stride[0]+x1*tensor->stride[1]+x2*tensor->stride[2]+x3*tensor->stride[3]); } -int THCudaTensor_checkGPU(THCState *state, unsigned int nTensors, ...) +int THCTensor_(checkGPU)(THCState *state, unsigned int nTensors, ...) { #ifdef DISABLE_CHECK_GPU return 1; // Disable GPU checks. @@ -798,11 +797,11 @@ int THCudaTensor_checkGPU(THCState *state, unsigned int nTensors, ...) va_start(args, nTensors); int valid = 1; for (unsigned int i = 0; i < nTensors; i++) { - THCudaTensor* tensor = va_arg(args, THCudaTensor*); + THCTensor* tensor = va_arg(args, THCTensor*); if (tensor == NULL) { continue; } - int tensorDev = THCudaTensor_getDevice(state, tensor); + int tensorDev = THCTensor_(getDevice)(state, tensor); if (tensorDev != -1 && tensorDev != curDev) { valid = 0; break; @@ -812,3 +811,5 @@ int THCudaTensor_checkGPU(THCState *state, unsigned int nTensors, ...) return valid; #endif } + +#endif diff --git a/lib/THC/generic/THCTensor.cu b/lib/THC/generic/THCTensor.cu index 3972ee3..beb96ec 100644 --- a/lib/THC/generic/THCTensor.cu +++ b/lib/THC/generic/THCTensor.cu @@ -1,14 +1,16 @@ -#include "THCTensor.h" +#ifndef THC_GENERIC_FILE +#define THC_GENERIC_FILE "generic/THCTensor.cu" +#else -cudaTextureObject_t THCudaTensor_getTextureObject(THCState *state, THCudaTensor *self) +cudaTextureObject_t THCTensor_(getTextureObject)(THCState *state, THCTensor *self) { - THAssert(THCudaTensor_checkGPU(state, 1, self)); + THAssert(THCTensor_(checkGPU)(state, 1, self)); cudaTextureObject_t texObj; struct cudaResourceDesc resDesc; memset(&resDesc, 0, sizeof(resDesc)); resDesc.resType = cudaResourceTypeLinear; - resDesc.res.linear.devPtr = THCudaTensor_data(state, self); - resDesc.res.linear.sizeInBytes = THCudaTensor_nElement(state, self) * 4; + resDesc.res.linear.devPtr = THCTensor_(data)(state, self); + resDesc.res.linear.sizeInBytes = THCTensor_(nElement)(state, self) * 4; resDesc.res.linear.desc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat); struct cudaTextureDesc texDesc; @@ -16,19 +18,21 @@ cudaTextureObject_t THCudaTensor_getTextureObject(THCState *state, THCudaTensor cudaCreateTextureObject(&texObj, &resDesc, &texDesc, NULL); cudaError errcode = cudaGetLastError(); if(errcode != cudaSuccess) { - if (THCudaTensor_nElement(state, self) > 2>>27) + if (THCTensor_(nElement)(state, self) > 2>>27) THError("Failed to create texture object, " "nElement:%ld exceeds 27-bit addressing required for tex1Dfetch. Cuda Error: %s", - THCudaTensor_nElement(state, self), cudaGetErrorString(errcode)); + THCTensor_(nElement)(state, self), cudaGetErrorString(errcode)); else THError("Failed to create texture object: %s", cudaGetErrorString(errcode)); } return texObj; } -THC_API int THCudaTensor_getDevice(THCState* state, const THCudaTensor* thc) { +THC_API int THCTensor_(getDevice)(THCState* state, const THCTensor* thc) { if (!thc->storage) return -1; cudaPointerAttributes attr; THCudaCheck(cudaPointerGetAttributes(&attr, thc->storage->data)); return attr.device; } + +#endif diff --git a/lib/THC/generic/THCTensor.h b/lib/THC/generic/THCTensor.h index 27689e8..175eaee 100644 --- a/lib/THC/generic/THCTensor.h +++ b/lib/THC/generic/THCTensor.h @@ -1,133 +1,130 @@ -#ifndef THC_TENSOR_INC -#define THC_TENSOR_INC - -#include "THTensor.h" -#include "THCStorage.h" -#include "THCGeneral.h" +#ifndef THC_GENERIC_FILE +#define THC_GENERIC_FILE "generic/THCTensor.h" +#else #define TH_TENSOR_REFCOUNTED 1 -typedef struct THCudaTensor +typedef struct THCTensor { long *size; long *stride; int nDimension; - THCudaStorage *storage; + THCStorage *storage; long storageOffset; int refcount; char flag; -} THCudaTensor; +} THCTensor; /**** access methods ****/ -THC_API THCudaStorage* THCudaTensor_storage(THCState *state, const THCudaTensor *self); -THC_API long THCudaTensor_storageOffset(THCState *state, const THCudaTensor *self); -THC_API int THCudaTensor_nDimension(THCState *state, const THCudaTensor *self); -THC_API long THCudaTensor_size(THCState *state, const THCudaTensor *self, int dim); -THC_API long THCudaTensor_stride(THCState *state, const THCudaTensor *self, int dim); -THC_API THLongStorage *THCudaTensor_newSizeOf(THCState *state, THCudaTensor *self); -THC_API THLongStorage *THCudaTensor_newStrideOf(THCState *state, THCudaTensor *self); -THC_API float *THCudaTensor_data(THCState *state, const THCudaTensor *self); +THC_API THCStorage* THCTensor_(storage)(THCState *state, const THCTensor *self); +THC_API long THCTensor_(storageOffset)(THCState *state, const THCTensor *self); +THC_API int THCTensor_(nDimension)(THCState *state, const THCTensor *self); +THC_API long THCTensor_(size)(THCState *state, const THCTensor *self, int dim); +THC_API long THCTensor_(stride)(THCState *state, const THCTensor *self, int dim); +THC_API THLongStorage *THCTensor_(newSizeOf)(THCState *state, THCTensor *self); +THC_API THLongStorage *THCTensor_(newStrideOf)(THCState *state, THCTensor *self); +THC_API real *THCTensor_(data)(THCState *state, const THCTensor *self); -THC_API void THCudaTensor_setFlag(THCState *state, THCudaTensor *self, const char flag); -THC_API void THCudaTensor_clearFlag(THCState *state, THCudaTensor *self, const char flag); +THC_API void THCTensor_(setFlag)(THCState *state, THCTensor *self, const char flag); +THC_API void THCTensor_(clearFlag)(THCState *state, THCTensor *self, const char flag); /**** creation methods ****/ -THC_API THCudaTensor *THCudaTensor_new(THCState *state); -THC_API THCudaTensor *THCudaTensor_newWithTensor(THCState *state, THCudaTensor *tensor); +THC_API THCTensor *THCTensor_(new)(THCState *state); +THC_API THCTensor *THCTensor_(newWithTensor)(THCState *state, THCTensor *tensor); /* stride might be NULL */ -THC_API THCudaTensor *THCudaTensor_newWithStorage(THCState *state, THCudaStorage *storage_, long storageOffset_, THLongStorage *size_, THLongStorage *stride_); -THC_API THCudaTensor *THCudaTensor_newWithStorage1d(THCState *state, THCudaStorage *storage_, long storageOffset_, +THC_API THCTensor *THCTensor_(newWithStorage)(THCState *state, THCStorage *storage_, long storageOffset_, THLongStorage *size_, THLongStorage *stride_); +THC_API THCTensor *THCTensor_(newWithStorage1d)(THCState *state, THCStorage *storage_, long storageOffset_, long size0_, long stride0_); -THC_API THCudaTensor *THCudaTensor_newWithStorage2d(THCState *state, THCudaStorage *storage_, long storageOffset_, +THC_API THCTensor *THCTensor_(newWithStorage2d)(THCState *state, THCStorage *storage_, long storageOffset_, long size0_, long stride0_, long size1_, long stride1_); -THC_API THCudaTensor *THCudaTensor_newWithStorage3d(THCState *state, THCudaStorage *storage_, long storageOffset_, +THC_API THCTensor *THCTensor_(newWithStorage3d)(THCState *state, THCStorage *storage_, long storageOffset_, long size0_, long stride0_, long size1_, long stride1_, long size2_, long stride2_); -THC_API THCudaTensor *THCudaTensor_newWithStorage4d(THCState *state, THCudaStorage *storage_, long storageOffset_, +THC_API THCTensor *THCTensor_(newWithStorage4d)(THCState *state, THCStorage *storage_, long storageOffset_, long size0_, long stride0_, long size1_, long stride1_, long size2_, long stride2_, long size3_, long stride3_); /* stride might be NULL */ -THC_API THCudaTensor *THCudaTensor_newWithSize(THCState *state, THLongStorage *size_, THLongStorage *stride_); -THC_API THCudaTensor *THCudaTensor_newWithSize1d(THCState *state, long size0_); -THC_API THCudaTensor *THCudaTensor_newWithSize2d(THCState *state, long size0_, long size1_); -THC_API THCudaTensor *THCudaTensor_newWithSize3d(THCState *state, long size0_, long size1_, long size2_); -THC_API THCudaTensor *THCudaTensor_newWithSize4d(THCState *state, long size0_, long size1_, long size2_, long size3_); - -THC_API THCudaTensor *THCudaTensor_newClone(THCState *state, THCudaTensor *self); -THC_API THCudaTensor *THCudaTensor_newContiguous(THCState *state, THCudaTensor *tensor); -THC_API THCudaTensor *THCudaTensor_newSelect(THCState *state, THCudaTensor *tensor, int dimension_, long sliceIndex_); -THC_API THCudaTensor *THCudaTensor_newNarrow(THCState *state, THCudaTensor *tensor, int dimension_, long firstIndex_, long size_); -THC_API THCudaTensor *THCudaTensor_newTranspose(THCState *state, THCudaTensor *tensor, int dimension1_, int dimension2_); -THC_API THCudaTensor *THCudaTensor_newUnfold(THCState *state, THCudaTensor *tensor, int dimension_, long size_, long step_); - -THC_API void THCudaTensor_resize(THCState *state, THCudaTensor *tensor, THLongStorage *size, THLongStorage *stride); -THC_API void THCudaTensor_resizeAs(THCState *state, THCudaTensor *tensor, THCudaTensor *src); -THC_API void THCudaTensor_resize1d(THCState *state, THCudaTensor *tensor, long size0_); -THC_API void THCudaTensor_resize2d(THCState *state, THCudaTensor *tensor, long size0_, long size1_); -THC_API void THCudaTensor_resize3d(THCState *state, THCudaTensor *tensor, long size0_, long size1_, long size2_); -THC_API void THCudaTensor_resize4d(THCState *state, THCudaTensor *tensor, long size0_, long size1_, long size2_, long size3_); -THC_API void THCudaTensor_resize5d(THCState *state, THCudaTensor *tensor, long size0_, long size1_, long size2_, long size3_, long size4_); -THC_API void THCudaTensor_rawResize(THCState *state, THCudaTensor *self, int nDimension, long *size, long *stride); - -THC_API void THCudaTensor_set(THCState *state, THCudaTensor *self, THCudaTensor *src); -THC_API void THCudaTensor_setStorage(THCState *state, THCudaTensor *self, THCudaStorage *storage_, long storageOffset_, THLongStorage *size_, THLongStorage *stride_); -THC_API void THCudaTensor_setStorage1d(THCState *state, THCudaTensor *self, THCudaStorage *storage_, long storageOffset_, +THC_API THCTensor *THCTensor_(newWithSize)(THCState *state, THLongStorage *size_, THLongStorage *stride_); +THC_API THCTensor *THCTensor_(newWithSize1d)(THCState *state, long size0_); +THC_API THCTensor *THCTensor_(newWithSize2d)(THCState *state, long size0_, long size1_); +THC_API THCTensor *THCTensor_(newWithSize3d)(THCState *state, long size0_, long size1_, long size2_); +THC_API THCTensor *THCTensor_(newWithSize4d)(THCState *state, long size0_, long size1_, long size2_, long size3_); + +THC_API THCTensor *THCTensor_(newClone)(THCState *state, THCTensor *self); +THC_API THCTensor *THCTensor_(newContiguous)(THCState *state, THCTensor *tensor); +THC_API THCTensor *THCTensor_(newSelect)(THCState *state, THCTensor *tensor, int dimension_, long sliceIndex_); +THC_API THCTensor *THCTensor_(newNarrow)(THCState *state, THCTensor *tensor, int dimension_, long firstIndex_, long size_); +THC_API THCTensor *THCTensor_(newTranspose)(THCState *state, THCTensor *tensor, int dimension1_, int dimension2_); +THC_API THCTensor *THCTensor_(newUnfold)(THCState *state, THCTensor *tensor, int dimension_, long size_, long step_); + +THC_API void THCTensor_(resize)(THCState *state, THCTensor *tensor, THLongStorage *size, THLongStorage *stride); +THC_API void THCTensor_(resizeAs)(THCState *state, THCTensor *tensor, THCTensor *src); +THC_API void THCTensor_(resize1d)(THCState *state, THCTensor *tensor, long size0_); +THC_API void THCTensor_(resize2d)(THCState *state, THCTensor *tensor, long size0_, long size1_); +THC_API void THCTensor_(resize3d)(THCState *state, THCTensor *tensor, long size0_, long size1_, long size2_); +THC_API void THCTensor_(resize4d)(THCState *state, THCTensor *tensor, long size0_, long size1_, long size2_, long size3_); +THC_API void THCTensor_(resize5d)(THCState *state, THCTensor *tensor, long size0_, long size1_, long size2_, long size3_, long size4_); +THC_API void THCTensor_(rawResize)(THCState *state, THCTensor *self, int nDimension, long *size, long *stride); + +THC_API void THCTensor_(set)(THCState *state, THCTensor *self, THCTensor *src); +THC_API void THCTensor_(setStorage)(THCState *state, THCTensor *self, THCStorage *storage_, long storageOffset_, THLongStorage *size_, THLongStorage *stride_); +THC_API void THCTensor_(setStorage1d)(THCState *state, THCTensor *self, THCStorage *storage_, long storageOffset_, long size0_, long stride0_); -THC_API void THCudaTensor_setStorage2d(THCState *state, THCudaTensor *self, THCudaStorage *storage_, long storageOffset_, +THC_API void THCTensor_(setStorage2d)(THCState *state, THCTensor *self, THCStorage *storage_, long storageOffset_, long size0_, long stride0_, long size1_, long stride1_); -THC_API void THCudaTensor_setStorage3d(THCState *state, THCudaTensor *self, THCudaStorage *storage_, long storageOffset_, +THC_API void THCTensor_(setStorage3d)(THCState *state, THCTensor *self, THCStorage *storage_, long storageOffset_, long size0_, long stride0_, long size1_, long stride1_, long size2_, long stride2_); -THC_API void THCudaTensor_setStorage4d(THCState *state, THCudaTensor *self, THCudaStorage *storage_, long storageOffset_, +THC_API void THCTensor_(setStorage4d)(THCState *state, THCTensor *self, THCStorage *storage_, long storageOffset_, long size0_, long stride0_, long size1_, long stride1_, long size2_, long stride2_, long size3_, long stride3_); -THC_API void THCudaTensor_narrow(THCState *state, THCudaTensor *self, THCudaTensor *src, int dimension_, long firstIndex_, long size_); -THC_API void THCudaTensor_select(THCState *state, THCudaTensor *self, THCudaTensor *src, int dimension_, long sliceIndex_); -THC_API void THCudaTensor_transpose(THCState *state, THCudaTensor *self, THCudaTensor *src, int dimension1_, int dimension2_); -THC_API void THCudaTensor_unfold(THCState *state, THCudaTensor *self, THCudaTensor *src, int dimension_, long size_, long step_); +THC_API void THCTensor_(narrow)(THCState *state, THCTensor *self, THCTensor *src, int dimension_, long firstIndex_, long size_); +THC_API void THCTensor_(select)(THCState *state, THCTensor *self, THCTensor *src, int dimension_, long sliceIndex_); +THC_API void THCTensor_(transpose)(THCState *state, THCTensor *self, THCTensor *src, int dimension1_, int dimension2_); +THC_API void THCTensor_(unfold)(THCState *state, THCTensor *self, THCTensor *src, int dimension_, long size_, long step_); -THC_API void THCudaTensor_squeeze(THCState *state, THCudaTensor *self, THCudaTensor *src); -THC_API void THCudaTensor_squeeze1d(THCState *state, THCudaTensor *self, THCudaTensor *src, int dimension_); +THC_API void THCTensor_(squeeze)(THCState *state, THCTensor *self, THCTensor *src); +THC_API void THCTensor_(squeeze1d)(THCState *state, THCTensor *self, THCTensor *src, int dimension_); -THC_API int THCudaTensor_isContiguous(THCState *state, const THCudaTensor *self); -THC_API int THCudaTensor_isSameSizeAs(THCState *state, const THCudaTensor *self, const THCudaTensor *src); -THC_API int THCudaTensor_isSetTo(THCState *state, const THCudaTensor *self, const THCudaTensor *src); -THC_API int THCudaTensor_isSize(THCState *state, const THCudaTensor *self, const THLongStorage *dims); -THC_API long THCudaTensor_nElement(THCState *state, const THCudaTensor *self); +THC_API int THCTensor_(isContiguous)(THCState *state, const THCTensor *self); +THC_API int THCTensor_(isSameSizeAs)(THCState *state, const THCTensor *self, const THCTensor *src); +THC_API int THCTensor_(isSetTo)(THCState *state, const THCTensor *self, const THCTensor *src); +THC_API int THCTensor_(isSize)(THCState *state, const THCTensor *self, const THLongStorage *dims); +THC_API long THCTensor_(nElement)(THCState *state, const THCTensor *self); -THC_API void THCudaTensor_retain(THCState *state, THCudaTensor *self); -THC_API void THCudaTensor_free(THCState *state, THCudaTensor *self); -THC_API void THCudaTensor_freeCopyTo(THCState *state, THCudaTensor *self, THCudaTensor *dst); +THC_API void THCTensor_(retain)(THCState *state, THCTensor *self); +THC_API void THCTensor_(free)(THCState *state, THCTensor *self); +THC_API void THCTensor_(freeCopyTo)(THCState *state, THCTensor *self, THCTensor *dst); /* Slow access methods [check everything] */ -THC_API void THCudaTensor_set1d(THCState *state, THCudaTensor *tensor, long x0, float value); -THC_API void THCudaTensor_set2d(THCState *state, THCudaTensor *tensor, long x0, long x1, float value); -THC_API void THCudaTensor_set3d(THCState *state, THCudaTensor *tensor, long x0, long x1, long x2, float value); -THC_API void THCudaTensor_set4d(THCState *state, THCudaTensor *tensor, long x0, long x1, long x2, long x3, float value); +THC_API void THCTensor_(set1d)(THCState *state, THCTensor *tensor, long x0, real value); +THC_API void THCTensor_(set2d)(THCState *state, THCTensor *tensor, long x0, long x1, real value); +THC_API void THCTensor_(set3d)(THCState *state, THCTensor *tensor, long x0, long x1, long x2, real value); +THC_API void THCTensor_(set4d)(THCState *state, THCTensor *tensor, long x0, long x1, long x2, long x3, real value); -THC_API float THCudaTensor_get1d(THCState *state, const THCudaTensor *tensor, long x0); -THC_API float THCudaTensor_get2d(THCState *state, const THCudaTensor *tensor, long x0, long x1); -THC_API float THCudaTensor_get3d(THCState *state, const THCudaTensor *tensor, long x0, long x1, long x2); -THC_API float THCudaTensor_get4d(THCState *state, const THCudaTensor *tensor, long x0, long x1, long x2, long x3); +THC_API real THCTensor_(get1d)(THCState *state, const THCTensor *tensor, long x0); +THC_API real THCTensor_(get2d)(THCState *state, const THCTensor *tensor, long x0, long x1); +THC_API real THCTensor_(get3d)(THCState *state, const THCTensor *tensor, long x0, long x1, long x2); +THC_API real THCTensor_(get4d)(THCState *state, const THCTensor *tensor, long x0, long x1, long x2, long x3); /* CUDA-specific functions */ -THC_API cudaTextureObject_t THCudaTensor_getTextureObject(THCState *state, THCudaTensor *self); -THC_API int THCudaTensor_getDevice(THCState *state, const THCudaTensor *self); -THC_API int THCudaTensor_checkGPU(THCState *state, unsigned int nTensors, ...); +THC_API cudaTextureObject_t THCTensor_(getTextureObject)(THCState *state, THCTensor *self); +THC_API int THCTensor_(getDevice)(THCState *state, const THCTensor *self); +THC_API int THCTensor_(checkGPU)(THCState *state, unsigned int nTensors, ...); #endif diff --git a/lib/THC/generic/THCTensorCopy.c b/lib/THC/generic/THCTensorCopy.c index 5fbacca..c1813f9 100644 --- a/lib/THC/generic/THCTensorCopy.c +++ b/lib/THC/generic/THCTensorCopy.c @@ -1,42 +1,42 @@ -#include "THCTensorCopy.h" -#include "THCGeneral.h" -#include "THCTensor.h" +#ifndef THC_GENERIC_FILE +#define THC_GENERIC_FILE "generic/THCTensorCopy.c" +#else /* specific methods */ -void THCudaTensor_copyFloat(THCState *state, THCudaTensor *self, struct THFloatTensor *src) +void THCTensor_(copyCPU)(THCState *state, THCTensor *self, struct THTensor *src) { - THArgCheck(THCudaTensor_nElement(state, self) == THFloatTensor_nElement(src), 2, "sizes do not match"); + THArgCheck(THCTensor_(nElement)(state, self) == THTensor_(nElement)(src), 2, "sizes do not match"); { - THCudaTensor *selfc = THCudaTensor_newContiguous(state, self); - src = THFloatTensor_newContiguous(src); - - THCudaCheck(cudaMemcpy(THCudaTensor_data(state, selfc), - THFloatTensor_data(src), - THFloatTensor_nElement(src) * sizeof(float), + THCTensor *selfc = THCTensor_(newContiguous)(state, self); + src = THTensor_(newContiguous)(src); + + THCudaCheck(cudaMemcpy(THCTensor_(data)(state,selfc), + THTensor_(data)(src), + THTensor_(nElement)(src) * sizeof(real), cudaMemcpyHostToDevice)); - THFloatTensor_free(src); - THCudaTensor_freeCopyTo(state, selfc, self); + THTensor_(free)(src); + THCTensor_(freeCopyTo)(state, selfc, self); } } -/* everything comes down to copy to a tensor of floats */ #define IMPLEMENT_TH_CUDA_TENSOR_COPY(TYPEC) \ -void THCudaTensor_copy##TYPEC(THCState *state, THCudaTensor *self, struct TH##TYPEC##Tensor *src) \ +void THCTensor_(copy##TYPEC)(THCState *state, THCTensor *self, struct TH##TYPEC##Tensor *src) \ { \ - THArgCheck(THCudaTensor_nElement(state, self) == TH##TYPEC##Tensor_nElement(src), 2, "sizes do not match"); \ - \ - { \ + THArgCheck(THCTensor_(nElement)(state, self) == TH##TYPEC##Tensor_nElement(src), 2, "sizes do not match"); \ + if(THCTypeIdx_(Real) == THCTypeIdx_(TYPEC)) { \ + THCTensor_(copyCPU)(state, self, (THTensor*) src); /* cast just removes warnings */ \ + } else { \ THLongStorage *size = TH##TYPEC##Tensor_newSizeOf(src); \ - THFloatTensor *srcf = THFloatTensor_newWithSize(size, NULL); \ + THTensor *srcf = THTensor_(newWithSize)(size, NULL); \ \ - THFloatTensor_copy##TYPEC(srcf, src); \ - THCudaTensor_copyFloat(state, self, srcf); \ + THTensor_(copy##TYPEC)(srcf, src); \ + THCTensor_(copyCPU)(state, self, srcf); \ \ THLongStorage_free(size); \ - THFloatTensor_free(srcf); \ + THTensor_(free)(srcf); \ } \ } @@ -45,43 +45,45 @@ IMPLEMENT_TH_CUDA_TENSOR_COPY(Char) IMPLEMENT_TH_CUDA_TENSOR_COPY(Short) IMPLEMENT_TH_CUDA_TENSOR_COPY(Int) IMPLEMENT_TH_CUDA_TENSOR_COPY(Long) +IMPLEMENT_TH_CUDA_TENSOR_COPY(Float) IMPLEMENT_TH_CUDA_TENSOR_COPY(Double) /* copyCuda */ -void THFloatTensor_copyCuda(THCState *state, THFloatTensor *self, struct THCudaTensor *src) +void THTensor_(copyCuda)(THCState *state, THTensor *self, struct THCTensor *src) { - THArgCheck(THFloatTensor_nElement(self) == THCudaTensor_nElement(state, src), 2, "sizes do not match"); + THArgCheck(THTensor_(nElement)(self) == THCTensor_(nElement)(state, src), 2, "sizes do not match"); { - THFloatTensor *selfc = THFloatTensor_newContiguous(self); - src = THCudaTensor_newContiguous(state, src); + THTensor *selfc = THTensor_(newContiguous)(self); + src = THCTensor_(newContiguous)(state, src); - THCudaCheck(cudaMemcpy(THFloatTensor_data(selfc), - THCudaTensor_data(state, src), - THCudaTensor_nElement(state, src) * sizeof(float), + THCudaCheck(cudaMemcpy(THTensor_(data)(selfc), + THCTensor_(data)(state, src), + THCTensor_(nElement)(state, src) * sizeof(real), cudaMemcpyDeviceToHost)); - THCudaTensor_free(state, src); - THFloatTensor_freeCopyTo(selfc, self); + THCTensor_(free)(state, src); + THTensor_(freeCopyTo)(selfc, self); } } -#define IMPLEMENT_TH_CUDA_TENSOR_COPY_TO(TYPEC) \ - void TH##TYPEC##Tensor_copyCuda(THCState *state, TH##TYPEC##Tensor *self, struct THCudaTensor *src) \ - { \ - THArgCheck(TH##TYPEC##Tensor_nElement(self) == THCudaTensor_nElement(state, src), 2, "sizes do not match"); \ - \ - { \ - THLongStorage *size = THCudaTensor_newSizeOf(state, src); \ - THFloatTensor *srcf = THFloatTensor_newWithSize(size, NULL); \ - \ - THFloatTensor_copyCuda(state, srcf, src); \ - TH##TYPEC##Tensor_copyFloat(self, srcf); \ - \ - THLongStorage_free(size); \ - THFloatTensor_free(srcf); \ - } \ +#define IMPLEMENT_TH_CUDA_TENSOR_COPY_TO(TYPEC) \ + void TH_CONCAT_4(TH,TYPEC,Tensor_copyCuda,Real)(THCState *state, TH##TYPEC##Tensor *self, struct THCTensor *src) \ + { \ + THArgCheck(TH##TYPEC##Tensor_nElement(self) == THCTensor_(nElement)(state, src), 2, "sizes do not match"); \ + if(THCTypeIdx_(Real) == THCTypeIdx_(TYPEC)) { \ + THTensor_(copyCuda)(state, (THTensor*) self, src); /* cast just removes compiler warning */ \ + } else { \ + THLongStorage *size = THCTensor_(newSizeOf)(state, src); \ + THTensor *srcf = THTensor_(newWithSize)(size, NULL); \ + \ + THTensor_(copyCuda)(state, srcf, src); \ + TH_CONCAT_4(TH,TYPEC,Tensor_copy,Real)(self, srcf); \ + \ + THLongStorage_free(size); \ + THTensor_(free)(srcf); \ + } \ } IMPLEMENT_TH_CUDA_TENSOR_COPY_TO(Byte) @@ -89,23 +91,25 @@ IMPLEMENT_TH_CUDA_TENSOR_COPY_TO(Char) IMPLEMENT_TH_CUDA_TENSOR_COPY_TO(Short) IMPLEMENT_TH_CUDA_TENSOR_COPY_TO(Int) IMPLEMENT_TH_CUDA_TENSOR_COPY_TO(Long) +IMPLEMENT_TH_CUDA_TENSOR_COPY_TO(Float) IMPLEMENT_TH_CUDA_TENSOR_COPY_TO(Double) -void THCudaTensor_copyCuda(THCState *state, THCudaTensor *self, THCudaTensor *src) +// FIXME: add within-CUDA conversions +void THCTensor_(copyCuda)(THCState *state, THCTensor *self, THCTensor *src) { - THCudaTensor_copy(state, self, src); + THCTensor_(copy)(state, self, src); } -void THCudaTensor_copyAsyncFloat(THCState *state, THCudaTensor *self, struct THFloatTensor *src) +void THCTensor_(copyAsyncCPU)(THCState *state, THCTensor *self, struct THTensor *src) { - THArgCheck(THCudaTensor_nElement(state, self) == THFloatTensor_nElement(src), 2, "sizes do not match"); - THArgCheck(THCudaTensor_isContiguous(state, self), 2, "Target tensor must be contiguous"); - THArgCheck(THFloatTensor_isContiguous(src), 3, "Source tensor must be contiguous"); + THArgCheck(THCTensor_(nElement)(state, self) == THTensor_(nElement)(src), 2, "sizes do not match"); + THArgCheck(THCTensor_(isContiguous)(state, self), 2, "Target tensor must be contiguous"); + THArgCheck(THTensor_(isContiguous)(src), 3, "Source tensor must be contiguous"); - if (THCudaTensor_nElement(state, self) == 0) return; + if (THCTensor_(nElement)(state, self) == 0) return; // Perform the copy wrt the current stream on the CudaTensor's device. - int tensorDevice = THCudaTensor_getDevice(state, self); + int tensorDevice = THCTensor_(getDevice)(state, self); int currentDevice; THCudaCheck(cudaGetDevice(¤tDevice)); @@ -113,9 +117,9 @@ void THCudaTensor_copyAsyncFloat(THCState *state, THCudaTensor *self, struct THF THCudaCheck(cudaSetDevice(tensorDevice)); } - THCudaCheck(cudaMemcpyAsync(THCudaTensor_data(state, self), - THFloatTensor_data(src), - THFloatTensor_nElement(src) * sizeof(float), + THCudaCheck(cudaMemcpyAsync(THCTensor_(data)(state, self), + THTensor_(data)(src), + THTensor_(nElement)(src) * sizeof(real), cudaMemcpyHostToDevice, THCState_getDeviceStream(state, tensorDevice, THCState_getCurrentStreamIndex(state)))); @@ -125,16 +129,16 @@ void THCudaTensor_copyAsyncFloat(THCState *state, THCudaTensor *self, struct THF } } -void THFloatTensor_copyAsyncCuda(THCState *state, THFloatTensor *self, struct THCudaTensor *src) +void THTensor_(copyAsyncCuda)(THCState *state, THTensor *self, struct THCTensor *src) { - THArgCheck(THFloatTensor_nElement(self) == THCudaTensor_nElement(state, src), 2, "sizes do not match"); - THArgCheck(THFloatTensor_isContiguous(self), 2, "Target tensor must be contiguous"); - THArgCheck(THCudaTensor_isContiguous(state, src), 3, "Source tensor must be contiguous"); + THArgCheck(THTensor_(nElement)(self) == THCudaTensor_nElement(state, src), 2, "sizes do not match"); + THArgCheck(THTensor_(isContiguous)(self), 2, "Target tensor must be contiguous"); + THArgCheck(THCTensor_(isContiguous)(state, src), 3, "Source tensor must be contiguous"); - if (THFloatTensor_nElement(self) == 0) return; + if (THTensor_(nElement)(self) == 0) return; // Perform the copy wrt the current stream on the CudaTensor's device. - int tensorDevice = THCudaTensor_getDevice(state, src); + int tensorDevice = THCTensor_(getDevice)(state, src); int currentDevice; THCudaCheck(cudaGetDevice(¤tDevice)); @@ -142,9 +146,9 @@ void THFloatTensor_copyAsyncCuda(THCState *state, THFloatTensor *self, struct TH THCudaCheck(cudaSetDevice(tensorDevice)); } - THCudaCheck(cudaMemcpyAsync(THFloatTensor_data(self), - THCudaTensor_data(state, src), - THCudaTensor_nElement(state, src) * sizeof(float), + THCudaCheck(cudaMemcpyAsync(THTensor_(data)(self), + THCTensor_(data)(state, src), + THCTensor_(nElement)(state, src) * sizeof(real), cudaMemcpyDeviceToHost, THCState_getDeviceStream(state, tensorDevice, THCState_getCurrentStreamIndex(state)))); @@ -153,3 +157,5 @@ void THFloatTensor_copyAsyncCuda(THCState *state, THFloatTensor *self, struct TH THCudaCheck(cudaSetDevice(currentDevice)); } } + +#endif diff --git a/lib/THC/generic/THCTensorCopy.cu b/lib/THC/generic/THCTensorCopy.cu index 5aa7ee5..304e52c 100644 --- a/lib/THC/generic/THCTensorCopy.cu +++ b/lib/THC/generic/THCTensorCopy.cu @@ -1,19 +1,15 @@ -#include "THCApply.cuh" - -static inline int curGPU() { - int curDev; - THCudaCheck(cudaGetDevice(&curDev)); - return curDev; -} +#ifndef THC_GENERIC_FILE +#define THC_GENERIC_FILE "generic/THCTensorCopy.cu" +#else THC_API void -THCudaTensor_copy(THCState* state, THCudaTensor* dst, THCudaTensor* src) { - long totalElements = THCudaTensor_nElement(state, dst); +THCTensor_(copy)(THCState* state, THCTensor* dst, THCTensor* src) { + long totalElements = THCTensor_(nElement)(state, dst); - THArgCheck(totalElements == THCudaTensor_nElement(state, src), 2, + THArgCheck(totalElements == THCTensor_(nElement)(state, src), 2, "sizes do not match"); - if (THCudaTensor_nDimension(state, dst) == 0) { + if (THCTensor_(nDimension)(state, dst) == 0) { // Zero-dim tensor; copy nothing return; } @@ -24,12 +20,12 @@ THCudaTensor_copy(THCState* state, THCudaTensor* dst, THCudaTensor* src) { // -FIXME: if both tensors have matching size and stride arrays, and no // holes within (in other words, there is some permutation that can be applied // to the size/strides such that the resulting tensor is contiguous). - bool srcContig = THCudaTensor_isContiguous(state, src); - bool dstContig = THCudaTensor_isContiguous(state, dst); + bool srcContig = THCTensor_(isContiguous)(state, src); + bool dstContig = THCTensor_(isContiguous)(state, dst); bool memcpyEligible = (srcContig && dstContig) || (totalElements == 1); - int srcDev = THCudaTensor_getDevice(state, src); - int dstDev = THCudaTensor_getDevice(state, dst); + int srcDev = THCTensor_(getDevice)(state, src); + int dstDev = THCTensor_(getDevice)(state, dst); int oldDev = curGPU(); // We always perform the copy on the source device, using the @@ -71,12 +67,13 @@ THCudaTensor_copy(THCState* state, THCudaTensor* dst, THCudaTensor* src) { // We are now on srcDev if (memcpyEligible) { // Perform the copy - THCudaCheck(cudaMemcpyAsync(THCudaTensor_data(state, dst), - THCudaTensor_data(state, src), - totalElements * sizeof(float), + THCudaCheck(cudaMemcpyAsync(THCTensor_(data)(state, dst), + THCTensor_(data)(state, src), + totalElements * sizeof(real), cudaMemcpyDeviceToDevice, copyStream)); } else { +#ifdef THC_REAL_IS_FLOAT // Non-contiguous copy // We avoid creating temporary memory copies if possible. @@ -139,6 +136,11 @@ THCudaTensor_copy(THCState* state, THCudaTensor* dst, THCudaTensor* src) { THCudaTensor_freeCopyTo(state, dstContig, dst); } } +#else +#define STRINGIFY(x) #x + THError("Non-contiguous copy not implemented for Cuda%sTensor", STRINGIFY(Real)); +#undef STRINGIFY +#endif } if (srcDev != dstDev && copyStreamIndex == 0) { @@ -170,3 +172,34 @@ THCudaTensor_copy(THCState* state, THCudaTensor* dst, THCudaTensor* src) { THError(cudaGetErrorString(errcode)); } } + +// conversions are mediated by the CPU +// yes, this is slow; feel free to write CUDA kernels for this +#define THC_CUDA_TENSOR_IMPLEMENT_COPY(TYPEC,TYPECUDA) \ + void THCTensor_(copyCuda##TYPEC)(THCState *state, THCTensor *self, struct THCuda##TYPECUDA##Tensor *src) \ + { \ + if(THCTypeIdx_(Real) == THCTypeIdx_(TYPEC)) { \ + THCTensor_(copy)(state, self, (THCTensor*) src); /* cast just removes compiler warning */ \ + } else { \ + THArgCheck(THCTensor_(nElement)(state, self) == THCuda##TYPECUDA##Tensor_nElement(state, src), 2, "size does not match"); \ + THLongStorage *size = THCuda##TYPECUDA##Tensor_newSizeOf(state, src); \ + TH##TYPEC##Tensor *buffer1 = TH##TYPEC##Tensor_newWithSize(size, NULL); \ + THTensor *buffer2 = THTensor_(newWithSize)(size, NULL); \ + TH##TYPEC##Tensor_copyCuda(state, buffer1, src); \ + THTensor_(copy##TYPEC)(buffer2, buffer1); \ + THCTensor_(copyCPU)(state, self, buffer2); \ + THLongStorage_free(size); \ + TH##TYPEC##Tensor_free(buffer1); \ + THTensor_(free)(buffer2); \ + } \ + } + +THC_CUDA_TENSOR_IMPLEMENT_COPY(Byte,Byte) +THC_CUDA_TENSOR_IMPLEMENT_COPY(Char,Char) +THC_CUDA_TENSOR_IMPLEMENT_COPY(Short,Short) +THC_CUDA_TENSOR_IMPLEMENT_COPY(Int,Int) +THC_CUDA_TENSOR_IMPLEMENT_COPY(Long,Long) +THC_CUDA_TENSOR_IMPLEMENT_COPY(Float,) // i.e. float +THC_CUDA_TENSOR_IMPLEMENT_COPY(Double,Double) + +#endif diff --git a/lib/THC/generic/THCTensorCopy.h b/lib/THC/generic/THCTensorCopy.h index d843213..b71fe0f 100644 --- a/lib/THC/generic/THCTensorCopy.h +++ b/lib/THC/generic/THCTensorCopy.h @@ -1,28 +1,36 @@ -#ifndef TH_CUDA_TENSOR_COPY_INC -#define TH_CUDA_TENSOR_COPY_INC +#ifndef THC_GENERIC_FILE +#define THC_GENERIC_FILE "generic/THCTensorCopy.h" +#else -#include "THCTensor.h" -#include "THCGeneral.h" +THC_API void THCTensor_(copy)(THCState *state, THCTensor *self, THCTensor *src); +THC_API void THCTensor_(copyByte)(THCState *state, THCTensor *self, THByteTensor *src); +THC_API void THCTensor_(copyChar)(THCState *state, THCTensor *self, THCharTensor *src); +THC_API void THCTensor_(copyShort)(THCState *state, THCTensor *self, THShortTensor *src); +THC_API void THCTensor_(copyInt)(THCState *state, THCTensor *self, THIntTensor *src); +THC_API void THCTensor_(copyLong)(THCState *state, THCTensor *self, THLongTensor *src); +THC_API void THCTensor_(copyFloat)(THCState *state, THCTensor *self, THFloatTensor *src); +THC_API void THCTensor_(copyDouble)(THCState *state, THCTensor *self, THDoubleTensor *src); -THC_API void THCudaTensor_copy(THCState *state, THCudaTensor *self, THCudaTensor *src); -THC_API void THCudaTensor_copyByte(THCState *state, THCudaTensor *self, THByteTensor *src); -THC_API void THCudaTensor_copyChar(THCState *state, THCudaTensor *self, THCharTensor *src); -THC_API void THCudaTensor_copyShort(THCState *state, THCudaTensor *self, THShortTensor *src); -THC_API void THCudaTensor_copyInt(THCState *state, THCudaTensor *self, THIntTensor *src); -THC_API void THCudaTensor_copyLong(THCState *state, THCudaTensor *self, THLongTensor *src); -THC_API void THCudaTensor_copyFloat(THCState *state, THCudaTensor *self, THFloatTensor *src); -THC_API void THCudaTensor_copyDouble(THCState *state, THCudaTensor *self, THDoubleTensor *src); +THC_API void THCTensor_(copyCudaByte)(THCState *state, THCTensor *storage, struct THCudaByteTensor *src); +THC_API void THCTensor_(copyCudaChar)(THCState *state, THCTensor *storage, struct THCudaCharTensor *src); +THC_API void THCTensor_(copyCudaShort)(THCState *state, THCTensor *storage, struct THCudaShortTensor *src); +THC_API void THCTensor_(copyCudaInt)(THCState *state, THCTensor *storage, struct THCudaIntTensor *src); +THC_API void THCTensor_(copyCudaLong)(THCState *state, THCTensor *storage, struct THCudaLongTensor *src); +THC_API void THCTensor_(copyCudaFloat)(THCState *state, THCTensor *storage, struct THCudaTensor *src); +THC_API void THCTensor_(copyCudaDouble)(THCState *state, THCTensor *storage, struct THCudaDoubleTensor *src); -THC_API void THByteTensor_copyCuda(THCState *state, THByteTensor *self, THCudaTensor *src); -THC_API void THCharTensor_copyCuda(THCState *state, THCharTensor *self, THCudaTensor *src); -THC_API void THShortTensor_copyCuda(THCState *state, THShortTensor *self, THCudaTensor *src); -THC_API void THIntTensor_copyCuda(THCState *state, THIntTensor *self, THCudaTensor *src); -THC_API void THLongTensor_copyCuda(THCState *state, THLongTensor *self, THCudaTensor *src); -THC_API void THFloatTensor_copyCuda(THCState *state, THFloatTensor *self, THCudaTensor *src); -THC_API void THDoubleTensor_copyCuda(THCState *state, THDoubleTensor *self, THCudaTensor *src); -THC_API void THCudaTensor_copyCuda(THCState *state, THCudaTensor *self, THCudaTensor *src); +THC_API void TH_CONCAT_2(THByteTensor_copyCuda , Real) (THCState *state, THByteTensor *self, THCTensor *src); +THC_API void TH_CONCAT_2(THCharTensor_copyCuda , Real) (THCState *state, THCharTensor *self, THCTensor *src); +THC_API void TH_CONCAT_2(THShortTensor_copyCuda , Real) (THCState *state, THShortTensor *self, THCTensor *src); +THC_API void TH_CONCAT_2(THIntTensor_copyCuda , Real) (THCState *state, THIntTensor *self, THCTensor *src); +THC_API void TH_CONCAT_2(THLongTensor_copyCuda , Real) (THCState *state, THLongTensor *self, THCTensor *src); +THC_API void TH_CONCAT_2(THFloatTensor_copyCuda , Real) (THCState *state, THFloatTensor *self, THCTensor *src); +THC_API void TH_CONCAT_2(THDoubleTensor_copyCuda, Real) (THCState *state, THDoubleTensor *self, THCTensor *src); +THC_API void THCTensor_(copyCuda) (THCState *state, THCTensor *self, THCTensor *src); +THC_API void THTensor_(copyCuda) (THCState *state, THTensor *self, THCTensor *src); +THC_API void THCTensor_(copyCPU) (THCState *state, THCTensor *self, THTensor *src); -THC_API void THCudaTensor_copyAsyncFloat(THCState *state, THCudaTensor *self, THFloatTensor *src); -THC_API void THFloatTensor_copyAsyncCuda(THCState *state, THFloatTensor *self, THCudaTensor *src); +THC_API void THCTensor_(copyAsyncCPU)(THCState *state, THCTensor *self, THTensor *src); +THC_API void THTensor_(copyAsyncCuda)(THCState *state, THTensor *self, THCTensor *src); #endif |