diff options
37 files changed, 1490 insertions, 993 deletions
@@ -33,49 +33,71 @@ typedef struct THCState cudaStream_t THCState_getCurrentStream(THCState *state); -typedef struct THCudaStorage +]] + + local CudaTypes = { + {'float', ''}, + {'unsigned char', 'Byte'}, + {'char', 'Char'}, + {'short', 'Short'}, + {'int', 'Int'}, + {'long','Long'}, + {'double','Double'}, + } + + for _, typedata in ipairs(CudaTypes) do + local real, Real = unpack(typedata) + ctype_def = [[ +typedef struct THCStorage { - float *data; + real *data; long size; int refcount; char flag; THAllocator *allocator; void *allocatorContext; - struct THCudaStorage *view; -} THCudaStorage; + struct THCStorage *view; +} THCStorage; -typedef struct THCudaTensor +typedef struct THCTensor { long *size; long *stride; int nDimension; - THCudaStorage *storage; + THCStorage *storage; long storageOffset; int refcount; char flag; -} THCudaTensor; +} THCTensor; ]] - ffi.cdef(cdefs) - local Storage = torch.getmetatable('torch.CudaStorage') - local Storage_tt = ffi.typeof('THCudaStorage**') - - rawset(Storage, "cdata", function(self) return Storage_tt(self)[0] end) - rawset(Storage, "data", function(self) return Storage_tt(self)[0].data end) - -- Tensor - local Tensor = torch.getmetatable('torch.CudaTensor') - local Tensor_tt = ffi.typeof('THCudaTensor**') - - rawset(Tensor, "cdata", function(self) return Tensor_tt(self)[0] end) + ctype_def = ctype_def:gsub('real',real):gsub('THCStorage','THCuda'..Real..'Storage'):gsub('THCTensor','THCuda'..Real..'Tensor') + cdefs = cdefs .. ctype_def + end + ffi.cdef(cdefs) - rawset(Tensor, "data", - function(self) - self = Tensor_tt(self)[0] - return self.storage ~= nil and self.storage.data + self.storageOffset or nil - end - ) + for _, typedata in ipairs(CudaTypes) do + local real, Real = unpack(typedata) + local Storage = torch.getmetatable('torch.Cuda' .. Real .. 'Storage') + local Storage_tt = ffi.typeof('THCuda' .. Real .. 'Storage**') + + rawset(Storage, "cdata", function(self) return Storage_tt(self)[0] end) + rawset(Storage, "data", function(self) return Storage_tt(self)[0].data end) + -- Tensor + local Tensor = torch.getmetatable('torch.Cuda' .. Real .. 'Tensor') + local Tensor_tt = ffi.typeof('THCuda' .. Real .. 'Tensor**') + + rawset(Tensor, "cdata", function(self) return Tensor_tt(self)[0] end) + + rawset(Tensor, "data", + function(self) + self = Tensor_tt(self)[0] + return self.storage ~= nil and self.storage.data + self.storageOffset or nil + end + ) + end end diff --git a/Storage.c b/Storage.c new file mode 100644 index 0000000..a7478a0 --- /dev/null +++ b/Storage.c @@ -0,0 +1,12 @@ +#include "torch/utils.h" +#include "THC.h" +#include "THFile.h" +#include "luaT.h" + +#define torch_Storage_(NAME) TH_CONCAT_4(torch_,CReal,Storage_,NAME) +#define torch_Storage TH_CONCAT_STRING_3(torch.,CReal,Storage) +#define cutorch_Storage_(NAME) TH_CONCAT_4(cutorch_,CReal,Storage_,NAME) + +#include "generic/CStorage.c" +#include "THCGenerateAllTypes.h" + diff --git a/Tensor.c b/Tensor.c new file mode 100644 index 0000000..3adeddd --- /dev/null +++ b/Tensor.c @@ -0,0 +1,13 @@ +#include "torch/utils.h" +#include "THC.h" +#include "THFile.h" +#include "luaT.h" + +#define torch_Storage_(NAME) TH_CONCAT_4(torch_,CReal,Storage_,NAME) +#define torch_Storage TH_CONCAT_STRING_3(torch.,CReal,Storage) +#define torch_Tensor_(NAME) TH_CONCAT_4(torch_,CReal,Tensor_,NAME) +#define torch_Tensor TH_CONCAT_STRING_3(torch.,CReal,Tensor) +#define cutorch_Tensor_(NAME) TH_CONCAT_4(cutorch_,CReal,Tensor_,NAME) + +#include "generic/CTensor.c" +#include "THCGenerateAllTypes.h" @@ -21,55 +21,42 @@ end local function Tensor__typeAs(self,tensor) return self:type(tensor:type()) end -local function Tensor__cuda(self) - return self:type('torch.CudaTensor') -end -local function Tensor__double(self) - return self:type('torch.DoubleTensor') -end -local function Tensor__float(self) - return self:type('torch.FloatTensor') -end -local function Tensor__byte(self) - return self:type('torch.ByteTensor') -end +local TensorTypes = { + float = 'torch.FloatTensor', + double = 'torch.DoubleTensor', + byte = 'torch.ByteTensor', + char = 'torch.CharTensor', + int = 'torch.IntTensor', + short = 'torch.ShortTensor', + long = 'torch.LongTensor', + cuda = 'torch.CudaTensor', + cudaDouble = 'torch.CudaDoubleTensor', + cudaByte = 'torch.CudaByteTensor', + cudaChar = 'torch.CudaCharTensor', + cudaInt = 'torch.CudaIntTensor', + cudaShort = 'torch.CudaShortTensor', + cudaLong = 'torch.CudaLongTensor' +} -local function Tensor__char(self) - return self:type('torch.CharTensor') -end -local function Tensor__int(self) - return self:type('torch.IntTensor') +local function Tensor__converter(type) + return function(self) + return self:type(type) + end end -local function Tensor__short(self) - return self:type('torch.ShortTensor') +for _, SrcType in pairs(TensorTypes) do + for FuncName, DstType in pairs(TensorTypes) do + rawset(torch.getmetatable(SrcType), FuncName, Tensor__converter(DstType)) + end end -local function Tensor__long(self) - return self:type('torch.LongTensor') +for _, CudaTensorType in pairs(TensorTypes) do + rawset(torch.getmetatable(CudaTensorType), 'type', Tensor__type) + rawset(torch.getmetatable(CudaTensorType), 'typeAs', Tensor__typeAs) end -rawset(torch.getmetatable('torch.DoubleTensor'), 'cuda', Tensor__cuda) -rawset(torch.getmetatable('torch.FloatTensor'), 'cuda', Tensor__cuda) -rawset(torch.getmetatable('torch.ByteTensor'), 'cuda', Tensor__cuda) -rawset(torch.getmetatable('torch.CharTensor'), 'cuda', Tensor__cuda) -rawset(torch.getmetatable('torch.IntTensor'), 'cuda', Tensor__cuda) -rawset(torch.getmetatable('torch.ShortTensor'), 'cuda', Tensor__cuda) -rawset(torch.getmetatable('torch.LongTensor'), 'cuda', Tensor__cuda) -rawset(torch.getmetatable('torch.CudaTensor'), 'cuda', Tensor__cuda) - -rawset(torch.getmetatable('torch.CudaTensor'), 'type', Tensor__type) -rawset(torch.getmetatable('torch.CudaTensor'), 'typeAs', Tensor__typeAs) -rawset(torch.getmetatable('torch.CudaTensor'), 'double', Tensor__double) -rawset(torch.getmetatable('torch.CudaTensor'), 'float', Tensor__float) -rawset(torch.getmetatable('torch.CudaTensor'), 'byte', Tensor__byte) -rawset(torch.getmetatable('torch.CudaTensor'), 'char', Tensor__char) -rawset(torch.getmetatable('torch.CudaTensor'), 'int', Tensor__int) -rawset(torch.getmetatable('torch.CudaTensor'), 'short', Tensor__short) -rawset(torch.getmetatable('torch.CudaTensor'), 'long', Tensor__long) - do local metatable = torch.getmetatable('torch.CudaTensor') for _,func in pairs{'expand', 'expandAs', 'view', 'viewAs', 'repeatTensor', diff --git a/generic/CStorage.c b/generic/CStorage.c index 11ea696..c5626d4 100644 --- a/generic/CStorage.c +++ b/generic/CStorage.c @@ -1,65 +1,68 @@ -#include "torch/utils.h" -#include "THC.h" -#include "THFile.h" -#include "luaT.h" +#ifndef THC_GENERIC_FILE +#define THC_GENERIC_FILE "generic/CStorage.c" +#else /* everything is as the generic Storage.c, except few things (see below) */ -#define real float -#define Real Cuda -#define TH_GENERIC_FILE "generic/Storage.c" - -#define torch_Storage_(NAME) TH_CONCAT_4(torch_,Real,Storage_,NAME) - #define THFile_readRealRaw(file, data, size) \ { \ - float *fdata = (float*)THAlloc(sizeof(float)*size); \ - THFile_readFloatRaw(file, fdata, size); \ - THCudaCheck(cudaMemcpy(data, fdata, size * sizeof(float), cudaMemcpyHostToDevice)); \ + real *fdata = (real*)THAlloc(sizeof(real)*size); \ + TH_CONCAT_3(THFile_read,Real,Raw)(file, fdata, size); \ + THCudaCheck(cudaMemcpy(data, fdata, size * sizeof(real), cudaMemcpyHostToDevice)); \ THFree(fdata); \ } #define THFile_writeRealRaw(file, data, size) \ { \ - float *fdata = (float*)THAlloc(sizeof(float)*size); \ - THCudaCheck(cudaMemcpy(fdata, data, size * sizeof(float), cudaMemcpyDeviceToHost)); \ - THFile_writeFloatRaw(file, fdata, size); \ + real *fdata = (real*)THAlloc(sizeof(real)*size); \ + THCudaCheck(cudaMemcpy(fdata, data, size * sizeof(real), cudaMemcpyDeviceToHost)); \ + TH_CONCAT_3(THFile_write,Real,Raw)(file, fdata, size); \ THFree(fdata); \ } -#define torch_Storage TH_CONCAT_STRING_3(torch.,Real,Storage) - +#define TH_GENERIC_FILE "generic/Storage.c" #include "generic/Storage.c" -#undef real -#undef Real #undef TH_GENERIC_FILE +#undef THFile_readRealRaw +#undef THFile_writeRealRaw /* now we overwrite some methods specific to CudaStorage */ -static int cutorch_CudaStorage_copy(lua_State *L) +static int cutorch_Storage_(copy)(lua_State *L) { THCState *state = cutorch_getstate(L); - THCudaStorage *storage = luaT_checkudata(L, 1, "torch.CudaStorage"); + THCStorage *storage = luaT_checkudata(L, 1, torch_Storage); void *src; - if( (src = luaT_toudata(L, 2, "torch.CudaStorage")) ) - THCudaStorage_copy(state, storage, src); + if( (src = luaT_toudata(L, 2, "torch.CudaByteStorage")) ) + THCStorage_(copyCudaByte)(state, storage, src); + else if( (src = luaT_toudata(L, 2, "torch.CudaCharStorage")) ) + THCStorage_(copyCudaChar)(state, storage, src); + else if( (src = luaT_toudata(L, 2, "torch.CudaShortStorage")) ) + THCStorage_(copyCudaShort)(state, storage, src); + else if( (src = luaT_toudata(L, 2, "torch.CudaIntStorage")) ) + THCStorage_(copyCudaInt)(state, storage, src); + else if( (src = luaT_toudata(L, 2, "torch.CudaLongStorage")) ) + THCStorage_(copyCudaLong)(state, storage, src); + else if( (src = luaT_toudata(L, 2, "torch.CudaStorage")) ) + THCStorage_(copyCudaFloat)(state, storage, src); + else if( (src = luaT_toudata(L, 2, "torch.CudaDoubleStorage")) ) + THCStorage_(copyCudaDouble)(state, storage, src); + else if( (src = luaT_toudata(L, 2, "torch.ByteStorage")) ) - THCudaStorage_copyByte(state, storage, src); + THCStorage_(copyByte)(state, storage, src); else if( (src = luaT_toudata(L, 2, "torch.CharStorage")) ) - THCudaStorage_copyChar(state, storage, src); + THCStorage_(copyChar)(state, storage, src); else if( (src = luaT_toudata(L, 2, "torch.ShortStorage")) ) - THCudaStorage_copyShort(state, storage, src); + THCStorage_(copyShort)(state, storage, src); else if( (src = luaT_toudata(L, 2, "torch.IntStorage")) ) - THCudaStorage_copyInt(state, storage, src); + THCStorage_(copyInt)(state, storage, src); else if( (src = luaT_toudata(L, 2, "torch.LongStorage")) ) - THCudaStorage_copyLong(state, storage, src); + THCStorage_(copyLong)(state, storage, src); else if( (src = luaT_toudata(L, 2, "torch.FloatStorage")) ) - THCudaStorage_copyFloat(state, storage, src); + THCStorage_(copyFloat)(state, storage, src); else if( (src = luaT_toudata(L, 2, "torch.DoubleStorage")) ) - THCudaStorage_copyDouble(state, storage, src); - else if( (src = luaT_toudata(L, 2, "torch.CudaStorage")) ) - THCudaStorage_copyCuda(state, storage, src); + THCStorage_(copyDouble)(state, storage, src); else luaL_typerror(L, 2, "torch.*Storage"); @@ -67,77 +70,63 @@ static int cutorch_CudaStorage_copy(lua_State *L) return 1; } -#define CUDA_IMPLEMENT_STORAGE_COPY(TYPEC) \ - static int cutorch_##TYPEC##Storage_copy(lua_State *L) \ - { \ - TH##TYPEC##Storage *storage = luaT_checkudata(L, 1, "torch." #TYPEC "Storage"); \ - void *src; \ - if( (src = luaT_toudata(L, 2, "torch." #TYPEC "Storage")) ) \ - TH##TYPEC##Storage_copy(storage, src); \ - else if( (src = luaT_toudata(L, 2, "torch.ByteStorage")) ) \ - TH##TYPEC##Storage_copyByte(storage, src); \ - else if( (src = luaT_toudata(L, 2, "torch.CharStorage")) ) \ - TH##TYPEC##Storage_copyChar(storage, src); \ - else if( (src = luaT_toudata(L, 2, "torch.ShortStorage")) ) \ - TH##TYPEC##Storage_copyShort(storage, src); \ - else if( (src = luaT_toudata(L, 2, "torch.IntStorage")) ) \ - TH##TYPEC##Storage_copyInt(storage, src); \ - else if( (src = luaT_toudata(L, 2, "torch.LongStorage")) ) \ - TH##TYPEC##Storage_copyLong(storage, src); \ - else if( (src = luaT_toudata(L, 2, "torch.FloatStorage")) ) \ - TH##TYPEC##Storage_copyFloat(storage, src); \ - else if( (src = luaT_toudata(L, 2, "torch.DoubleStorage")) ) \ - TH##TYPEC##Storage_copyDouble(storage, src); \ - else if( (src = luaT_toudata(L, 2, "torch.CudaStorage")) ) \ - TH##TYPEC##Storage_copyCuda(cutorch_getstate(L), storage, src); \ - else \ - luaL_typerror(L, 2, "torch.*Storage"); \ - \ - lua_settop(L, 1); \ - return 1; \ -} +static int TH_CONCAT_3(cutorch_,Real,Storage_copy)(lua_State *L) +{ + THStorage *storage = luaT_checkudata(L, 1, TH_CONCAT_STRING_3(torch.,Real,Storage)); + void *src; + if( (src = luaT_toudata(L, 2, TH_CONCAT_STRING_3(torch.,Real,Storage) ))) + THStorage_(copy)(storage, src); + else if( (src = luaT_toudata(L, 2, "torch.ByteStorage")) ) + THStorage_(copyByte)(storage, src); + else if( (src = luaT_toudata(L, 2, "torch.CharStorage")) ) + THStorage_(copyChar)(storage, src); + else if( (src = luaT_toudata(L, 2, "torch.ShortStorage")) ) + THStorage_(copyShort)(storage, src); + else if( (src = luaT_toudata(L, 2, "torch.IntStorage")) ) + THStorage_(copyInt)(storage, src); + else if( (src = luaT_toudata(L, 2, "torch.LongStorage")) ) + THStorage_(copyLong)(storage, src); + else if( (src = luaT_toudata(L, 2, "torch.FloatStorage")) ) + THStorage_(copyFloat)(storage, src); + else if( (src = luaT_toudata(L, 2, "torch.DoubleStorage")) ) + THStorage_(copyDouble)(storage, src); + else if( (src = luaT_toudata(L, 2, "torch.CudaStorage")) ) + THStorage_(copyCudaFloat)(cutorch_getstate(L), storage, src); + else if( (src = luaT_toudata(L, 2, "torch.CudaLongStorage")) ) + THStorage_(copyCudaLong)(cutorch_getstate(L), storage, src); + else if( (src = luaT_toudata(L, 2, "torch.CudaByteStorage")) ) + THStorage_(copyCudaByte)(cutorch_getstate(L), storage, src); + else if( (src = luaT_toudata(L, 2, "torch.CudaCharStorage")) ) + THStorage_(copyCudaChar)(cutorch_getstate(L), storage, src); + else if( (src = luaT_toudata(L, 2, "torch.CudaShortStorage")) ) + THStorage_(copyCudaShort)(cutorch_getstate(L), storage, src); + else if( (src = luaT_toudata(L, 2, "torch.CudaIntStorage")) ) + THStorage_(copyCudaInt)(cutorch_getstate(L), storage, src); + else if( (src = luaT_toudata(L, 2, "torch.CudaDoubleStorage")) ) + THStorage_(copyCudaDouble)(cutorch_getstate(L), storage, src); + else + luaL_typerror(L, 2, "torch.*Storage"); -CUDA_IMPLEMENT_STORAGE_COPY(Byte) -CUDA_IMPLEMENT_STORAGE_COPY(Char) -CUDA_IMPLEMENT_STORAGE_COPY(Short) -CUDA_IMPLEMENT_STORAGE_COPY(Int) -CUDA_IMPLEMENT_STORAGE_COPY(Long) -CUDA_IMPLEMENT_STORAGE_COPY(Float) -CUDA_IMPLEMENT_STORAGE_COPY(Double) + lua_settop(L, 1); + return 1; +} -void cutorch_CudaStorage_init(lua_State* L) +void cutorch_Storage_(init)(lua_State* L) { /* the standard stuff */ - torch_CudaStorage_init(L); - - /* the copy methods */ - { - int i; - - const void* tnames[8] = {"torch.ByteStorage", - "torch.CharStorage", - "torch.ShortStorage", - "torch.IntStorage", - "torch.LongStorage", - "torch.FloatStorage", - "torch.DoubleStorage", - "torch.CudaStorage"}; - - static int (*funcs[8])(lua_State*) = {cutorch_ByteStorage_copy, - cutorch_CharStorage_copy, - cutorch_ShortStorage_copy, - cutorch_IntStorage_copy, - cutorch_LongStorage_copy, - cutorch_FloatStorage_copy, - cutorch_DoubleStorage_copy, - cutorch_CudaStorage_copy}; - - for(i = 0; i < 8; i++) - { - luaT_pushmetatable(L, tnames[i]); - lua_pushcfunction(L, funcs[i]); - lua_setfield(L, -2, "copy"); - lua_pop(L, 1); - } - } + torch_Storage_(init)(L); + + // torch_Storage macro is defined in Storage.c produce the CudaTensor types + // so I have to construct the normal torch types by hand + luaT_pushmetatable(L, TH_CONCAT_STRING_3(torch.,Real,Storage)); + lua_pushcfunction(L, TH_CONCAT_3(cutorch_,Real,Storage_copy)); + lua_setfield(L, -2, "copy"); + lua_pop(L, 1); + + luaT_pushmetatable(L, torch_Storage); + lua_pushcfunction(L, cutorch_Storage_(copy)); + lua_setfield(L, -2, "copy"); + lua_pop(L, 1); } + +#endif diff --git a/generic/CTensor.c b/generic/CTensor.c index 5666dec..79a8a48 100644 --- a/generic/CTensor.c +++ b/generic/CTensor.c @@ -1,49 +1,48 @@ -#include "torch/utils.h" -#include "THC.h" -#include "THFile.h" -#include "luaT.h" +#ifndef THC_GENERIC_FILE +#define THC_GENERIC_FILE "generic/CTensor.c" +#else /* everything is as the generic Storage.c, except few things (see below) */ -#define real float -#define Real Cuda - -#define torch_Storage_(NAME) TH_CONCAT_4(torch_,Real,Storage_,NAME) -#define torch_Storage TH_CONCAT_STRING_3(torch.,Real,Storage) -#define torch_Tensor_(NAME) TH_CONCAT_4(torch_,Real,Tensor_,NAME) -#define torch_Tensor TH_CONCAT_STRING_3(torch.,Real,Tensor) - #define TH_GENERIC_FILE "generic/Tensor.c" #include "generic/Tensor.c" #undef TH_GENERIC_FILE -#undef real -#undef Real - /* now we overwrite some methods specific to CudaTensor */ -static int cutorch_CudaTensor_copy(lua_State *L) +static int cutorch_Tensor_(copy)(lua_State *L) { THCState *state = cutorch_getstate(L); - THCudaTensor *storage = luaT_checkudata(L, 1, "torch.CudaTensor"); + THCTensor *tensor = luaT_checkudata(L, 1, torch_Tensor); void *src; if( (src = luaT_toudata(L, 2, "torch.CudaTensor")) ) - THCudaTensor_copy(state, storage, src); + THCTensor_(copyCudaFloat)(state, tensor, src); + else if( (src = luaT_toudata(L, 2, "torch.CudaByteTensor")) ) + THCTensor_(copyCudaByte)(state, tensor, src); + else if( (src = luaT_toudata(L, 2, "torch.CudaCharTensor")) ) + THCTensor_(copyCudaChar)(state, tensor, src); + else if( (src = luaT_toudata(L, 2, "torch.CudaShortTensor")) ) + THCTensor_(copyCudaShort)(state, tensor, src); + else if( (src = luaT_toudata(L, 2, "torch.CudaIntTensor")) ) + THCTensor_(copyCudaInt)(state, tensor, src); + else if( (src = luaT_toudata(L, 2, "torch.CudaLongTensor")) ) + THCTensor_(copyCudaLong)(state, tensor, src); + else if( (src = luaT_toudata(L, 2, "torch.CudaDoubleTensor")) ) + THCTensor_(copyCudaDouble)(state, tensor, src); + else if( (src = luaT_toudata(L, 2, "torch.ByteTensor")) ) - THCudaTensor_copyByte(state, storage, src); + THCTensor_(copyByte)(state, tensor, src); else if( (src = luaT_toudata(L, 2, "torch.CharTensor")) ) - THCudaTensor_copyChar(state, storage, src); + THCTensor_(copyChar)(state, tensor, src); else if( (src = luaT_toudata(L, 2, "torch.ShortTensor")) ) - THCudaTensor_copyShort(state, storage, src); + THCTensor_(copyShort)(state, tensor, src); else if( (src = luaT_toudata(L, 2, "torch.IntTensor")) ) - THCudaTensor_copyInt(state, storage, src); + THCTensor_(copyInt)(state, tensor, src); else if( (src = luaT_toudata(L, 2, "torch.LongTensor")) ) - THCudaTensor_copyLong(state, storage, src); + THCTensor_(copyLong)(state, tensor, src); else if( (src = luaT_toudata(L, 2, "torch.FloatTensor")) ) - THCudaTensor_copyFloat(state, storage, src); + THCTensor_(copyFloat)(state, tensor, src); else if( (src = luaT_toudata(L, 2, "torch.DoubleTensor")) ) - THCudaTensor_copyDouble(state, storage, src); - else if( (src = luaT_toudata(L, 2, "torch.CudaTensor")) ) - THCudaTensor_copyCuda(state, storage, src); + THCTensor_(copyDouble)(state, tensor, src); else luaL_typerror(L, 2, "torch.*Tensor"); @@ -51,73 +50,84 @@ static int cutorch_CudaTensor_copy(lua_State *L) return 1; } -static int cutorch_CudaTensor_copyAsync(lua_State *L) +static int cutorch_Tensor_(copyAsyncCPU)(lua_State *L) { +#define STRINGIFY_TENSOR(x) TH_CONCAT_STRING_3(torch.,x,Tensor) THCState *state = cutorch_getstate(L); - THCudaTensor *storage = luaT_checkudata(L, 1, "torch.CudaTensor"); + THCTensor *tensor = luaT_checkudata(L, 1, STRINGIFY_TENSOR(CReal)); void *src; - if( (src = luaT_toudata(L, 2, "torch.CudaTensor")) ) - THCudaTensor_copy(state, storage, src); - else if( (src = luaT_toudata(L, 2, "torch.FloatTensor")) ) - THCudaTensor_copyAsyncFloat(state, storage, src); + if( (src = luaT_toudata(L, 2, STRINGIFY_TENSOR(CReal)))) + THCTensor_(copy)(state, tensor, src); + else if( (src = luaT_toudata(L, 2, STRINGIFY_TENSOR(Real)))) + THCTensor_(copyAsyncCPU)(state, tensor, src); else - luaL_typerror(L, 2, "torch.FloatTensor or torch.CudaTensor"); + luaL_typerror(L, 2, STRINGIFY_TENSOR(Real) " or " STRINGIFY_TENSOR(CReal)); lua_settop(L, 1); return 1; +#undef STRINGIFY_TENSOR } -#define CUDA_IMPLEMENT_TENSOR_COPY(TYPEC) \ - static int cutorch_##TYPEC##Tensor_copy(lua_State *L) \ - { \ - TH##TYPEC##Tensor *storage = luaT_checkudata(L, 1, "torch." #TYPEC "Tensor"); \ - void *src; \ - if( (src = luaT_toudata(L, 2, "torch." #TYPEC "Tensor")) ) \ - TH##TYPEC##Tensor_copy(storage, src); \ - else if( (src = luaT_toudata(L, 2, "torch.ByteTensor")) ) \ - TH##TYPEC##Tensor_copyByte(storage, src); \ - else if( (src = luaT_toudata(L, 2, "torch.CharTensor")) ) \ - TH##TYPEC##Tensor_copyChar(storage, src); \ - else if( (src = luaT_toudata(L, 2, "torch.ShortTensor")) ) \ - TH##TYPEC##Tensor_copyShort(storage, src); \ - else if( (src = luaT_toudata(L, 2, "torch.IntTensor")) ) \ - TH##TYPEC##Tensor_copyInt(storage, src); \ - else if( (src = luaT_toudata(L, 2, "torch.LongTensor")) ) \ - TH##TYPEC##Tensor_copyLong(storage, src); \ - else if( (src = luaT_toudata(L, 2, "torch.FloatTensor")) ) \ - TH##TYPEC##Tensor_copyFloat(storage, src); \ - else if( (src = luaT_toudata(L, 2, "torch.DoubleTensor")) ) \ - TH##TYPEC##Tensor_copyDouble(storage, src); \ - else if( (src = luaT_toudata(L, 2, "torch.CudaTensor")) ) \ - TH##TYPEC##Tensor_copyCuda(cutorch_getstate(L), storage, src); \ - else \ - luaL_typerror(L, 2, "torch.*Tensor"); \ - \ - lua_settop(L, 1); \ - return 1; \ - } -CUDA_IMPLEMENT_TENSOR_COPY(Byte) -CUDA_IMPLEMENT_TENSOR_COPY(Char) -CUDA_IMPLEMENT_TENSOR_COPY(Short) -CUDA_IMPLEMENT_TENSOR_COPY(Int) -CUDA_IMPLEMENT_TENSOR_COPY(Long) -CUDA_IMPLEMENT_TENSOR_COPY(Float) -CUDA_IMPLEMENT_TENSOR_COPY(Double) +static int TH_CONCAT_3(cutorch_,Real,Tensor_copy)(lua_State *L) +{ + THTensor *tensor = luaT_checkudata(L, 1, TH_CONCAT_STRING_3(torch.,Real,Tensor)); + void *src; + if( (src = luaT_toudata(L, 2, TH_CONCAT_STRING_3(torch.,Real,Tensor)) )) + THTensor_(copy)(tensor, src); + else if( (src = luaT_toudata(L, 2, "torch.ByteTensor")) ) + THTensor_(copyByte)(tensor, src); + else if( (src = luaT_toudata(L, 2, "torch.CharTensor")) ) + THTensor_(copyChar)(tensor, src); + else if( (src = luaT_toudata(L, 2, "torch.ShortTensor")) ) + THTensor_(copyShort)(tensor, src); + else if( (src = luaT_toudata(L, 2, "torch.IntTensor")) ) + THTensor_(copyInt)(tensor, src); + else if( (src = luaT_toudata(L, 2, "torch.LongTensor")) ) + THTensor_(copyLong)(tensor, src); + else if( (src = luaT_toudata(L, 2, "torch.FloatTensor")) ) + THTensor_(copyFloat)(tensor, src); + else if( (src = luaT_toudata(L, 2, "torch.DoubleTensor")) ) + THTensor_(copyDouble)(tensor, src); + else if( (src = luaT_toudata(L, 2, "torch.CudaByteTensor")) ) + THTensor_(copyCudaByte)(cutorch_getstate(L), tensor, src); + else if( (src = luaT_toudata(L, 2, "torch.CudaCharTensor")) ) + THTensor_(copyCudaChar)(cutorch_getstate(L), tensor, src); + else if( (src = luaT_toudata(L, 2, "torch.CudaShortTensor")) ) + THTensor_(copyCudaShort)(cutorch_getstate(L), tensor, src); + else if( (src = luaT_toudata(L, 2, "torch.CudaIntTensor")) ) + THTensor_(copyCudaInt)(cutorch_getstate(L), tensor, src); + else if( (src = luaT_toudata(L, 2, "torch.CudaLongTensor")) ) + THTensor_(copyCudaLong)(cutorch_getstate(L), tensor, src); + else if( (src = luaT_toudata(L, 2, "torch.CudaTensor")) ) + THTensor_(copyCudaFloat)(cutorch_getstate(L), tensor, src); + else if( (src = luaT_toudata(L, 2, "torch.CudaDoubleTensor")) ) + THTensor_(copyCudaDouble)(cutorch_getstate(L), tensor, src); + else + luaL_typerror(L, 2, "torch.*Tensor"); + + lua_settop(L, 1); + return 1; +} -static int cutorch_FloatTensor_copyAsync(lua_State *L) +static int TH_CONCAT_3(cutorch_,Real,Tensor_copyAsyncCuda)(lua_State *L) { - THFloatTensor *storage = luaT_checkudata(L, 1, "torch.FloatTensor"); +#define STRINGIFY_TENSOR(x) TH_CONCAT_STRING_3(torch.,x,Tensor) + THTensor *tensor = luaT_checkudata(L, 1, STRINGIFY_TENSOR(Real)); void *src; - if( (src = luaT_toudata(L, 2, "torch.CudaTensor")) ) - THFloatTensor_copyAsyncCuda(cutorch_getstate(L), storage, src); + if( (src = luaT_toudata(L, 2, STRINGIFY_TENSOR(CReal)))) + THTensor_(copyAsyncCuda)(cutorch_getstate(L), tensor, src); else - luaL_typerror(L, 2, "torch.CudaTensor"); + luaL_typerror(L, 2, STRINGIFY_TENSOR(CReal)); lua_settop(L, 1); return 1; +#undef STRINGIFY_TENSOR } + + +#ifdef THC_REAL_IS_FLOAT static void THFloatTensor_computesz(THFloatTensor *self, long **sz_, long **st_) { long *sz, *st, *szh; @@ -201,69 +211,55 @@ static int cuda_FloatTensor_fakecopy(lua_State *L) lua_settop(L, 1); return 1; } +#endif -static int cutorch_CudaTensor_getDevice(lua_State *L) { - THCudaTensor *tensor = luaT_checkudata(L, 1, "torch.CudaTensor"); - lua_pushinteger(L, THCudaTensor_getDevice(cutorch_getstate(L), tensor) + 1); +static int cutorch_Tensor_(getDevice)(lua_State *L) { + THCTensor *tensor = luaT_checkudata(L, 1, torch_Tensor); + lua_pushinteger(L, THCTensor_(getDevice)(cutorch_getstate(L), tensor) + 1); return 1; } -void cutorch_CudaTensor_init(lua_State* L) +void cutorch_Tensor_(init)(lua_State* L) { /* the standard stuff */ - torch_CudaTensor_init(L); + torch_Tensor_(init)(L); /* additional methods */ +#ifdef THC_REAL_IS_FLOAT luaT_pushmetatable(L, "torch.FloatTensor"); lua_pushcfunction(L, cuda_FloatTensor_fakecopy); lua_setfield(L, -2, "fakecopy"); lua_pop(L, 1); +#endif - /* the copy methods */ - { - int i; - - const void* tnames[8] = {"torch.ByteTensor", - "torch.CharTensor", - "torch.ShortTensor", - "torch.IntTensor", - "torch.LongTensor", - "torch.FloatTensor", - "torch.DoubleTensor", - "torch.CudaTensor"}; - - static int (*funcs[8])(lua_State*) = {cutorch_ByteTensor_copy, - cutorch_CharTensor_copy, - cutorch_ShortTensor_copy, - cutorch_IntTensor_copy, - cutorch_LongTensor_copy, - cutorch_FloatTensor_copy, - cutorch_DoubleTensor_copy, - cutorch_CudaTensor_copy}; - - for(i = 0; i < 8; i++) - { - luaT_pushmetatable(L, tnames[i]); - lua_pushcfunction(L, funcs[i]); - lua_setfield(L, -2, "copy"); - lua_pop(L, 1); - } + // torch_Storage macro is defined in Storage.c produce the CudaTensor types + // so I have to construct the normal torch types by hand + luaT_pushmetatable(L, TH_CONCAT_STRING_3(torch.,Real,Tensor)); + lua_pushcfunction(L, TH_CONCAT_3(cutorch_,Real,Tensor_copy)); + lua_setfield(L, -2, "copy"); + lua_pop(L, 1); - // Register async copy methods. - luaT_pushmetatable(L, "torch.CudaTensor"); - lua_pushcfunction(L, cutorch_CudaTensor_copyAsync); - lua_setfield(L, -2, "copyAsync"); - lua_pop(L, 1); + luaT_pushmetatable(L, torch_Tensor); + lua_pushcfunction(L, cutorch_Tensor_(copy)); + lua_setfield(L, -2, "copy"); + lua_pop(L, 1); - luaT_pushmetatable(L, "torch.FloatTensor"); - lua_pushcfunction(L, cutorch_FloatTensor_copyAsync); - lua_setfield(L, -2, "copyAsync"); - lua_pop(L, 1); - } + // Register async copy methods. + luaT_pushmetatable(L, TH_CONCAT_STRING_3(torch.,Real,Tensor)); + lua_pushcfunction(L, TH_CONCAT_3(cutorch_,Real,Tensor_copyAsyncCuda)); + lua_setfield(L, -2, "copyAsync"); + lua_pop(L, 1); + + luaT_pushmetatable(L, torch_Tensor); + lua_pushcfunction(L, cutorch_Tensor_(copyAsyncCPU)); + lua_setfield(L, -2, "copyAsync"); + lua_pop(L, 1); - luaT_pushmetatable(L, "torch.CudaTensor"); - lua_pushcfunction(L, cutorch_CudaTensor_getDevice); + luaT_pushmetatable(L, torch_Tensor); + lua_pushcfunction(L, cutorch_Tensor_(getDevice)); lua_setfield(L, -2, "getDevice"); lua_pop(L, 1); } + +#endif @@ -3,8 +3,22 @@ #include "THCGeneral.h" #include "THCTensorRandom.h" +extern void cutorch_CudaByteStorage_init(lua_State* L); +extern void cutorch_CudaCharStorage_init(lua_State* L); +extern void cutorch_CudaShortStorage_init(lua_State* L); +extern void cutorch_CudaIntStorage_init(lua_State* L); +extern void cutorch_CudaLongStorage_init(lua_State* L); extern void cutorch_CudaStorage_init(lua_State* L); +extern void cutorch_CudaDoubleStorage_init(lua_State* L); + +extern void cutorch_CudaByteTensor_init(lua_State* L); +extern void cutorch_CudaCharTensor_init(lua_State* L); +extern void cutorch_CudaShortTensor_init(lua_State* L); +extern void cutorch_CudaIntTensor_init(lua_State* L); +extern void cutorch_CudaLongTensor_init(lua_State* L); extern void cutorch_CudaTensor_init(lua_State* L); +extern void cutorch_CudaDoubleTensor_init(lua_State* L); + extern void cutorch_CudaTensorMath_init(lua_State* L); extern void cutorch_CudaTensorOperator_init(lua_State* L); @@ -875,8 +889,22 @@ int luaopen_libcutorch(lua_State *L) lua_setfield(L, -2, "magma"); #endif + cutorch_CudaByteStorage_init(L); + cutorch_CudaCharStorage_init(L); + cutorch_CudaShortStorage_init(L); + cutorch_CudaIntStorage_init(L); + cutorch_CudaLongStorage_init(L); cutorch_CudaStorage_init(L); + cutorch_CudaDoubleStorage_init(L); + + cutorch_CudaByteTensor_init(L); + cutorch_CudaCharTensor_init(L); + cutorch_CudaShortTensor_init(L); + cutorch_CudaIntTensor_init(L); + cutorch_CudaLongTensor_init(L); cutorch_CudaTensor_init(L); + cutorch_CudaDoubleTensor_init(L); + cutorch_CudaTensorMath_init(L); cutorch_CudaTensorOperator_init(L); cutorch_Event_init(L); @@ -1,8 +1,20 @@ require "torch" paths.require "libcutorch" -torch.CudaStorage.__tostring__ = torch.FloatStorage.__tostring__ -torch.CudaTensor.__tostring__ = torch.FloatTensor.__tostring__ +torch.CudaByteStorage.__tostring__ = torch.ByteStorage.__tostring__ +torch.CudaByteTensor.__tostring__ = torch.ByteTensor.__tostring__ +torch.CudaCharStorage.__tostring__ = torch.CharStorage.__tostring__ +torch.CudaCharTensor.__tostring__ = torch.CharTensor.__tostring__ +torch.CudaShortStorage.__tostring__ = torch.ShortStorage.__tostring__ +torch.CudaShortTensor.__tostring__ = torch.ShortTensor.__tostring__ +torch.CudaIntStorage.__tostring__ = torch.IntStorage.__tostring__ +torch.CudaIntTensor.__tostring__ = torch.IntTensor.__tostring__ +torch.CudaLongStorage.__tostring__ = torch.LongStorage.__tostring__ +torch.CudaLongTensor.__tostring__ = torch.LongTensor.__tostring__ +torch.CudaStorage.__tostring__ = torch.FloatStorage.__tostring__ +torch.CudaTensor.__tostring__ = torch.FloatTensor.__tostring__ +torch.CudaDoubleStorage.__tostring__ = torch.DoubleStorage.__tostring__ +torch.CudaDoubleTensor.__tostring__ = torch.DoubleTensor.__tostring__ include('Tensor.lua') include('FFI.lua') @@ -16,7 +28,7 @@ function cutorch.withDevice(newDeviceID, closure) local vals = {pcall(closure)} cutorch.setDevice(curDeviceID) if vals[1] then - return unpack(vals, 2) + return unpack(vals, 2) end error(unpack(vals, 2)) end @@ -37,6 +49,4 @@ function cutorch.createCudaHostTensor(...) return torch.FloatTensor(storage, 1, size:storage()) end -cutorch.setHeapTracking(true) - return cutorch diff --git a/lib/THC/CMakeLists.txt b/lib/THC/CMakeLists.txt index a422789..c4591b8 100644 --- a/lib/THC/CMakeLists.txt +++ b/lib/THC/CMakeLists.txt @@ -135,3 +135,18 @@ INSTALL(FILES THCDeviceTensorUtils.cuh THCDeviceTensorUtils-inl.cuh DESTINATION "${THC_INSTALL_INCLUDE_SUBDIR}/THC") + +INSTALL(FILES + generic/THCStorage.c + generic/THCStorage.cu + generic/THCStorage.h + generic/THCTensor.c + generic/THCTensor.cu + generic/THCTensor.h + generic/THCStorageCopy.c + generic/THCStorageCopy.cu + generic/THCStorageCopy.h + generic/THCTensorCopy.c + generic/THCTensorCopy.cu + generic/THCTensorCopy.h + DESTINATION "${THC_INSTALL_INCLUDE_SUBDIR}/THC/generic") diff --git a/lib/THC/THCGenerateAllTypes.h b/lib/THC/THCGenerateAllTypes.h new file mode 100644 index 0000000..4793d06 --- /dev/null +++ b/lib/THC/THCGenerateAllTypes.h @@ -0,0 +1,114 @@ +#ifndef THC_GENERIC_FILE +#error "You must define THC_GENERIC_FILE before including THGenerateAllTypes.h" +#endif + +#define THCTypeIdxByte 1 +#define THCTypeIdxChar 2 +#define THCTypeIdxShort 3 +#define THCTypeIdxInt 4 +#define THCTypeIdxLong 5 +#define THCTypeIdxFloat 6 +#define THCTypeIdxDouble 7 +#define THCTypeIdx_(T) TH_CONCAT_2(THCTypeIdx,T) + +#define real unsigned char +#define accreal long +#define Real Byte +#define CReal CudaByte +#define THC_REAL_IS_BYTE +#line 1 THC_GENERIC_FILE +/*#line 1 "THByteStorage.h"*/ +#include THC_GENERIC_FILE +#undef real +#undef accreal +#undef Real +#undef CReal +#undef THC_REAL_IS_BYTE + +#define real char +#define accreal long +#define Real Char +#define CReal CudaChar +#define THC_REAL_IS_CHAR +#line 1 THC_GENERIC_FILE +#include THC_GENERIC_FILE +#undef real +#undef accreal +#undef Real +#undef CReal +#undef THC_REAL_IS_CHAR + +#define real short +#define accreal long +#define Real Short +#define CReal CudaShort +#define THC_REAL_IS_SHORT +#line 1 THC_GENERIC_FILE +#include THC_GENERIC_FILE +#undef real +#undef accreal +#undef Real +#undef CReal +#undef THC_REAL_IS_SHORT + +#define real int +#define accreal long +#define Real Int +#define CReal CudaInt +#define THC_REAL_IS_INT +#line 1 THC_GENERIC_FILE +#include THC_GENERIC_FILE +#undef real +#undef accreal +#undef Real +#undef CReal +#undef THC_REAL_IS_INT + +#define real long +#define accreal long +#define Real Long +#define CReal CudaLong +#define THC_REAL_IS_LONG +#line 1 THC_GENERIC_FILE +#include THC_GENERIC_FILE +#undef real +#undef accreal +#undef Real +#undef CReal +#undef THC_REAL_IS_LONG + +#define real float +#define accreal double +#define Real Float +#define CReal Cuda +#define THC_REAL_IS_FLOAT +#line 1 THC_GENERIC_FILE +#include THC_GENERIC_FILE +#undef real +#undef accreal +#undef Real +#undef CReal +#undef THC_REAL_IS_FLOAT + +#define real double +#define accreal double +#define Real Double +#define CReal CudaDouble +#define THC_REAL_IS_DOUBLE +#line 1 THC_GENERIC_FILE +#include THC_GENERIC_FILE +#undef real +#undef accreal +#undef Real +#undef CReal +#undef THC_REAL_IS_DOUBLE + +#undef THCTypeIdxByte +#undef THCTypeIdxChar +#undef THCTypeIdxShort +#undef THCTypeIdxInt +#undef THCTypeIdxLong +#undef THCTypeIdxFloat +#undef THCTypeIdxDouble + +#undef THC_GENERIC_FILE diff --git a/lib/THC/THCStorage.c b/lib/THC/THCStorage.c new file mode 100644 index 0000000..6fc9574 --- /dev/null +++ b/lib/THC/THCStorage.c @@ -0,0 +1,6 @@ +#include "THCStorage.h" +#include "THCGeneral.h" +#include "THAtomic.h" + +#include "generic/THCStorage.c" +#include "THCGenerateAllTypes.h" diff --git a/lib/THC/THCStorage.cu b/lib/THC/THCStorage.cu new file mode 100644 index 0000000..2ceb0c7 --- /dev/null +++ b/lib/THC/THCStorage.cu @@ -0,0 +1,10 @@ +#include "THCStorage.h" + +#include <thrust/device_ptr.h> +#include <thrust/fill.h> +#if CUDA_VERSION >= 7000 +#include <thrust/system/cuda/execution_policy.h> +#endif + +#include "generic/THCStorage.cu" +#include "THCGenerateAllTypes.h" diff --git a/lib/THC/THCStorage.h b/lib/THC/THCStorage.h new file mode 100644 index 0000000..ac1cd70 --- /dev/null +++ b/lib/THC/THCStorage.h @@ -0,0 +1,17 @@ +#ifndef THC_STORAGE_INC +#define THC_STORAGE_INC + +#include "THStorage.h" +#include "THCGeneral.h" + +#define THCStorage TH_CONCAT_3(TH,CReal,Storage) +#define THCStorage_(NAME) TH_CONCAT_4(TH,CReal,Storage_,NAME) + +/* fast access methods */ +#define THC_STORAGE_GET(storage, idx) ((storage)->data[(idx)]) +#define THC_STORAGE_SET(storage, idx, value) ((storage)->data[(idx)] = (value)) + +#include "generic/THCStorage.h" +#include "THCGenerateAllTypes.h" + +#endif diff --git a/lib/THC/THCStorageCopy.c b/lib/THC/THCStorageCopy.c new file mode 100644 index 0000000..76a8076 --- /dev/null +++ b/lib/THC/THCStorageCopy.c @@ -0,0 +1,5 @@ +#include "THCStorageCopy.h" +#include "THCGeneral.h" + +#include "generic/THCStorageCopy.c" +#include "THCGenerateAllTypes.h" diff --git a/lib/THC/THCStorageCopy.cu b/lib/THC/THCStorageCopy.cu new file mode 100644 index 0000000..b8b0417 --- /dev/null +++ b/lib/THC/THCStorageCopy.cu @@ -0,0 +1,5 @@ +#include "THCStorageCopy.h" +#include "THCGeneral.h" + +#include "generic/THCStorageCopy.cu" +#include "THCGenerateAllTypes.h" diff --git a/lib/THC/THCStorageCopy.h b/lib/THC/THCStorageCopy.h new file mode 100644 index 0000000..ec8011d --- /dev/null +++ b/lib/THC/THCStorageCopy.h @@ -0,0 +1,10 @@ +#ifndef THC_STORAGE_COPY_INC +#define THC_STORAGE_COPY_INC + +#include "THCStorage.h" +#include "THCGeneral.h" + +#include "generic/THCStorageCopy.h" +#include "THCGenerateAllTypes.h" + +#endif diff --git a/lib/THC/THCTensor.c b/lib/THC/THCTensor.c new file mode 100644 index 0000000..3bcf69d --- /dev/null +++ b/lib/THC/THCTensor.c @@ -0,0 +1,7 @@ +#include "THCGeneral.h" +#include "THCTensor.h" +#include "THCTensorCopy.h" +#include "THAtomic.h" + +#include "generic/THCTensor.c" +#include "THCGenerateAllTypes.h" diff --git a/lib/THC/THCTensor.cu b/lib/THC/THCTensor.cu new file mode 100644 index 0000000..1e6fc20 --- /dev/null +++ b/lib/THC/THCTensor.cu @@ -0,0 +1,4 @@ +#include "THCTensor.h" + +#include "generic/THCTensor.cu" +#include "THCGenerateAllTypes.h" diff --git a/lib/THC/THCTensor.h b/lib/THC/THCTensor.h new file mode 100644 index 0000000..d4eb49a --- /dev/null +++ b/lib/THC/THCTensor.h @@ -0,0 +1,14 @@ +#ifndef THC_TENSOR_INC +#define THC_TENSOR_INC + +#include "THTensor.h" +#include "THCStorage.h" +#include "THCGeneral.h" + +#define THCTensor TH_CONCAT_3(TH,CReal,Tensor) +#define THCTensor_(NAME) TH_CONCAT_4(TH,CReal,Tensor_,NAME) + +#include "generic/THCTensor.h" +#include "THCGenerateAllTypes.h" + +#endif diff --git a/lib/THC/THCTensorCopy.c b/lib/THC/THCTensorCopy.c new file mode 100644 index 0000000..6813530 --- /dev/null +++ b/lib/THC/THCTensorCopy.c @@ -0,0 +1,6 @@ +#include "THCTensorCopy.h" +#include "THCGeneral.h" +#include "THCTensor.h" + +#include "generic/THCTensorCopy.c" +#include "THCGenerateAllTypes.h" diff --git a/lib/THC/THCTensorCopy.cu b/lib/THC/THCTensorCopy.cu new file mode 100644 index 0000000..9e59dd8 --- /dev/null +++ b/lib/THC/THCTensorCopy.cu @@ -0,0 +1,10 @@ +#include "THCApply.cuh" + +inline int curGPU() { + int curDev; + THCudaCheck(cudaGetDevice(&curDev)); + return curDev; +} + +#include "generic/THCTensorCopy.cu" +#include "THCGenerateAllTypes.h" diff --git a/lib/THC/THCTensorCopy.h b/lib/THC/THCTensorCopy.h new file mode 100644 index 0000000..fc206cb --- /dev/null +++ b/lib/THC/THCTensorCopy.h @@ -0,0 +1,10 @@ +#ifndef TH_CUDA_TENSOR_COPY_INC +#define TH_CUDA_TENSOR_COPY_INC + +#include "THCTensor.h" +#include "THCGeneral.h" + +#include "generic/THCTensorCopy.h" +#include "THCGenerateAllTypes.h" + +#endif diff --git a/lib/THC/generic/THCStorage.c b/lib/THC/generic/THCStorage.c index e7c529c..946ebc1 100644 --- a/lib/THC/generic/THCStorage.c +++ b/lib/THC/generic/THCStorage.c @@ -1,39 +1,39 @@ -#include "THCStorage.h" -#include "THCGeneral.h" -#include "THAtomic.h" +#ifndef THC_GENERIC_FILE +#define THC_GENERIC_FILE "generic/THCStorage.c" +#else -float* THCudaStorage_data(THCState *state, const THCudaStorage *self) +real* THCStorage_(data)(THCState *state, const THCStorage *self) { return self->data; } -long THCudaStorage_size(THCState *state, const THCudaStorage *self) +long THCStorage_(size)(THCState *state, const THCStorage *self) { return self->size; } -int THCudaStorage_elementSize(THCState *state) +int THCStorage_(elementSize)(THCState *state) { - return sizeof(float); + return sizeof(real); } -void THCudaStorage_set(THCState *state, THCudaStorage *self, long index, float value) +void THCStorage_(set)(THCState *state, THCStorage *self, long index, real value) { THArgCheck((index >= 0) && (index < self->size), 2, "index out of bounds"); - THCudaCheck(cudaMemcpy(self->data + index, &value, sizeof(float), cudaMemcpyHostToDevice)); + THCudaCheck(cudaMemcpy(self->data + index, &value, sizeof(real), cudaMemcpyHostToDevice)); } -float THCudaStorage_get(THCState *state, const THCudaStorage *self, long index) +real THCStorage_(get)(THCState *state, const THCStorage *self, long index) { - float value; + real value; THArgCheck((index >= 0) && (index < self->size), 2, "index out of bounds"); - THCudaCheck(cudaMemcpy(&value, self->data + index, sizeof(float), cudaMemcpyDeviceToHost)); + THCudaCheck(cudaMemcpy(&value, self->data + index, sizeof(real), cudaMemcpyDeviceToHost)); return value; } -THCudaStorage* THCudaStorage_new(THCState *state) +THCStorage* THCStorage_(new)(THCState *state) { - THCudaStorage *storage = (THCudaStorage*)THAlloc(sizeof(THCudaStorage)); + THCStorage *storage = (THCStorage*)THAlloc(sizeof(THCStorage)); storage->data = NULL; storage->size = 0; storage->refcount = 1; @@ -41,20 +41,20 @@ THCudaStorage* THCudaStorage_new(THCState *state) return storage; } -THCudaStorage* THCudaStorage_newWithSize(THCState *state, long size) +THCStorage* THCStorage_(newWithSize)(THCState *state, long size) { THArgCheck(size >= 0, 2, "invalid size"); if(size > 0) { - THCudaStorage *storage = (THCudaStorage*)THAlloc(sizeof(THCudaStorage)); + THCStorage *storage = (THCStorage*)THAlloc(sizeof(THCStorage)); // update heap *before* attempting malloc, to free space for the malloc - THCHeapUpdate(state, size * sizeof(float)); + THCHeapUpdate(state, size * sizeof(real)); cudaError_t err = - THCudaMalloc(state, (void**)&(storage->data), size * sizeof(float)); + THCudaMalloc(state, (void**)&(storage->data), size * sizeof(real)); if(err != cudaSuccess){ - THCHeapUpdate(state, -size * sizeof(float)); + THCHeapUpdate(state, -size * sizeof(real)); } THCudaCheck(err); @@ -65,53 +65,53 @@ THCudaStorage* THCudaStorage_newWithSize(THCState *state, long size) } else { - return THCudaStorage_new(state); + return THCStorage_(new)(state); } } -THCudaStorage* THCudaStorage_newWithSize1(THCState *state, float data0) +THCStorage* THCStorage_(newWithSize1)(THCState *state, real data0) { - THCudaStorage *self = THCudaStorage_newWithSize(state, 1); - THCudaStorage_set(state, self, 0, data0); + THCStorage *self = THCStorage_(newWithSize)(state, 1); + THCStorage_(set)(state, self, 0, data0); return self; } -THCudaStorage* THCudaStorage_newWithSize2(THCState *state, float data0, float data1) +THCStorage* THCStorage_(newWithSize2)(THCState *state, real data0, real data1) { - THCudaStorage *self = THCudaStorage_newWithSize(state, 2); - THCudaStorage_set(state, self, 0, data0); - THCudaStorage_set(state, self, 1, data1); + THCStorage *self = THCStorage_(newWithSize)(state, 2); + THCStorage_(set)(state, self, 0, data0); + THCStorage_(set)(state, self, 1, data1); return self; } -THCudaStorage* THCudaStorage_newWithSize3(THCState *state, float data0, float data1, float data2) +THCStorage* THCStorage_(newWithSize3)(THCState *state, real data0, real data1, real data2) { - THCudaStorage *self = THCudaStorage_newWithSize(state, 3); - THCudaStorage_set(state, self, 0, data0); - THCudaStorage_set(state, self, 1, data1); - THCudaStorage_set(state, self, 2, data2); + THCStorage *self = THCStorage_(newWithSize)(state, 3); + THCStorage_(set)(state, self, 0, data0); + THCStorage_(set)(state, self, 1, data1); + THCStorage_(set)(state, self, 2, data2); return self; } -THCudaStorage* THCudaStorage_newWithSize4(THCState *state, float data0, float data1, float data2, float data3) +THCStorage* THCStorage_(newWithSize4)(THCState *state, real data0, real data1, real data2, real data3) { - THCudaStorage *self = THCudaStorage_newWithSize(state, 4); - THCudaStorage_set(state, self, 0, data0); - THCudaStorage_set(state, self, 1, data1); - THCudaStorage_set(state, self, 2, data2); - THCudaStorage_set(state, self, 3, data3); + THCStorage *self = THCStorage_(newWithSize)(state, 4); + THCStorage_(set)(state, self, 0, data0); + THCStorage_(set)(state, self, 1, data1); + THCStorage_(set)(state, self, 2, data2); + THCStorage_(set)(state, self, 3, data3); return self; } -THCudaStorage* THCudaStorage_newWithMapping(THCState *state, const char *fileName, long size, int isShared) +THCStorage* THCStorage_(newWithMapping)(THCState *state, const char *fileName, long size, int isShared) { - THError("not available yet for THCudaStorage"); + THError("not available yet for THCStorage"); return NULL; } -THCudaStorage* THCudaStorage_newWithData(THCState *state, float *data, long size) +THCStorage* THCStorage_(newWithData)(THCState *state, real *data, long size) { - THCudaStorage *storage = (THCudaStorage*)THAlloc(sizeof(THCudaStorage)); + THCStorage *storage = (THCStorage*)THAlloc(sizeof(THCStorage)); storage->data = data; storage->size = size; storage->refcount = 1; @@ -119,23 +119,23 @@ THCudaStorage* THCudaStorage_newWithData(THCState *state, float *data, long size return storage; } -void THCudaStorage_setFlag(THCState *state, THCudaStorage *storage, const char flag) +void THCStorage_(setFlag)(THCState *state, THCStorage *storage, const char flag) { storage->flag |= flag; } -void THCudaStorage_clearFlag(THCState *state, THCudaStorage *storage, const char flag) +void THCStorage_(clearFlag)(THCState *state, THCStorage *storage, const char flag) { storage->flag &= ~flag; } -void THCudaStorage_retain(THCState *state, THCudaStorage *self) +void THCStorage_(retain)(THCState *state, THCStorage *self) { if(self && (self->flag & TH_STORAGE_REFCOUNTED)) THAtomicIncrementRef(&self->refcount); } -void THCudaStorage_free(THCState *state, THCudaStorage *self) +void THCStorage_(free)(THCState *state, THCStorage *self) { if(!(self->flag & TH_STORAGE_REFCOUNTED)) return; @@ -143,9 +143,10 @@ void THCudaStorage_free(THCState *state, THCudaStorage *self) if (THAtomicDecrementRef(&self->refcount)) { if(self->flag & TH_STORAGE_FREEMEM) { - THCHeapUpdate(state, -self->size * sizeof(float)); + THCHeapUpdate(state, -self->size * sizeof(real)); THCudaCheck(THCudaFree(state, self->data)); } THFree(self); } } +#endif diff --git a/lib/THC/generic/THCStorage.cu b/lib/THC/generic/THCStorage.cu index c61f08e..740b2bd 100644 --- a/lib/THC/generic/THCStorage.cu +++ b/lib/THC/generic/THCStorage.cu @@ -1,14 +1,10 @@ -#include "THCStorage.h" +#ifndef THC_GENERIC_FILE +#define THC_GENERIC_FILE "generic/THCStorage.cu" +#else -#include <thrust/device_ptr.h> -#include <thrust/fill.h> -#if CUDA_VERSION >= 7000 -#include <thrust/system/cuda/execution_policy.h> -#endif - -void THCudaStorage_fill(THCState *state, THCudaStorage *self, float value) +void THCStorage_(fill)(THCState *state, THCStorage *self, real value) { - thrust::device_ptr<float> self_data(self->data); + thrust::device_ptr<real> self_data(self->data); thrust::fill( #if CUDA_VERSION >= 7000 thrust::cuda::par.on(THCState_getCurrentStream(state)), @@ -16,7 +12,7 @@ void THCudaStorage_fill(THCState *state, THCudaStorage *self, float value) self_data, self_data+self->size, value); } -void THCudaStorage_resize(THCState *state, THCudaStorage *self, long size) +void THCStorage_(resize)(THCState *state, THCStorage *self, long size) { THArgCheck(size >= 0, 2, "invalid size"); @@ -27,33 +23,34 @@ void THCudaStorage_resize(THCState *state, THCudaStorage *self, long size) { if(self->flag & TH_STORAGE_FREEMEM) { THCudaCheck(THCudaFree(state, self->data)); - THCHeapUpdate(state, -self->size * sizeof(float)); + THCHeapUpdate(state, -self->size * sizeof(real)); } self->data = NULL; self->size = 0; } else { - float *data = NULL; + real *data = NULL; // update heap *before* attempting malloc, to free space for the malloc - THCHeapUpdate(state, size * sizeof(float)); - cudaError_t err = THCudaMalloc(state, (void**)(&data), size * sizeof(float)); + THCHeapUpdate(state, size * sizeof(real)); + cudaError_t err = THCudaMalloc(state, (void**)(&data), size * sizeof(real)); if(err != cudaSuccess) { - THCHeapUpdate(state, -size * sizeof(float)); + THCHeapUpdate(state, -size * sizeof(real)); } THCudaCheck(err); if (self->data) { THCudaCheck(cudaMemcpyAsync(data, self->data, - THMin(self->size, size) * sizeof(float), + THMin(self->size, size) * sizeof(real), cudaMemcpyDeviceToDevice, THCState_getCurrentStream(state))); THCudaCheck(THCudaFree(state, self->data)); - THCHeapUpdate(state, -self->size * sizeof(float)); + THCHeapUpdate(state, -self->size * sizeof(real)); } self->data = data; self->size = size; } } +#endif diff --git a/lib/THC/generic/THCStorage.h b/lib/THC/generic/THCStorage.h index 6549e5c..a8c5f5f 100644 --- a/lib/THC/generic/THCStorage.h +++ b/lib/THC/generic/THCStorage.h @@ -1,57 +1,54 @@ -#ifndef THC_STORAGE_INC -#define THC_STORAGE_INC - -#include "THStorage.h" -#include "THCGeneral.h" +#ifndef THC_GENERIC_FILE +#define THC_GENERIC_FILE "generic/THCStorage.h" +#else #define TH_STORAGE_REFCOUNTED 1 #define TH_STORAGE_RESIZABLE 2 #define TH_STORAGE_FREEMEM 4 - -typedef struct THCudaStorage +typedef struct THCStorage { - float *data; + real *data; long size; int refcount; char flag; THAllocator *allocator; void *allocatorContext; - struct THCudaStorage *view; -} THCudaStorage; + struct THCStorage *view; +} THCStorage; -THC_API float* THCudaStorage_data(THCState *state, const THCudaStorage*); -THC_API long THCudaStorage_size(THCState *state, const THCudaStorage*); -THC_API int THCudaStorage_elementSize(THCState *state); +THC_API real* THCStorage_(data)(THCState *state, const THCStorage*); +THC_API long THCStorage_(size)(THCState *state, const THCStorage*); +THC_API int THCStorage_(elementSize)(THCState *state); /* slow access -- checks everything */ -THC_API void THCudaStorage_set(THCState *state, THCudaStorage*, long, float); -THC_API float THCudaStorage_get(THCState *state, const THCudaStorage*, long); +THC_API void THCStorage_(set)(THCState *state, THCStorage*, long, real); +THC_API real THCStorage_(get)(THCState *state, const THCStorage*, long); -THC_API THCudaStorage* THCudaStorage_new(THCState *state); -THC_API THCudaStorage* THCudaStorage_newWithSize(THCState *state, long size); -THC_API THCudaStorage* THCudaStorage_newWithSize1(THCState *state, float); -THC_API THCudaStorage* THCudaStorage_newWithSize2(THCState *state, float, float); -THC_API THCudaStorage* THCudaStorage_newWithSize3(THCState *state, float, float, float); -THC_API THCudaStorage* THCudaStorage_newWithSize4(THCState *state, float, float, float, float); -THC_API THCudaStorage* THCudaStorage_newWithMapping(THCState *state, const char *filename, long size, int shared); +THC_API THCStorage* THCStorage_(new)(THCState *state); +THC_API THCStorage* THCStorage_(newWithSize)(THCState *state, long size); +THC_API THCStorage* THCStorage_(newWithSize1)(THCState *state, real); +THC_API THCStorage* THCStorage_(newWithSize2)(THCState *state, real, real); +THC_API THCStorage* THCStorage_(newWithSize3)(THCState *state, real, real, real); +THC_API THCStorage* THCStorage_(newWithSize4)(THCState *state, real, real, real, real); +THC_API THCStorage* THCStorage_(newWithMapping)(THCState *state, const char *filename, long size, int shared); /* takes ownership of data */ -THC_API THCudaStorage* THCudaStorage_newWithData(THCState *state, float *data, long size); +THC_API THCStorage* THCStorage_(newWithData)(THCState *state, real *data, long size); -THC_API THCudaStorage* THCudaStorage_newWithAllocator(THCState *state, long size, +THC_API THCStorage* THCStorage_(newWithAllocator)(THCState *state, long size, THAllocator* allocator, void *allocatorContext); -THC_API THCudaStorage* THCudaStorage_newWithDataAndAllocator( - THCState *state, float* data, long size, THAllocator* allocator, void *allocatorContext); +THC_API THCStorage* THCStorage_(newWithDataAndAllocator)( + THCState *state, real* data, long size, THAllocator* allocator, void *allocatorContext); -THC_API void THCudaStorage_setFlag(THCState *state, THCudaStorage *storage, const char flag); -THC_API void THCudaStorage_clearFlag(THCState *state, THCudaStorage *storage, const char flag); -THC_API void THCudaStorage_retain(THCState *state, THCudaStorage *storage); +THC_API void THCStorage_(setFlag)(THCState *state, THCStorage *storage, const char flag); +THC_API void THCStorage_(clearFlag)(THCState *state, THCStorage *storage, const char flag); +THC_API void THCStorage_(retain)(THCState *state, THCStorage *storage); -THC_API void THCudaStorage_free(THCState *state, THCudaStorage *storage); -THC_API void THCudaStorage_resize(THCState *state, THCudaStorage *storage, long size); -THC_API void THCudaStorage_fill(THCState *state, THCudaStorage *storage, float value); +THC_API void THCStorage_(free)(THCState *state, THCStorage *storage); +THC_API void THCStorage_(resize)(THCState *state, THCStorage *storage, long size); +THC_API void THCStorage_(fill)(THCState *state, THCStorage *storage, real value); #endif diff --git a/lib/THC/generic/THCStorageCopy.c b/lib/THC/generic/THCStorageCopy.c index 3517b2c..1696307 100644 --- a/lib/THC/generic/THCStorageCopy.c +++ b/lib/THC/generic/THCStorageCopy.c @@ -1,21 +1,26 @@ -#include "THCStorageCopy.h" -#include "THCGeneral.h" +#ifndef THC_GENERIC_FILE +#define THC_GENERIC_FILE "generic/THCStorageCopy.c" +#else -void THCudaStorage_copyFloat(THCState *state, THCudaStorage *self, struct THFloatStorage *src) +void THCStorage_(copyCPU)(THCState *state, THCStorage *self, struct THStorage *src) { THArgCheck(self->size == src->size, 2, "size does not match"); - THCudaCheck(cudaMemcpy(self->data, src->data, self->size * sizeof(float), cudaMemcpyHostToDevice)); + THCudaCheck(cudaMemcpy(self->data, src->data, self->size * sizeof(real), cudaMemcpyHostToDevice)); } -#define TH_CUDA_STORAGE_IMPLEMENT_COPY(TYPEC) \ - void THCudaStorage_copy##TYPEC(THCState *state, THCudaStorage *self, struct TH##TYPEC##Storage *src) \ - { \ - THFloatStorage *buffer; \ - THArgCheck(self->size == src->size, 2, "size does not match"); \ - buffer = THFloatStorage_newWithSize(src->size); \ - THFloatStorage_copy##TYPEC(buffer, src); \ - THCudaStorage_copyFloat(state, self, buffer); \ - THFloatStorage_free(buffer); \ +#define TH_CUDA_STORAGE_IMPLEMENT_COPY(TYPEC) \ + void THCStorage_(copy##TYPEC)(THCState *state, THCStorage *self, struct TH##TYPEC##Storage *src) \ + { \ + if(THCTypeIdx_(Real) == THCTypeIdx_(TYPEC)) { \ + THCStorage_(copyCPU)(state, self, (THStorage*) src); /* cast just removes compiler warning */ \ + } else { \ + THStorage *buffer; \ + THArgCheck(self->size == src->size, 2, "size does not match"); \ + buffer = THStorage_(newWithSize)(src->size); \ + THStorage_(copy##TYPEC)(buffer, src); \ + THCStorage_(copyCPU)(state, self, buffer); \ + THStorage_(free)(buffer); \ + } \ } TH_CUDA_STORAGE_IMPLEMENT_COPY(Byte) @@ -23,23 +28,28 @@ TH_CUDA_STORAGE_IMPLEMENT_COPY(Char) TH_CUDA_STORAGE_IMPLEMENT_COPY(Short) TH_CUDA_STORAGE_IMPLEMENT_COPY(Int) TH_CUDA_STORAGE_IMPLEMENT_COPY(Long) +TH_CUDA_STORAGE_IMPLEMENT_COPY(Float) TH_CUDA_STORAGE_IMPLEMENT_COPY(Double) -void THFloatStorage_copyCuda(THCState *state, THFloatStorage *self, struct THCudaStorage *src) +void THStorage_(copyCuda)(THCState *state, THStorage *self, struct THCStorage *src) { THArgCheck(self->size == src->size, 2, "size does not match"); - THCudaCheck(cudaMemcpy(self->data, src->data, self->size * sizeof(float), cudaMemcpyDeviceToHost)); + THCudaCheck(cudaMemcpy(self->data, src->data, self->size * sizeof(real), cudaMemcpyDeviceToHost)); } -#define TH_CUDA_STORAGE_IMPLEMENT_COPYTO(TYPEC) \ - void TH##TYPEC##Storage_copyCuda(THCState *state, TH##TYPEC##Storage *self, struct THCudaStorage *src) \ +#define TH_CUDA_STORAGE_IMPLEMENT_COPYTO(TYPEC) \ + void TH_CONCAT_4(TH,TYPEC,Storage_copyCuda,Real)(THCState *state, TH##TYPEC##Storage *self, struct THCStorage *src) \ { \ - THFloatStorage *buffer; \ - THArgCheck(self->size == src->size, 2, "size does not match"); \ - buffer = THFloatStorage_newWithSize(src->size); \ - THFloatStorage_copyCuda(state, buffer, src); \ - TH##TYPEC##Storage_copyFloat(self, buffer); \ - THFloatStorage_free(buffer); \ + if(THCTypeIdx_(Real) == THCTypeIdx_(TYPEC)) { \ + THStorage_(copyCuda)(state, (THStorage*) self, src); /* cast just removes compiler warnings */ \ + } else { \ + THStorage *buffer; \ + THArgCheck(self->size == src->size, 2, "size does not match"); \ + buffer = THStorage_(newWithSize)(src->size); \ + THStorage_(copyCuda)(state, buffer, src); \ + TH_CONCAT_4(TH,TYPEC,Storage_copy,Real)(self, buffer); \ + THStorage_(free)(buffer); \ + } \ } TH_CUDA_STORAGE_IMPLEMENT_COPYTO(Byte) @@ -47,4 +57,7 @@ TH_CUDA_STORAGE_IMPLEMENT_COPYTO(Char) TH_CUDA_STORAGE_IMPLEMENT_COPYTO(Short) TH_CUDA_STORAGE_IMPLEMENT_COPYTO(Int) TH_CUDA_STORAGE_IMPLEMENT_COPYTO(Long) +TH_CUDA_STORAGE_IMPLEMENT_COPYTO(Float) TH_CUDA_STORAGE_IMPLEMENT_COPYTO(Double) + +#endif diff --git a/lib/THC/generic/THCStorageCopy.cu b/lib/THC/generic/THCStorageCopy.cu index c3205b5..ffe8332 100644 --- a/lib/THC/generic/THCStorageCopy.cu +++ b/lib/THC/generic/THCStorageCopy.cu @@ -1,19 +1,49 @@ -#include "THCStorageCopy.h" -#include "THCGeneral.h" +#ifndef THC_GENERIC_FILE +#define THC_GENERIC_FILE "generic/THCStorageCopy.cu" +#else -void THCudaStorage_rawCopy(THCState *state, THCudaStorage *self, float *src) +void THCStorage_(rawCopy)(THCState *state, THCStorage *self, real *src) { - THCudaCheck(cudaMemcpyAsync(self->data, src, self->size * sizeof(float), cudaMemcpyDeviceToDevice, THCState_getCurrentStream(state))); + THCudaCheck(cudaMemcpyAsync(self->data, src, self->size * sizeof(real), cudaMemcpyDeviceToDevice, THCState_getCurrentStream(state))); } -void THCudaStorage_copy(THCState *state, THCudaStorage *self, THCudaStorage *src) +void THCStorage_(copy)(THCState *state, THCStorage *self, THCStorage *src) { THArgCheck(self->size == src->size, 2, "size does not match"); - THCudaCheck(cudaMemcpyAsync(self->data, src->data, self->size * sizeof(float), cudaMemcpyDeviceToDevice, THCState_getCurrentStream(state))); + THCudaCheck(cudaMemcpyAsync(self->data, src->data, self->size * sizeof(real), cudaMemcpyDeviceToDevice, THCState_getCurrentStream(state))); } -void THCudaStorage_copyCuda(THCState *state, THCudaStorage *self, THCudaStorage *src) +void THCStorage_(copyCuda)(THCState *state, THCStorage *self, THCStorage *src) { THArgCheck(self->size == src->size, 2, "size does not match"); - THCudaCheck(cudaMemcpyAsync(self->data, src->data, self->size * sizeof(float), cudaMemcpyDeviceToDevice, THCState_getCurrentStream(state))); + THCudaCheck(cudaMemcpyAsync(self->data, src->data, self->size * sizeof(real), cudaMemcpyDeviceToDevice, THCState_getCurrentStream(state))); } + +// conversions are mediated by the CPU +// yes, this is slow; feel free to write CUDA kernels for this +#define THC_CUDA_STORAGE_IMPLEMENT_COPY(TYPEC,TYPECUDA) \ + void THCStorage_(copyCuda##TYPEC)(THCState *state, THCStorage *self, struct THCuda##TYPECUDA##Storage *src) \ + { \ + if(THCTypeIdx_(Real) == THCTypeIdx_(TYPEC)) { \ + THCStorage_(copy)(state, self, (THCStorage*) src); /* cast just removes compiler warning */ \ + } else { \ + THArgCheck(self->size == src->size, 2, "size does not match"); \ + TH##TYPEC##Storage *buffer1 = TH##TYPEC##Storage_newWithSize(src->size); \ + THStorage *buffer2 = THStorage_(newWithSize)(src->size); \ + TH##TYPEC##Storage_copyCuda(state, buffer1, src); \ + THStorage_(copy##TYPEC)(buffer2, buffer1); \ + THCStorage_(copyCPU)(state, self, buffer2); \ + TH##TYPEC##Storage_free(buffer1); \ + THStorage_(free)(buffer2); \ + } \ + } + +THC_CUDA_STORAGE_IMPLEMENT_COPY(Byte,Byte) +THC_CUDA_STORAGE_IMPLEMENT_COPY(Char,Char) +THC_CUDA_STORAGE_IMPLEMENT_COPY(Short,Short) +THC_CUDA_STORAGE_IMPLEMENT_COPY(Int,Int) +THC_CUDA_STORAGE_IMPLEMENT_COPY(Long,Long) +THC_CUDA_STORAGE_IMPLEMENT_COPY(Float,) // i.e. float +THC_CUDA_STORAGE_IMPLEMENT_COPY(Double,Double) + +#endif diff --git a/lib/THC/generic/THCStorageCopy.h b/lib/THC/generic/THCStorageCopy.h index 4c2f8df..d80661d 100644 --- a/lib/THC/generic/THCStorageCopy.h +++ b/lib/THC/generic/THCStorageCopy.h @@ -1,28 +1,37 @@ -#ifndef THC_STORAGE_COPY_INC -#define THC_STORAGE_COPY_INC - -#include "THCStorage.h" -#include "THCGeneral.h" +#ifndef THC_GENERIC_FILE +#define THC_GENERIC_FILE "generic/THCStorageCopy.h" +#else /* Support for copy between different Storage types */ -THC_API void THCudaStorage_rawCopy(THCState *state, THCudaStorage *storage, float *src); -THC_API void THCudaStorage_copy(THCState *state, THCudaStorage *storage, THCudaStorage *src); -THC_API void THCudaStorage_copyByte(THCState *state, THCudaStorage *storage, struct THByteStorage *src); -THC_API void THCudaStorage_copyChar(THCState *state, THCudaStorage *storage, struct THCharStorage *src); -THC_API void THCudaStorage_copyShort(THCState *state, THCudaStorage *storage, struct THShortStorage *src); -THC_API void THCudaStorage_copyInt(THCState *state, THCudaStorage *storage, struct THIntStorage *src); -THC_API void THCudaStorage_copyLong(THCState *state, THCudaStorage *storage, struct THLongStorage *src); -THC_API void THCudaStorage_copyFloat(THCState *state, THCudaStorage *storage, struct THFloatStorage *src); -THC_API void THCudaStorage_copyDouble(THCState *state, THCudaStorage *storage, struct THDoubleStorage *src); +THC_API void THCStorage_(rawCopy)(THCState *state, THCStorage *storage, real *src); +THC_API void THCStorage_(copy)(THCState *state, THCStorage *storage, THCStorage *src); +THC_API void THCStorage_(copyByte)(THCState *state, THCStorage *storage, struct THByteStorage *src); +THC_API void THCStorage_(copyChar)(THCState *state, THCStorage *storage, struct THCharStorage *src); +THC_API void THCStorage_(copyShort)(THCState *state, THCStorage *storage, struct THShortStorage *src); +THC_API void THCStorage_(copyInt)(THCState *state, THCStorage *storage, struct THIntStorage *src); +THC_API void THCStorage_(copyLong)(THCState *state, THCStorage *storage, struct THLongStorage *src); +THC_API void THCStorage_(copyFloat)(THCState *state, THCStorage *storage, struct THFloatStorage *src); +THC_API void THCStorage_(copyDouble)(THCState *state, THCStorage *storage, struct THDoubleStorage *src); + +THC_API void THCStorage_(copyCudaByte)(THCState *state, THCStorage *storage, struct THCudaByteStorage *src); +THC_API void THCStorage_(copyCudaChar)(THCState *state, THCStorage *storage, struct THCudaCharStorage *src); +THC_API void THCStorage_(copyCudaShort)(THCState *state, THCStorage *storage, struct THCudaShortStorage *src); +THC_API void THCStorage_(copyCudaInt)(THCState *state, THCStorage *storage, struct THCudaIntStorage *src); +THC_API void THCStorage_(copyCudaLong)(THCState *state, THCStorage *storage, struct THCudaLongStorage *src); +THC_API void THCStorage_(copyCudaFloat)(THCState *state, THCStorage *storage, struct THCudaStorage *src); +THC_API void THCStorage_(copyCudaDouble)(THCState *state, THCStorage *storage, struct THCudaDoubleStorage *src); + -THC_API void THByteStorage_copyCuda(THCState *state, THByteStorage *self, struct THCudaStorage *src); -THC_API void THCharStorage_copyCuda(THCState *state, THCharStorage *self, struct THCudaStorage *src); -THC_API void THShortStorage_copyCuda(THCState *state, THShortStorage *self, struct THCudaStorage *src); -THC_API void THIntStorage_copyCuda(THCState *state, THIntStorage *self, struct THCudaStorage *src); -THC_API void THLongStorage_copyCuda(THCState *state, THLongStorage *self, struct THCudaStorage *src); -THC_API void THFloatStorage_copyCuda(THCState *state, THFloatStorage *self, struct THCudaStorage *src); -THC_API void THDoubleStorage_copyCuda(THCState *state, THDoubleStorage *self, struct THCudaStorage *src); -THC_API void THCudaStorage_copyCuda(THCState *state, THCudaStorage *self, THCudaStorage *src); +THC_API void TH_CONCAT_2(THByteStorage_copyCuda , Real)(THCState *state, THByteStorage *self, struct THCStorage *src); +THC_API void TH_CONCAT_2(THCharStorage_copyCuda , Real)(THCState *state, THCharStorage *self, struct THCStorage *src); +THC_API void TH_CONCAT_2(THShortStorage_copyCuda , Real)(THCState *state, THShortStorage *self, struct THCStorage *src); +THC_API void TH_CONCAT_2(THIntStorage_copyCuda , Real)(THCState *state, THIntStorage *self, struct THCStorage *src); +THC_API void TH_CONCAT_2(THLongStorage_copyCuda , Real)(THCState *state, THLongStorage *self, struct THCStorage *src); +THC_API void TH_CONCAT_2(THFloatStorage_copyCuda , Real)(THCState *state, THFloatStorage *self, struct THCStorage *src); +THC_API void TH_CONCAT_2(THDoubleStorage_copyCuda, Real)(THCState *state, THDoubleStorage *self, struct THCStorage *src); +THC_API void THStorage_(copyCuda)(THCState *state, THStorage *self, THCStorage *src); +THC_API void THCStorage_(copyCuda)(THCState *state, THCStorage *self, THCStorage *src); +THC_API void THCStorage_(copyCPU)(THCState *state, THCStorage *self, THStorage *src); #endif diff --git a/lib/THC/generic/THCTensor.c b/lib/THC/generic/THCTensor.c index fa325e3..6645c0b 100644 --- a/lib/THC/generic/THCTensor.c +++ b/lib/THC/generic/THCTensor.c @@ -1,51 +1,50 @@ -#include "THCGeneral.h" -#include "THCTensor.h" -#include "THCTensorCopy.h" -#include "THAtomic.h" +#ifndef THC_GENERIC_FILE +#define THC_GENERIC_FILE "generic/THCTensor.c" +#else /**** access methods ****/ -THCudaStorage *THCudaTensor_storage(THCState *state, const THCudaTensor *self) +THCStorage *THCTensor_(storage)(THCState *state, const THCTensor *self) { return self->storage; } -long THCudaTensor_storageOffset(THCState *state, const THCudaTensor *self) +long THCTensor_(storageOffset)(THCState *state, const THCTensor *self) { return self->storageOffset; } -int THCudaTensor_nDimension(THCState *state, const THCudaTensor *self) +int THCTensor_(nDimension)(THCState *state, const THCTensor *self) { return self->nDimension; } -long THCudaTensor_size(THCState *state, const THCudaTensor *self, int dim) +long THCTensor_(size)(THCState *state, const THCTensor *self, int dim) { THArgCheck((dim >= 0) && (dim < self->nDimension), 2, "out of range"); return self->size[dim]; } -long THCudaTensor_stride(THCState *state, const THCudaTensor *self, int dim) +long THCTensor_(stride)(THCState *state, const THCTensor *self, int dim) { THArgCheck((dim >= 0) && (dim < self->nDimension), 2, "out of range"); return self->stride[dim]; } -THLongStorage *THCudaTensor_newSizeOf(THCState *state, THCudaTensor *self) +THLongStorage *THCTensor_(newSizeOf)(THCState *state, THCTensor *self) { THLongStorage *size = THLongStorage_newWithSize(self->nDimension); THLongStorage_rawCopy(size, self->size); return size; } -THLongStorage *THCudaTensor_newStrideOf(THCState *state, THCudaTensor *self) +THLongStorage *THCTensor_(newStrideOf)(THCState *state, THCTensor *self) { THLongStorage *stride = THLongStorage_newWithSize(self->nDimension); THLongStorage_rawCopy(stride, self->stride); return stride; } -float *THCudaTensor_data(THCState *state, const THCudaTensor *self) +real *THCTensor_(data)(THCState *state, const THCTensor *self) { if(self->storage) return (self->storage->data+self->storageOffset); @@ -53,36 +52,36 @@ float *THCudaTensor_data(THCState *state, const THCudaTensor *self) return NULL; } -void THCudaTensor_setFlag(THCState *state, THCudaTensor *self, const char flag) +void THCTensor_(setFlag)(THCState *state, THCTensor *self, const char flag) { self->flag |= flag; } -void THCudaTensor_clearFlag(THCState *state, THCudaTensor *self, const char flag) +void THCTensor_(clearFlag)(THCState *state, THCTensor *self, const char flag) { self->flag &= ~flag; } /**** creation methods ****/ -static void THCudaTensor_rawInit(THCState *state, THCudaTensor *self); -static void THCudaTensor_rawSet(THCState *state, THCudaTensor *self, THCudaStorage *storage, long storageOffset, int nDimension, long *size, long *stride); +static void THCTensor_(rawInit)(THCState *state, THCTensor *self); +static void THCTensor_(rawSet)(THCState *state, THCTensor *self, THCStorage *storage, long storageOffset, int nDimension, long *size, long *stride); /* Empty init */ -THCudaTensor *THCudaTensor_new(THCState *state) +THCTensor *THCTensor_(new)(THCState *state) { - THCudaTensor *self = (THCudaTensor*)THAlloc(sizeof(THCudaTensor)); - THCudaTensor_rawInit(state, self); + THCTensor *self = (THCTensor*)THAlloc(sizeof(THCTensor)); + THCTensor_(rawInit)(state, self); return self; } /* Pointer-copy init */ -THCudaTensor *THCudaTensor_newWithTensor(THCState *state, THCudaTensor *tensor) +THCTensor *THCTensor_(newWithTensor)(THCState *state, THCTensor *tensor) { - THCudaTensor *self = (THCudaTensor*)THAlloc(sizeof(THCudaTensor)); - THCudaTensor_rawInit(state, self); - THCudaTensor_rawSet(state, + THCTensor *self = (THCTensor*)THAlloc(sizeof(THCTensor)); + THCTensor_(rawInit)(state, self); + THCTensor_(rawSet)(state, self, tensor->storage, tensor->storageOffset, @@ -93,14 +92,14 @@ THCudaTensor *THCudaTensor_newWithTensor(THCState *state, THCudaTensor *tensor) } /* Storage init */ -THCudaTensor *THCudaTensor_newWithStorage(THCState *state, THCudaStorage *storage, long storageOffset, THLongStorage *size, THLongStorage *stride) +THCTensor *THCTensor_(newWithStorage)(THCState *state, THCStorage *storage, long storageOffset, THLongStorage *size, THLongStorage *stride) { - THCudaTensor *self = (THCudaTensor*)THAlloc(sizeof(THCudaTensor)); + THCTensor *self = (THCTensor*)THAlloc(sizeof(THCTensor)); if(size && stride) THArgCheck(size->size == stride->size, 4, "inconsistent size"); - THCudaTensor_rawInit(state, self); - THCudaTensor_rawSet(state, + THCTensor_(rawInit)(state, self); + THCTensor_(rawSet)(state, self, storage, storageOffset, @@ -110,28 +109,28 @@ THCudaTensor *THCudaTensor_newWithStorage(THCState *state, THCudaStorage *storag return self; } -THCudaTensor *THCudaTensor_newWithStorage1d(THCState *state, THCudaStorage *storage, long storageOffset, +THCTensor *THCTensor_(newWithStorage1d)(THCState *state, THCStorage *storage, long storageOffset, long size0, long stride0) { - return THCudaTensor_newWithStorage4d(state, storage, storageOffset, size0, stride0, -1, -1, -1, -1, -1, -1); + return THCTensor_(newWithStorage4d)(state, storage, storageOffset, size0, stride0, -1, -1, -1, -1, -1, -1); } -THCudaTensor *THCudaTensor_newWithStorage2d(THCState *state, THCudaStorage *storage, long storageOffset, +THCTensor *THCTensor_(newWithStorage2d)(THCState *state, THCStorage *storage, long storageOffset, long size0, long stride0, long size1, long stride1) { - return THCudaTensor_newWithStorage4d(state, storage, storageOffset, size0, stride0, size1, stride1, -1, -1, -1, -1); + return THCTensor_(newWithStorage4d)(state, storage, storageOffset, size0, stride0, size1, stride1, -1, -1, -1, -1); } -THCudaTensor *THCudaTensor_newWithStorage3d(THCState *state, THCudaStorage *storage, long storageOffset, +THCTensor *THCTensor_(newWithStorage3d)(THCState *state, THCStorage *storage, long storageOffset, long size0, long stride0, long size1, long stride1, long size2, long stride2) { - return THCudaTensor_newWithStorage4d(state, storage, storageOffset, size0, stride0, size1, stride1, size2, stride2, -1, -1); + return THCTensor_(newWithStorage4d)(state, storage, storageOffset, size0, stride0, size1, stride1, size2, stride2, -1, -1); } -THCudaTensor *THCudaTensor_newWithStorage4d(THCState *state, THCudaStorage *storage, long storageOffset, +THCTensor *THCTensor_(newWithStorage4d)(THCState *state, THCStorage *storage, long storageOffset, long size0, long stride0, long size1, long stride1, long size2, long stride2, @@ -140,102 +139,102 @@ THCudaTensor *THCudaTensor_newWithStorage4d(THCState *state, THCudaStorage *stor long size[4] = {size0, size1, size2, size3}; long stride[4] = {stride0, stride1, stride2, stride3}; - THCudaTensor *self = (THCudaTensor*)THAlloc(sizeof(THCudaTensor)); - THCudaTensor_rawInit(state, self); - THCudaTensor_rawSet(state, self, storage, storageOffset, 4, size, stride); + THCTensor *self = (THCTensor*)THAlloc(sizeof(THCTensor)); + THCTensor_(rawInit)(state, self); + THCTensor_(rawSet)(state, self, storage, storageOffset, 4, size, stride); return self; } -THCudaTensor *THCudaTensor_newWithSize(THCState *state, THLongStorage *size, THLongStorage *stride) +THCTensor *THCTensor_(newWithSize)(THCState *state, THLongStorage *size, THLongStorage *stride) { - return THCudaTensor_newWithStorage(state, NULL, 0, size, stride); + return THCTensor_(newWithStorage)(state, NULL, 0, size, stride); } -THCudaTensor *THCudaTensor_newWithSize1d(THCState *state, long size0) +THCTensor *THCTensor_(newWithSize1d)(THCState *state, long size0) { - return THCudaTensor_newWithSize4d(state, size0, -1, -1, -1); + return THCTensor_(newWithSize4d)(state, size0, -1, -1, -1); } -THCudaTensor *THCudaTensor_newWithSize2d(THCState *state, long size0, long size1) +THCTensor *THCTensor_(newWithSize2d)(THCState *state, long size0, long size1) { - return THCudaTensor_newWithSize4d(state, size0, size1, -1, -1); + return THCTensor_(newWithSize4d)(state, size0, size1, -1, -1); } -THCudaTensor *THCudaTensor_newWithSize3d(THCState *state, long size0, long size1, long size2) +THCTensor *THCTensor_(newWithSize3d)(THCState *state, long size0, long size1, long size2) { - return THCudaTensor_newWithSize4d(state, size0, size1, size2, -1); + return THCTensor_(newWithSize4d)(state, size0, size1, size2, -1); } -THCudaTensor *THCudaTensor_newWithSize4d(THCState *state, long size0, long size1, long size2, long size3) +THCTensor *THCTensor_(newWithSize4d)(THCState *state, long size0, long size1, long size2, long size3) { long size[4] = {size0, size1, size2, size3}; - THCudaTensor *self = (THCudaTensor*)THAlloc(sizeof(THCudaTensor)); - THCudaTensor_rawInit(state, self); - THCudaTensor_rawResize(state, self, 4, size, NULL); + THCTensor *self = (THCTensor*)THAlloc(sizeof(THCTensor)); + THCTensor_(rawInit)(state, self); + THCTensor_(rawResize)(state, self, 4, size, NULL); return self; } -THCudaTensor *THCudaTensor_newClone(THCState *state, THCudaTensor *self) +THCTensor *THCTensor_(newClone)(THCState *state, THCTensor *self) { - THCudaTensor *tensor = THCudaTensor_new(state); - THCudaTensor_resizeAs(state, tensor, self); - THCudaTensor_copy(state, tensor, self); + THCTensor *tensor = THCTensor_(new)(state); + THCTensor_(resizeAs)(state, tensor, self); + THCTensor_(copy)(state, tensor, self); return tensor; } -THCudaTensor *THCudaTensor_newContiguous(THCState *state, THCudaTensor *self) +THCTensor *THCTensor_(newContiguous)(THCState *state, THCTensor *self) { - if(!THCudaTensor_isContiguous(state, self)) - return THCudaTensor_newClone(state, self); + if(!THCTensor_(isContiguous)(state, self)) + return THCTensor_(newClone)(state, self); else { - THCudaTensor_retain(state, self); + THCTensor_(retain)(state, self); return self; } } -THCudaTensor *THCudaTensor_newSelect(THCState *state, THCudaTensor *tensor, int dimension_, long sliceIndex_) +THCTensor *THCTensor_(newSelect)(THCState *state, THCTensor *tensor, int dimension_, long sliceIndex_) { - THCudaTensor *self = THCudaTensor_newWithTensor(state, tensor); - THCudaTensor_select(state, self, NULL, dimension_, sliceIndex_); + THCTensor *self = THCTensor_(newWithTensor)(state, tensor); + THCTensor_(select)(state, self, NULL, dimension_, sliceIndex_); return self; } -THCudaTensor *THCudaTensor_newNarrow(THCState *state, THCudaTensor *tensor, int dimension_, long firstIndex_, long size_) +THCTensor *THCTensor_(newNarrow)(THCState *state, THCTensor *tensor, int dimension_, long firstIndex_, long size_) { - THCudaTensor *self = THCudaTensor_newWithTensor(state, tensor); - THCudaTensor_narrow(state, self, NULL, dimension_, firstIndex_, size_); + THCTensor *self = THCTensor_(newWithTensor)(state, tensor); + THCTensor_(narrow)(state, self, NULL, dimension_, firstIndex_, size_); return self; } -THCudaTensor *THCudaTensor_newTranspose(THCState *state, THCudaTensor *tensor, int dimension1_, int dimension2_) +THCTensor *THCTensor_(newTranspose)(THCState *state, THCTensor *tensor, int dimension1_, int dimension2_) { - THCudaTensor *self = THCudaTensor_newWithTensor(state, tensor); - THCudaTensor_transpose(state, self, NULL, dimension1_, dimension2_); + THCTensor *self = THCTensor_(newWithTensor)(state, tensor); + THCTensor_(transpose)(state, self, NULL, dimension1_, dimension2_); return self; } -THCudaTensor *THCudaTensor_newUnfold(THCState *state, THCudaTensor *tensor, int dimension_, long size_, long step_) +THCTensor *THCTensor_(newUnfold)(THCState *state, THCTensor *tensor, int dimension_, long size_, long step_) { - THCudaTensor *self = THCudaTensor_newWithTensor(state, tensor); - THCudaTensor_unfold(state, self, NULL, dimension_, size_, step_); + THCTensor *self = THCTensor_(newWithTensor)(state, tensor); + THCTensor_(unfold)(state, self, NULL, dimension_, size_, step_); return self; } /* Resize */ -void THCudaTensor_resize(THCState *state, THCudaTensor *self, THLongStorage *size, THLongStorage *stride) +void THCTensor_(resize)(THCState *state, THCTensor *self, THLongStorage *size, THLongStorage *stride) { THArgCheck(size != NULL, 2, "invalid size"); if(stride) THArgCheck(stride->size == size->size, 3, "invalid stride"); - THCudaTensor_rawResize(state, self, size->size, size->data, (stride ? stride->data : NULL)); + THCTensor_(rawResize)(state, self, size->size, size->data, (stride ? stride->data : NULL)); } -void THCudaTensor_resizeAs(THCState *state, THCudaTensor *self, THCudaTensor *src) +void THCTensor_(resizeAs)(THCState *state, THCTensor *self, THCTensor *src) { int isSame = 0; int d; @@ -253,42 +252,42 @@ void THCudaTensor_resizeAs(THCState *state, THCudaTensor *self, THCudaTensor *sr } if(!isSame) - THCudaTensor_rawResize(state, self, src->nDimension, src->size, NULL); + THCTensor_(rawResize)(state, self, src->nDimension, src->size, NULL); } -void THCudaTensor_resize1d(THCState *state, THCudaTensor *tensor, long size0) +void THCTensor_(resize1d)(THCState *state, THCTensor *tensor, long size0) { - THCudaTensor_resize4d(state, tensor, size0, -1, -1, -1); + THCTensor_(resize4d)(state, tensor, size0, -1, -1, -1); } -void THCudaTensor_resize2d(THCState *state, THCudaTensor *tensor, long size0, long size1) +void THCTensor_(resize2d)(THCState *state, THCTensor *tensor, long size0, long size1) { - THCudaTensor_resize4d(state, tensor, size0, size1, -1, -1); + THCTensor_(resize4d)(state, tensor, size0, size1, -1, -1); } -void THCudaTensor_resize3d(THCState *state, THCudaTensor *tensor, long size0, long size1, long size2) +void THCTensor_(resize3d)(THCState *state, THCTensor *tensor, long size0, long size1, long size2) { - THCudaTensor_resize4d(state, tensor, size0, size1, size2, -1); + THCTensor_(resize4d)(state, tensor, size0, size1, size2, -1); } -void THCudaTensor_resize4d(THCState *state, THCudaTensor *self, long size0, long size1, long size2, long size3) +void THCTensor_(resize4d)(THCState *state, THCTensor *self, long size0, long size1, long size2, long size3) { long size[4] = {size0, size1, size2, size3}; - THCudaTensor_rawResize(state, self, 4, size, NULL); + THCTensor_(rawResize)(state, self, 4, size, NULL); } -void THCudaTensor_resize5d(THCState *state, THCudaTensor *self, long size0, long size1, long size2, long size3, long size4) +void THCTensor_(resize5d)(THCState *state, THCTensor *self, long size0, long size1, long size2, long size3, long size4) { long size[5] = {size0, size1, size2, size3, size4}; - THCudaTensor_rawResize(state, self, 5, size, NULL); + THCTensor_(rawResize)(state, self, 5, size, NULL); } -void THCudaTensor_set(THCState *state, THCudaTensor *self, THCudaTensor *src) +void THCTensor_(set)(THCState *state, THCTensor *self, THCTensor *src) { if(self != src) - THCudaTensor_rawSet(state, + THCTensor_(rawSet)(state, self, src->storage, src->storageOffset, @@ -297,12 +296,12 @@ void THCudaTensor_set(THCState *state, THCudaTensor *self, THCudaTensor *src) src->stride); } -void THCudaTensor_setStorage(THCState *state, THCudaTensor *self, THCudaStorage *storage_, long storageOffset_, THLongStorage *size_, THLongStorage *stride_) +void THCTensor_(setStorage)(THCState *state, THCTensor *self, THCStorage *storage_, long storageOffset_, THLongStorage *size_, THLongStorage *stride_) { if(size_ && stride_) THArgCheck(size_->size == stride_->size, 5, "inconsistent size/stride sizes"); - THCudaTensor_rawSet(state, + THCTensor_(rawSet)(state, self, storage_, storageOffset_, @@ -311,40 +310,40 @@ void THCudaTensor_setStorage(THCState *state, THCudaTensor *self, THCudaStorage (stride_ ? stride_->data : NULL)); } -void THCudaTensor_setStorage1d(THCState *state, THCudaTensor *self, THCudaStorage *storage_, long storageOffset_, +void THCTensor_(setStorage1d)(THCState *state, THCTensor *self, THCStorage *storage_, long storageOffset_, long size0_, long stride0_) { - THCudaTensor_setStorage4d(state, self, storage_, storageOffset_, + THCTensor_(setStorage4d)(state, self, storage_, storageOffset_, size0_, stride0_, -1, -1, -1, -1, -1, -1); } -void THCudaTensor_setStorage2d(THCState *state, THCudaTensor *self, THCudaStorage *storage_, long storageOffset_, +void THCTensor_(setStorage2d)(THCState *state, THCTensor *self, THCStorage *storage_, long storageOffset_, long size0_, long stride0_, long size1_, long stride1_) { - THCudaTensor_setStorage4d(state, self, storage_, storageOffset_, + THCTensor_(setStorage4d)(state, self, storage_, storageOffset_, size0_, stride0_, size1_, stride1_, -1, -1, -1, -1); } -void THCudaTensor_setStorage3d(THCState *state, THCudaTensor *self, THCudaStorage *storage_, long storageOffset_, +void THCTensor_(setStorage3d)(THCState *state, THCTensor *self, THCStorage *storage_, long storageOffset_, long size0_, long stride0_, long size1_, long stride1_, long size2_, long stride2_) { - THCudaTensor_setStorage4d(state, self, storage_, storageOffset_, + THCTensor_(setStorage4d)(state, self, storage_, storageOffset_, size0_, stride0_, size1_, stride1_, size2_, stride2_, -1, -1); } -void THCudaTensor_setStorage4d(THCState *state, THCudaTensor *self, THCudaStorage *storage_, long storageOffset_, +void THCTensor_(setStorage4d)(THCState *state, THCTensor *self, THCStorage *storage_, long storageOffset_, long size0_, long stride0_, long size1_, long stride1_, long size2_, long stride2_, @@ -354,11 +353,11 @@ void THCudaTensor_setStorage4d(THCState *state, THCudaTensor *self, THCudaStorag long size[4] = {size0_, size1_, size2_, size3_}; long stride[4] = {stride0_, stride1_, stride2_, stride3_}; - THCudaTensor_rawSet(state, self, storage_, storageOffset_, 4, size, stride); + THCTensor_(rawSet)(state, self, storage_, storageOffset_, 4, size, stride); } -void THCudaTensor_narrow(THCState *state, THCudaTensor *self, THCudaTensor *src, int dimension, long firstIndex, long size) +void THCTensor_(narrow)(THCState *state, THCTensor *self, THCTensor *src, int dimension, long firstIndex, long size) { if(!src) src = self; @@ -367,7 +366,7 @@ void THCudaTensor_narrow(THCState *state, THCudaTensor *self, THCudaTensor *src, THArgCheck( (firstIndex >= 0) && (firstIndex < src->size[dimension]), 4, "out of range"); THArgCheck( (size > 0) && (firstIndex+size <= src->size[dimension]), 5, "out of range"); - THCudaTensor_set(state, self, src); + THCTensor_(set)(state, self, src); if(firstIndex > 0) self->storageOffset += firstIndex*self->stride[dimension]; @@ -375,7 +374,7 @@ void THCudaTensor_narrow(THCState *state, THCudaTensor *self, THCudaTensor *src, self->size[dimension] = size; } -void THCudaTensor_select(THCState *state, THCudaTensor *self, THCudaTensor *src, int dimension, long sliceIndex) +void THCTensor_(select)(THCState *state, THCTensor *self, THCTensor *src, int dimension, long sliceIndex) { int d; @@ -386,8 +385,8 @@ void THCudaTensor_select(THCState *state, THCudaTensor *self, THCudaTensor *src, THArgCheck((dimension >= 0) && (dimension < src->nDimension), 3, "out of range"); THArgCheck((sliceIndex >= 0) && (sliceIndex < src->size[dimension]), 4, "out of range"); - THCudaTensor_set(state, self, src); - THCudaTensor_narrow(state, self, NULL, dimension, sliceIndex, 1); + THCTensor_(set)(state, self, src); + THCTensor_(narrow)(state, self, NULL, dimension, sliceIndex, 1); for(d = dimension; d < self->nDimension-1; d++) { self->size[d] = self->size[d+1]; @@ -396,7 +395,7 @@ void THCudaTensor_select(THCState *state, THCudaTensor *self, THCudaTensor *src, self->nDimension--; } -void THCudaTensor_transpose(THCState *state, THCudaTensor *self, THCudaTensor *src, int dimension1, int dimension2) +void THCTensor_(transpose)(THCState *state, THCTensor *self, THCTensor *src, int dimension1, int dimension2) { long z; @@ -406,7 +405,7 @@ void THCudaTensor_transpose(THCState *state, THCudaTensor *self, THCudaTensor *s THArgCheck( (dimension1 >= 0) && (dimension1 < src->nDimension), 1, "out of range"); THArgCheck( (dimension2 >= 0) && (dimension2 < src->nDimension), 2, "out of range"); - THCudaTensor_set(state, self, src); + THCTensor_(set)(state, self, src); if(dimension1 == dimension2) return; @@ -419,7 +418,7 @@ void THCudaTensor_transpose(THCState *state, THCudaTensor *self, THCudaTensor *s self->size[dimension2] = z; } -void THCudaTensor_unfold(THCState *state, THCudaTensor *self, THCudaTensor *src, int dimension, long size, long step) +void THCTensor_(unfold)(THCState *state, THCTensor *self, THCTensor *src, int dimension, long size, long step) { long *newSize; long *newStride; @@ -433,7 +432,7 @@ void THCudaTensor_unfold(THCState *state, THCudaTensor *self, THCudaTensor *src, THArgCheck(size <= src->size[dimension], 3, "out of range"); THArgCheck(step > 0, 4, "invalid step"); - THCudaTensor_set(state, self, src); + THCTensor_(set)(state, self, src); newSize = (long*)THAlloc(sizeof(long)*(self->nDimension+1)); newStride = (long*)THAlloc(sizeof(long)*(self->nDimension+1)); @@ -463,7 +462,7 @@ void THCudaTensor_unfold(THCState *state, THCudaTensor *self, THCudaTensor *src, } /* we have to handle the case where the result is a number */ -void THCudaTensor_squeeze(THCState *state, THCudaTensor *self, THCudaTensor *src) +void THCTensor_(squeeze)(THCState *state, THCTensor *self, THCTensor *src) { int ndim = 0; int d; @@ -471,7 +470,7 @@ void THCudaTensor_squeeze(THCState *state, THCudaTensor *self, THCudaTensor *src if(!src) src = self; - THCudaTensor_set(state, self, src); + THCTensor_(set)(state, self, src); for(d = 0; d < src->nDimension; d++) { @@ -496,7 +495,7 @@ void THCudaTensor_squeeze(THCState *state, THCudaTensor *self, THCudaTensor *src self->nDimension = ndim; } -void THCudaTensor_squeeze1d(THCState *state, THCudaTensor *self, THCudaTensor *src, int dimension) +void THCTensor_(squeeze1d)(THCState *state, THCTensor *self, THCTensor *src, int dimension) { int d; @@ -505,7 +504,7 @@ void THCudaTensor_squeeze1d(THCState *state, THCudaTensor *self, THCudaTensor *s THArgCheck(dimension < src->nDimension, 3, "dimension out of range"); - THCudaTensor_set(state, self, src); + THCTensor_(set)(state, self, src); if(src->size[dimension] == 1 && src->nDimension > 1) { @@ -518,7 +517,7 @@ void THCudaTensor_squeeze1d(THCState *state, THCudaTensor *self, THCudaTensor *s } } -int THCudaTensor_isContiguous(THCState *state, const THCudaTensor *self) +int THCTensor_(isContiguous)(THCState *state, const THCTensor *self) { long z = 1; int d; @@ -535,7 +534,7 @@ int THCudaTensor_isContiguous(THCState *state, const THCudaTensor *self) return 1; } -int THCudaTensor_isSize(THCState *state, const THCudaTensor *self, const THLongStorage *dims) +int THCTensor_(isSize)(THCState *state, const THCTensor *self, const THLongStorage *dims) { int d; if (self->nDimension != dims->size) @@ -549,7 +548,7 @@ int THCudaTensor_isSize(THCState *state, const THCudaTensor *self, const THLongS return 1; } -int THCudaTensor_isSetTo(THCState *state, const THCudaTensor *self, const THCudaTensor *src) +int THCTensor_(isSetTo)(THCState *state, const THCTensor *self, const THCTensor *src) { if (self->storage == src->storage && self->storageOffset == src->storageOffset && @@ -566,7 +565,7 @@ int THCudaTensor_isSetTo(THCState *state, const THCudaTensor *self, const THCuda return 0; } -int THCudaTensor_isSameSizeAs(THCState *state, const THCudaTensor *self, const THCudaTensor* src) +int THCTensor_(isSameSizeAs)(THCState *state, const THCTensor *self, const THCTensor* src) { int d; if (self->nDimension != src->nDimension) @@ -579,7 +578,7 @@ int THCudaTensor_isSameSizeAs(THCState *state, const THCudaTensor *self, const T return 1; } -long THCudaTensor_nElement(THCState *state, const THCudaTensor *self) +long THCTensor_(nElement)(THCState *state, const THCTensor *self) { if(self->nDimension == 0) return 0; @@ -593,13 +592,13 @@ long THCudaTensor_nElement(THCState *state, const THCudaTensor *self) } } -void THCudaTensor_retain(THCState *state, THCudaTensor *self) +void THCTensor_(retain)(THCState *state, THCTensor *self) { if(self->flag & TH_TENSOR_REFCOUNTED) THAtomicIncrementRef(&self->refcount); } -void THCudaTensor_free(THCState *state, THCudaTensor *self) +void THCTensor_(free)(THCState *state, THCTensor *self) { if(!self) return; @@ -611,23 +610,23 @@ void THCudaTensor_free(THCState *state, THCudaTensor *self) THFree(self->size); THFree(self->stride); if(self->storage) - THCudaStorage_free(state, self->storage); + THCStorage_(free)(state, self->storage); THFree(self); } } } -void THCudaTensor_freeCopyTo(THCState *state, THCudaTensor *self, THCudaTensor *dst) +void THCTensor_(freeCopyTo)(THCState *state, THCTensor *self, THCTensor *dst) { if(self != dst) - THCudaTensor_copy(state, dst, self); + THCTensor_(copy)(state, dst, self); - THCudaTensor_free(state, self); + THCTensor_(free)(state, self); } /*******************************************************************************/ -static void THCudaTensor_rawInit(THCState *state, THCudaTensor *self) +static void THCTensor_(rawInit)(THCState *state, THCTensor *self) { self->refcount = 1; self->storage = NULL; @@ -638,18 +637,18 @@ static void THCudaTensor_rawInit(THCState *state, THCudaTensor *self) self->flag = TH_TENSOR_REFCOUNTED; } -static void THCudaTensor_rawSet(THCState *state, THCudaTensor *self, THCudaStorage *storage, long storageOffset, int nDimension, long *size, long *stride) +static void THCTensor_(rawSet)(THCState *state, THCTensor *self, THCStorage *storage, long storageOffset, int nDimension, long *size, long *stride) { /* storage */ if(self->storage != storage) { if(self->storage) - THCudaStorage_free(state, self->storage); + THCStorage_(free)(state, self->storage); if(storage) { self->storage = storage; - THCudaStorage_retain(state, self->storage); + THCStorage_(retain)(state, self->storage); } else self->storage = NULL; @@ -661,10 +660,10 @@ static void THCudaTensor_rawSet(THCState *state, THCudaTensor *self, THCudaStora self->storageOffset = storageOffset; /* size and stride */ - THCudaTensor_rawResize(state, self, nDimension, size, stride); + THCTensor_(rawResize)(state, self, nDimension, size, stride); } -void THCudaTensor_rawResize(THCState *state, THCudaTensor *self, int nDimension, long *size, long *stride) +void THCTensor_(rawResize)(THCState *state, THCTensor *self, int nDimension, long *size, long *stride) { int d; int nDimension_; @@ -722,72 +721,72 @@ void THCudaTensor_rawResize(THCState *state, THCudaTensor *self, int nDimension, if(totalSize+self->storageOffset > 0) { if(!self->storage) - self->storage = THCudaStorage_new(state); + self->storage = THCStorage_(new)(state); if(totalSize+self->storageOffset > self->storage->size) - THCudaStorage_resize(state, self->storage, totalSize+self->storageOffset); + THCStorage_(resize)(state, self->storage, totalSize+self->storageOffset); } } else self->nDimension = 0; } -void THCudaTensor_set1d(THCState *state, THCudaTensor *tensor, long x0, float value) +void THCTensor_(set1d)(THCState *state, THCTensor *tensor, long x0, real value) { THArgCheck(tensor->nDimension == 1, 1, "tensor must have one dimension"); THArgCheck( (x0 >= 0) && (x0 < tensor->size[0]), 2, "out of range"); - THCudaStorage_set(state, tensor->storage, tensor->storageOffset+x0*tensor->stride[0], value); + THCStorage_(set)(state, tensor->storage, tensor->storageOffset+x0*tensor->stride[0], value); } -float THCudaTensor_get1d(THCState *state, const THCudaTensor *tensor, long x0) +real THCTensor_(get1d)(THCState *state, const THCTensor *tensor, long x0) { THArgCheck(tensor->nDimension == 1, 1, "tensor must have one dimension"); THArgCheck( (x0 >= 0) && (x0 < tensor->size[0]), 2, "out of range"); - return THCudaStorage_get(state, tensor->storage, tensor->storageOffset+x0*tensor->stride[0]); + return THCStorage_(get)(state, tensor->storage, tensor->storageOffset+x0*tensor->stride[0]); } -void THCudaTensor_set2d(THCState *state, THCudaTensor *tensor, long x0, long x1, float value) +void THCTensor_(set2d)(THCState *state, THCTensor *tensor, long x0, long x1, real value) { THArgCheck(tensor->nDimension == 2, 1, "tensor must have two dimensions"); THArgCheck((x0 >= 0) && (x0 < tensor->size[0]) && (x1 >= 0) && (x1 < tensor->size[1]), 2, "out of range"); - THCudaStorage_set(state, tensor->storage, tensor->storageOffset+x0*tensor->stride[0]+x1*tensor->stride[1], value); + THCStorage_(set)(state, tensor->storage, tensor->storageOffset+x0*tensor->stride[0]+x1*tensor->stride[1], value); } -float THCudaTensor_get2d(THCState *state, const THCudaTensor *tensor, long x0, long x1) +real THCTensor_(get2d)(THCState *state, const THCTensor *tensor, long x0, long x1) { THArgCheck(tensor->nDimension == 2, 1, "tensor must have two dimensions"); THArgCheck((x0 >= 0) && (x0 < tensor->size[0]) && (x1 >= 0) && (x1 < tensor->size[1]), 2, "out of range"); - return THCudaStorage_get(state, tensor->storage, tensor->storageOffset+x0*tensor->stride[0]+x1*tensor->stride[1]); + return THCStorage_(get)(state, tensor->storage, tensor->storageOffset+x0*tensor->stride[0]+x1*tensor->stride[1]); } -void THCudaTensor_set3d(THCState *state, THCudaTensor *tensor, long x0, long x1, long x2, float value) +void THCTensor_(set3d)(THCState *state, THCTensor *tensor, long x0, long x1, long x2, real value) { THArgCheck(tensor->nDimension == 3, 1, "tensor must have three dimensions"); THArgCheck( (x0 >= 0) && (x0 < tensor->size[0]) && (x1 >= 0) && (x1 < tensor->size[1]) && (x2 >= 0) && (x2 < tensor->size[2]), 2, "out of range"); - THCudaStorage_set(state, tensor->storage, tensor->storageOffset+x0*tensor->stride[0]+x1*tensor->stride[1]+x2*tensor->stride[2], value); + THCStorage_(set)(state, tensor->storage, tensor->storageOffset+x0*tensor->stride[0]+x1*tensor->stride[1]+x2*tensor->stride[2], value); } -float THCudaTensor_get3d(THCState *state, const THCudaTensor *tensor, long x0, long x1, long x2) +real THCTensor_(get3d)(THCState *state, const THCTensor *tensor, long x0, long x1, long x2) { THArgCheck(tensor->nDimension == 3, 1, "tensor must have three dimensions"); THArgCheck( (x0 >= 0) && (x0 < tensor->size[0]) && (x1 >= 0) && (x1 < tensor->size[1]) && (x2 >= 0) && (x2 < tensor->size[2]), 2, "out of range"); - return THCudaStorage_get(state, tensor->storage, tensor->storageOffset+x0*tensor->stride[0]+x1*tensor->stride[1]+x2*tensor->stride[2]); + return THCStorage_(get)(state, tensor->storage, tensor->storageOffset+x0*tensor->stride[0]+x1*tensor->stride[1]+x2*tensor->stride[2]); } -void THCudaTensor_set4d(THCState *state, THCudaTensor *tensor, long x0, long x1, long x2, long x3, float value) +void THCTensor_(set4d)(THCState *state, THCTensor *tensor, long x0, long x1, long x2, long x3, real value) { THArgCheck(tensor->nDimension == 4, 1, "tensor must have four dimensions"); THArgCheck((x0 >= 0) && (x0 < tensor->size[0]) && (x1 >= 0) && (x1 < tensor->size[1]) && (x2 >= 0) && (x2 < tensor->size[2]) && (x3 >= 0) && (x3 < tensor->size[3]), 2, "out of range"); - THCudaStorage_set(state, tensor->storage, tensor->storageOffset+x0*tensor->stride[0]+x1*tensor->stride[1]+x2*tensor->stride[2]+x3*tensor->stride[3], value); + THCStorage_(set)(state, tensor->storage, tensor->storageOffset+x0*tensor->stride[0]+x1*tensor->stride[1]+x2*tensor->stride[2]+x3*tensor->stride[3], value); } -float THCudaTensor_get4d(THCState *state, const THCudaTensor *tensor, long x0, long x1, long x2, long x3) +real THCTensor_(get4d)(THCState *state, const THCTensor *tensor, long x0, long x1, long x2, long x3) { THArgCheck(tensor->nDimension == 4, 1, "tensor must have four dimensions"); THArgCheck((x0 >= 0) && (x0 < tensor->size[0]) && (x1 >= 0) && (x1 < tensor->size[1]) && (x2 >= 0) && (x2 < tensor->size[2]) && (x3 >= 0) && (x3 < tensor->size[3]), 2, "out of range"); - return THCudaStorage_get(state, tensor->storage, tensor->storageOffset+x0*tensor->stride[0]+x1*tensor->stride[1]+x2*tensor->stride[2]+x3*tensor->stride[3]); + return THCStorage_(get)(state, tensor->storage, tensor->storageOffset+x0*tensor->stride[0]+x1*tensor->stride[1]+x2*tensor->stride[2]+x3*tensor->stride[3]); } -int THCudaTensor_checkGPU(THCState *state, unsigned int nTensors, ...) +int THCTensor_(checkGPU)(THCState *state, unsigned int nTensors, ...) { #ifdef DISABLE_CHECK_GPU return 1; // Disable GPU checks. @@ -798,11 +797,11 @@ int THCudaTensor_checkGPU(THCState *state, unsigned int nTensors, ...) va_start(args, nTensors); int valid = 1; for (unsigned int i = 0; i < nTensors; i++) { - THCudaTensor* tensor = va_arg(args, THCudaTensor*); + THCTensor* tensor = va_arg(args, THCTensor*); if (tensor == NULL) { continue; } - int tensorDev = THCudaTensor_getDevice(state, tensor); + int tensorDev = THCTensor_(getDevice)(state, tensor); if (tensorDev != -1 && tensorDev != curDev) { valid = 0; break; @@ -812,3 +811,5 @@ int THCudaTensor_checkGPU(THCState *state, unsigned int nTensors, ...) return valid; #endif } + +#endif diff --git a/lib/THC/generic/THCTensor.cu b/lib/THC/generic/THCTensor.cu index 3972ee3..beb96ec 100644 --- a/lib/THC/generic/THCTensor.cu +++ b/lib/THC/generic/THCTensor.cu @@ -1,14 +1,16 @@ -#include "THCTensor.h" +#ifndef THC_GENERIC_FILE +#define THC_GENERIC_FILE "generic/THCTensor.cu" +#else -cudaTextureObject_t THCudaTensor_getTextureObject(THCState *state, THCudaTensor *self) +cudaTextureObject_t THCTensor_(getTextureObject)(THCState *state, THCTensor *self) { - THAssert(THCudaTensor_checkGPU(state, 1, self)); + THAssert(THCTensor_(checkGPU)(state, 1, self)); cudaTextureObject_t texObj; struct cudaResourceDesc resDesc; memset(&resDesc, 0, sizeof(resDesc)); resDesc.resType = cudaResourceTypeLinear; - resDesc.res.linear.devPtr = THCudaTensor_data(state, self); - resDesc.res.linear.sizeInBytes = THCudaTensor_nElement(state, self) * 4; + resDesc.res.linear.devPtr = THCTensor_(data)(state, self); + resDesc.res.linear.sizeInBytes = THCTensor_(nElement)(state, self) * 4; resDesc.res.linear.desc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat); struct cudaTextureDesc texDesc; @@ -16,19 +18,21 @@ cudaTextureObject_t THCudaTensor_getTextureObject(THCState *state, THCudaTensor cudaCreateTextureObject(&texObj, &resDesc, &texDesc, NULL); cudaError errcode = cudaGetLastError(); if(errcode != cudaSuccess) { - if (THCudaTensor_nElement(state, self) > 2>>27) + if (THCTensor_(nElement)(state, self) > 2>>27) THError("Failed to create texture object, " "nElement:%ld exceeds 27-bit addressing required for tex1Dfetch. Cuda Error: %s", - THCudaTensor_nElement(state, self), cudaGetErrorString(errcode)); + THCTensor_(nElement)(state, self), cudaGetErrorString(errcode)); else THError("Failed to create texture object: %s", cudaGetErrorString(errcode)); } return texObj; } -THC_API int THCudaTensor_getDevice(THCState* state, const THCudaTensor* thc) { +THC_API int THCTensor_(getDevice)(THCState* state, const THCTensor* thc) { if (!thc->storage) return -1; cudaPointerAttributes attr; THCudaCheck(cudaPointerGetAttributes(&attr, thc->storage->data)); return attr.device; } + +#endif diff --git a/lib/THC/generic/THCTensor.h b/lib/THC/generic/THCTensor.h index 27689e8..175eaee 100644 --- a/lib/THC/generic/THCTensor.h +++ b/lib/THC/generic/THCTensor.h @@ -1,133 +1,130 @@ -#ifndef THC_TENSOR_INC -#define THC_TENSOR_INC - -#include "THTensor.h" -#include "THCStorage.h" -#include "THCGeneral.h" +#ifndef THC_GENERIC_FILE +#define THC_GENERIC_FILE "generic/THCTensor.h" +#else #define TH_TENSOR_REFCOUNTED 1 -typedef struct THCudaTensor +typedef struct THCTensor { long *size; long *stride; int nDimension; - THCudaStorage *storage; + THCStorage *storage; long storageOffset; int refcount; char flag; -} THCudaTensor; +} THCTensor; /**** access methods ****/ -THC_API THCudaStorage* THCudaTensor_storage(THCState *state, const THCudaTensor *self); -THC_API long THCudaTensor_storageOffset(THCState *state, const THCudaTensor *self); -THC_API int THCudaTensor_nDimension(THCState *state, const THCudaTensor *self); -THC_API long THCudaTensor_size(THCState *state, const THCudaTensor *self, int dim); -THC_API long THCudaTensor_stride(THCState *state, const THCudaTensor *self, int dim); -THC_API THLongStorage *THCudaTensor_newSizeOf(THCState *state, THCudaTensor *self); -THC_API THLongStorage *THCudaTensor_newStrideOf(THCState *state, THCudaTensor *self); -THC_API float *THCudaTensor_data(THCState *state, const THCudaTensor *self); +THC_API THCStorage* THCTensor_(storage)(THCState *state, const THCTensor *self); +THC_API long THCTensor_(storageOffset)(THCState *state, const THCTensor *self); +THC_API int THCTensor_(nDimension)(THCState *state, const THCTensor *self); +THC_API long THCTensor_(size)(THCState *state, const THCTensor *self, int dim); +THC_API long THCTensor_(stride)(THCState *state, const THCTensor *self, int dim); +THC_API THLongStorage *THCTensor_(newSizeOf)(THCState *state, THCTensor *self); +THC_API THLongStorage *THCTensor_(newStrideOf)(THCState *state, THCTensor *self); +THC_API real *THCTensor_(data)(THCState *state, const THCTensor *self); -THC_API void THCudaTensor_setFlag(THCState *state, THCudaTensor *self, const char flag); -THC_API void THCudaTensor_clearFlag(THCState *state, THCudaTensor *self, const char flag); +THC_API void THCTensor_(setFlag)(THCState *state, THCTensor *self, const char flag); +THC_API void THCTensor_(clearFlag)(THCState *state, THCTensor *self, const char flag); /**** creation methods ****/ -THC_API THCudaTensor *THCudaTensor_new(THCState *state); -THC_API THCudaTensor *THCudaTensor_newWithTensor(THCState *state, THCudaTensor *tensor); +THC_API THCTensor *THCTensor_(new)(THCState *state); +THC_API THCTensor *THCTensor_(newWithTensor)(THCState *state, THCTensor *tensor); /* stride might be NULL */ -THC_API THCudaTensor *THCudaTensor_newWithStorage(THCState *state, THCudaStorage *storage_, long storageOffset_, THLongStorage *size_, THLongStorage *stride_); -THC_API THCudaTensor *THCudaTensor_newWithStorage1d(THCState *state, THCudaStorage *storage_, long storageOffset_, +THC_API THCTensor *THCTensor_(newWithStorage)(THCState *state, THCStorage *storage_, long storageOffset_, THLongStorage *size_, THLongStorage *stride_); +THC_API THCTensor *THCTensor_(newWithStorage1d)(THCState *state, THCStorage *storage_, long storageOffset_, long size0_, long stride0_); -THC_API THCudaTensor *THCudaTensor_newWithStorage2d(THCState *state, THCudaStorage *storage_, long storageOffset_, +THC_API THCTensor *THCTensor_(newWithStorage2d)(THCState *state, THCStorage *storage_, long storageOffset_, long size0_, long stride0_, long size1_, long stride1_); -THC_API THCudaTensor *THCudaTensor_newWithStorage3d(THCState *state, THCudaStorage *storage_, long storageOffset_, +THC_API THCTensor *THCTensor_(newWithStorage3d)(THCState *state, THCStorage *storage_, long storageOffset_, long size0_, long stride0_, long size1_, long stride1_, long size2_, long stride2_); -THC_API THCudaTensor *THCudaTensor_newWithStorage4d(THCState *state, THCudaStorage *storage_, long storageOffset_, +THC_API THCTensor *THCTensor_(newWithStorage4d)(THCState *state, THCStorage *storage_, long storageOffset_, long size0_, long stride0_, long size1_, long stride1_, long size2_, long stride2_, long size3_, long stride3_); /* stride might be NULL */ -THC_API THCudaTensor *THCudaTensor_newWithSize(THCState *state, THLongStorage *size_, THLongStorage *stride_); -THC_API THCudaTensor *THCudaTensor_newWithSize1d(THCState *state, long size0_); -THC_API THCudaTensor *THCudaTensor_newWithSize2d(THCState *state, long size0_, long size1_); -THC_API THCudaTensor *THCudaTensor_newWithSize3d(THCState *state, long size0_, long size1_, long size2_); -THC_API THCudaTensor *THCudaTensor_newWithSize4d(THCState *state, long size0_, long size1_, long size2_, long size3_); - -THC_API THCudaTensor *THCudaTensor_newClone(THCState *state, THCudaTensor *self); -THC_API THCudaTensor *THCudaTensor_newContiguous(THCState *state, THCudaTensor *tensor); -THC_API THCudaTensor *THCudaTensor_newSelect(THCState *state, THCudaTensor *tensor, int dimension_, long sliceIndex_); -THC_API THCudaTensor *THCudaTensor_newNarrow(THCState *state, THCudaTensor *tensor, int dimension_, long firstIndex_, long size_); -THC_API THCudaTensor *THCudaTensor_newTranspose(THCState *state, THCudaTensor *tensor, int dimension1_, int dimension2_); -THC_API THCudaTensor *THCudaTensor_newUnfold(THCState *state, THCudaTensor *tensor, int dimension_, long size_, long step_); - -THC_API void THCudaTensor_resize(THCState *state, THCudaTensor *tensor, THLongStorage *size, THLongStorage *stride); -THC_API void THCudaTensor_resizeAs(THCState *state, THCudaTensor *tensor, THCudaTensor *src); -THC_API void THCudaTensor_resize1d(THCState *state, THCudaTensor *tensor, long size0_); -THC_API void THCudaTensor_resize2d(THCState *state, THCudaTensor *tensor, long size0_, long size1_); -THC_API void THCudaTensor_resize3d(THCState *state, THCudaTensor *tensor, long size0_, long size1_, long size2_); -THC_API void THCudaTensor_resize4d(THCState *state, THCudaTensor *tensor, long size0_, long size1_, long size2_, long size3_); -THC_API void THCudaTensor_resize5d(THCState *state, THCudaTensor *tensor, long size0_, long size1_, long size2_, long size3_, long size4_); -THC_API void THCudaTensor_rawResize(THCState *state, THCudaTensor *self, int nDimension, long *size, long *stride); - -THC_API void THCudaTensor_set(THCState *state, THCudaTensor *self, THCudaTensor *src); -THC_API void THCudaTensor_setStorage(THCState *state, THCudaTensor *self, THCudaStorage *storage_, long storageOffset_, THLongStorage *size_, THLongStorage *stride_); -THC_API void THCudaTensor_setStorage1d(THCState *state, THCudaTensor *self, THCudaStorage *storage_, long storageOffset_, +THC_API THCTensor *THCTensor_(newWithSize)(THCState *state, THLongStorage *size_, THLongStorage *stride_); +THC_API THCTensor *THCTensor_(newWithSize1d)(THCState *state, long size0_); +THC_API THCTensor *THCTensor_(newWithSize2d)(THCState *state, long size0_, long size1_); +THC_API THCTensor *THCTensor_(newWithSize3d)(THCState *state, long size0_, long size1_, long size2_); +THC_API THCTensor *THCTensor_(newWithSize4d)(THCState *state, long size0_, long size1_, long size2_, long size3_); + +THC_API THCTensor *THCTensor_(newClone)(THCState *state, THCTensor *self); +THC_API THCTensor *THCTensor_(newContiguous)(THCState *state, THCTensor *tensor); +THC_API THCTensor *THCTensor_(newSelect)(THCState *state, THCTensor *tensor, int dimension_, long sliceIndex_); +THC_API THCTensor *THCTensor_(newNarrow)(THCState *state, THCTensor *tensor, int dimension_, long firstIndex_, long size_); +THC_API THCTensor *THCTensor_(newTranspose)(THCState *state, THCTensor *tensor, int dimension1_, int dimension2_); +THC_API THCTensor *THCTensor_(newUnfold)(THCState *state, THCTensor *tensor, int dimension_, long size_, long step_); + +THC_API void THCTensor_(resize)(THCState *state, THCTensor *tensor, THLongStorage *size, THLongStorage *stride); +THC_API void THCTensor_(resizeAs)(THCState *state, THCTensor *tensor, THCTensor *src); +THC_API void THCTensor_(resize1d)(THCState *state, THCTensor *tensor, long size0_); +THC_API void THCTensor_(resize2d)(THCState *state, THCTensor *tensor, long size0_, long size1_); +THC_API void THCTensor_(resize3d)(THCState *state, THCTensor *tensor, long size0_, long size1_, long size2_); +THC_API void THCTensor_(resize4d)(THCState *state, THCTensor *tensor, long size0_, long size1_, long size2_, long size3_); +THC_API void THCTensor_(resize5d)(THCState *state, THCTensor *tensor, long size0_, long size1_, long size2_, long size3_, long size4_); +THC_API void THCTensor_(rawResize)(THCState *state, THCTensor *self, int nDimension, long *size, long *stride); + +THC_API void THCTensor_(set)(THCState *state, THCTensor *self, THCTensor *src); +THC_API void THCTensor_(setStorage)(THCState *state, THCTensor *self, THCStorage *storage_, long storageOffset_, THLongStorage *size_, THLongStorage *stride_); +THC_API void THCTensor_(setStorage1d)(THCState *state, THCTensor *self, THCStorage *storage_, long storageOffset_, long size0_, long stride0_); -THC_API void THCudaTensor_setStorage2d(THCState *state, THCudaTensor *self, THCudaStorage *storage_, long storageOffset_, +THC_API void THCTensor_(setStorage2d)(THCState *state, THCTensor *self, THCStorage *storage_, long storageOffset_, long size0_, long stride0_, long size1_, long stride1_); -THC_API void THCudaTensor_setStorage3d(THCState *state, THCudaTensor *self, THCudaStorage *storage_, long storageOffset_, +THC_API void THCTensor_(setStorage3d)(THCState *state, THCTensor *self, THCStorage *storage_, long storageOffset_, long size0_, long stride0_, long size1_, long stride1_, long size2_, long stride2_); -THC_API void THCudaTensor_setStorage4d(THCState *state, THCudaTensor *self, THCudaStorage *storage_, long storageOffset_, +THC_API void THCTensor_(setStorage4d)(THCState *state, THCTensor *self, THCStorage *storage_, long storageOffset_, long size0_, long stride0_, long size1_, long stride1_, long size2_, long stride2_, long size3_, long stride3_); -THC_API void THCudaTensor_narrow(THCState *state, THCudaTensor *self, THCudaTensor *src, int dimension_, long firstIndex_, long size_); -THC_API void THCudaTensor_select(THCState *state, THCudaTensor *self, THCudaTensor *src, int dimension_, long sliceIndex_); -THC_API void THCudaTensor_transpose(THCState *state, THCudaTensor *self, THCudaTensor *src, int dimension1_, int dimension2_); -THC_API void THCudaTensor_unfold(THCState *state, THCudaTensor *self, THCudaTensor *src, int dimension_, long size_, long step_); +THC_API void THCTensor_(narrow)(THCState *state, THCTensor *self, THCTensor *src, int dimension_, long firstIndex_, long size_); +THC_API void THCTensor_(select)(THCState *state, THCTensor *self, THCTensor *src, int dimension_, long sliceIndex_); +THC_API void THCTensor_(transpose)(THCState *state, THCTensor *self, THCTensor *src, int dimension1_, int dimension2_); +THC_API void THCTensor_(unfold)(THCState *state, THCTensor *self, THCTensor *src, int dimension_, long size_, long step_); -THC_API void THCudaTensor_squeeze(THCState *state, THCudaTensor *self, THCudaTensor *src); -THC_API void THCudaTensor_squeeze1d(THCState *state, THCudaTensor *self, THCudaTensor *src, int dimension_); +THC_API void THCTensor_(squeeze)(THCState *state, THCTensor *self, THCTensor *src); +THC_API void THCTensor_(squeeze1d)(THCState *state, THCTensor *self, THCTensor *src, int dimension_); -THC_API int THCudaTensor_isContiguous(THCState *state, const THCudaTensor *self); -THC_API int THCudaTensor_isSameSizeAs(THCState *state, const THCudaTensor *self, const THCudaTensor *src); -THC_API int THCudaTensor_isSetTo(THCState *state, const THCudaTensor *self, const THCudaTensor *src); -THC_API int THCudaTensor_isSize(THCState *state, const THCudaTensor *self, const THLongStorage *dims); -THC_API long THCudaTensor_nElement(THCState *state, const THCudaTensor *self); +THC_API int THCTensor_(isContiguous)(THCState *state, const THCTensor *self); +THC_API int THCTensor_(isSameSizeAs)(THCState *state, const THCTensor *self, const THCTensor *src); +THC_API int THCTensor_(isSetTo)(THCState *state, const THCTensor *self, const THCTensor *src); +THC_API int THCTensor_(isSize)(THCState *state, const THCTensor *self, const THLongStorage *dims); +THC_API long THCTensor_(nElement)(THCState *state, const THCTensor *self); -THC_API void THCudaTensor_retain(THCState *state, THCudaTensor *self); -THC_API void THCudaTensor_free(THCState *state, THCudaTensor *self); -THC_API void THCudaTensor_freeCopyTo(THCState *state, THCudaTensor *self, THCudaTensor *dst); +THC_API void THCTensor_(retain)(THCState *state, THCTensor *self); +THC_API void THCTensor_(free)(THCState *state, THCTensor *self); +THC_API void THCTensor_(freeCopyTo)(THCState *state, THCTensor *self, THCTensor *dst); /* Slow access methods [check everything] */ -THC_API void THCudaTensor_set1d(THCState *state, THCudaTensor *tensor, long x0, float value); -THC_API void THCudaTensor_set2d(THCState *state, THCudaTensor *tensor, long x0, long x1, float value); -THC_API void THCudaTensor_set3d(THCState *state, THCudaTensor *tensor, long x0, long x1, long x2, float value); -THC_API void THCudaTensor_set4d(THCState *state, THCudaTensor *tensor, long x0, long x1, long x2, long x3, float value); +THC_API void THCTensor_(set1d)(THCState *state, THCTensor *tensor, long x0, real value); +THC_API void THCTensor_(set2d)(THCState *state, THCTensor *tensor, long x0, long x1, real value); +THC_API void THCTensor_(set3d)(THCState *state, THCTensor *tensor, long x0, long x1, long x2, real value); +THC_API void THCTensor_(set4d)(THCState *state, THCTensor *tensor, long x0, long x1, long x2, long x3, real value); -THC_API float THCudaTensor_get1d(THCState *state, const THCudaTensor *tensor, long x0); -THC_API float THCudaTensor_get2d(THCState *state, const THCudaTensor *tensor, long x0, long x1); -THC_API float THCudaTensor_get3d(THCState *state, const THCudaTensor *tensor, long x0, long x1, long x2); -THC_API float THCudaTensor_get4d(THCState *state, const THCudaTensor *tensor, long x0, long x1, long x2, long x3); +THC_API real THCTensor_(get1d)(THCState *state, const THCTensor *tensor, long x0); +THC_API real THCTensor_(get2d)(THCState *state, const THCTensor *tensor, long x0, long x1); +THC_API real THCTensor_(get3d)(THCState *state, const THCTensor *tensor, long x0, long x1, long x2); +THC_API real THCTensor_(get4d)(THCState *state, const THCTensor *tensor, long x0, long x1, long x2, long x3); /* CUDA-specific functions */ -THC_API cudaTextureObject_t THCudaTensor_getTextureObject(THCState *state, THCudaTensor *self); -THC_API int THCudaTensor_getDevice(THCState *state, const THCudaTensor *self); -THC_API int THCudaTensor_checkGPU(THCState *state, unsigned int nTensors, ...); +THC_API cudaTextureObject_t THCTensor_(getTextureObject)(THCState *state, THCTensor *self); +THC_API int THCTensor_(getDevice)(THCState *state, const THCTensor *self); +THC_API int THCTensor_(checkGPU)(THCState *state, unsigned int nTensors, ...); #endif diff --git a/lib/THC/generic/THCTensorCopy.c b/lib/THC/generic/THCTensorCopy.c index 5fbacca..c1813f9 100644 --- a/lib/THC/generic/THCTensorCopy.c +++ b/lib/THC/generic/THCTensorCopy.c @@ -1,42 +1,42 @@ -#include "THCTensorCopy.h" -#include "THCGeneral.h" -#include "THCTensor.h" +#ifndef THC_GENERIC_FILE +#define THC_GENERIC_FILE "generic/THCTensorCopy.c" +#else /* specific methods */ -void THCudaTensor_copyFloat(THCState *state, THCudaTensor *self, struct THFloatTensor *src) +void THCTensor_(copyCPU)(THCState *state, THCTensor *self, struct THTensor *src) { - THArgCheck(THCudaTensor_nElement(state, self) == THFloatTensor_nElement(src), 2, "sizes do not match"); + THArgCheck(THCTensor_(nElement)(state, self) == THTensor_(nElement)(src), 2, "sizes do not match"); { - THCudaTensor *selfc = THCudaTensor_newContiguous(state, self); - src = THFloatTensor_newContiguous(src); - - THCudaCheck(cudaMemcpy(THCudaTensor_data(state, selfc), - THFloatTensor_data(src), - THFloatTensor_nElement(src) * sizeof(float), + THCTensor *selfc = THCTensor_(newContiguous)(state, self); + src = THTensor_(newContiguous)(src); + + THCudaCheck(cudaMemcpy(THCTensor_(data)(state,selfc), + THTensor_(data)(src), + THTensor_(nElement)(src) * sizeof(real), cudaMemcpyHostToDevice)); - THFloatTensor_free(src); - THCudaTensor_freeCopyTo(state, selfc, self); + THTensor_(free)(src); + THCTensor_(freeCopyTo)(state, selfc, self); } } -/* everything comes down to copy to a tensor of floats */ #define IMPLEMENT_TH_CUDA_TENSOR_COPY(TYPEC) \ -void THCudaTensor_copy##TYPEC(THCState *state, THCudaTensor *self, struct TH##TYPEC##Tensor *src) \ +void THCTensor_(copy##TYPEC)(THCState *state, THCTensor *self, struct TH##TYPEC##Tensor *src) \ { \ - THArgCheck(THCudaTensor_nElement(state, self) == TH##TYPEC##Tensor_nElement(src), 2, "sizes do not match"); \ - \ - { \ + THArgCheck(THCTensor_(nElement)(state, self) == TH##TYPEC##Tensor_nElement(src), 2, "sizes do not match"); \ + if(THCTypeIdx_(Real) == THCTypeIdx_(TYPEC)) { \ + THCTensor_(copyCPU)(state, self, (THTensor*) src); /* cast just removes warnings */ \ + } else { \ THLongStorage *size = TH##TYPEC##Tensor_newSizeOf(src); \ - THFloatTensor *srcf = THFloatTensor_newWithSize(size, NULL); \ + THTensor *srcf = THTensor_(newWithSize)(size, NULL); \ \ - THFloatTensor_copy##TYPEC(srcf, src); \ - THCudaTensor_copyFloat(state, self, srcf); \ + THTensor_(copy##TYPEC)(srcf, src); \ + THCTensor_(copyCPU)(state, self, srcf); \ \ THLongStorage_free(size); \ - THFloatTensor_free(srcf); \ + THTensor_(free)(srcf); \ } \ } @@ -45,43 +45,45 @@ IMPLEMENT_TH_CUDA_TENSOR_COPY(Char) IMPLEMENT_TH_CUDA_TENSOR_COPY(Short) IMPLEMENT_TH_CUDA_TENSOR_COPY(Int) IMPLEMENT_TH_CUDA_TENSOR_COPY(Long) +IMPLEMENT_TH_CUDA_TENSOR_COPY(Float) IMPLEMENT_TH_CUDA_TENSOR_COPY(Double) /* copyCuda */ -void THFloatTensor_copyCuda(THCState *state, THFloatTensor *self, struct THCudaTensor *src) +void THTensor_(copyCuda)(THCState *state, THTensor *self, struct THCTensor *src) { - THArgCheck(THFloatTensor_nElement(self) == THCudaTensor_nElement(state, src), 2, "sizes do not match"); + THArgCheck(THTensor_(nElement)(self) == THCTensor_(nElement)(state, src), 2, "sizes do not match"); { - THFloatTensor *selfc = THFloatTensor_newContiguous(self); - src = THCudaTensor_newContiguous(state, src); + THTensor *selfc = THTensor_(newContiguous)(self); + src = THCTensor_(newContiguous)(state, src); - THCudaCheck(cudaMemcpy(THFloatTensor_data(selfc), - THCudaTensor_data(state, src), - THCudaTensor_nElement(state, src) * sizeof(float), + THCudaCheck(cudaMemcpy(THTensor_(data)(selfc), + THCTensor_(data)(state, src), + THCTensor_(nElement)(state, src) * sizeof(real), cudaMemcpyDeviceToHost)); - THCudaTensor_free(state, src); - THFloatTensor_freeCopyTo(selfc, self); + THCTensor_(free)(state, src); + THTensor_(freeCopyTo)(selfc, self); } } -#define IMPLEMENT_TH_CUDA_TENSOR_COPY_TO(TYPEC) \ - void TH##TYPEC##Tensor_copyCuda(THCState *state, TH##TYPEC##Tensor *self, struct THCudaTensor *src) \ - { \ - THArgCheck(TH##TYPEC##Tensor_nElement(self) == THCudaTensor_nElement(state, src), 2, "sizes do not match"); \ - \ - { \ - THLongStorage *size = THCudaTensor_newSizeOf(state, src); \ - THFloatTensor *srcf = THFloatTensor_newWithSize(size, NULL); \ - \ - THFloatTensor_copyCuda(state, srcf, src); \ - TH##TYPEC##Tensor_copyFloat(self, srcf); \ - \ - THLongStorage_free(size); \ - THFloatTensor_free(srcf); \ - } \ +#define IMPLEMENT_TH_CUDA_TENSOR_COPY_TO(TYPEC) \ + void TH_CONCAT_4(TH,TYPEC,Tensor_copyCuda,Real)(THCState *state, TH##TYPEC##Tensor *self, struct THCTensor *src) \ + { \ + THArgCheck(TH##TYPEC##Tensor_nElement(self) == THCTensor_(nElement)(state, src), 2, "sizes do not match"); \ + if(THCTypeIdx_(Real) == THCTypeIdx_(TYPEC)) { \ + THTensor_(copyCuda)(state, (THTensor*) self, src); /* cast just removes compiler warning */ \ + } else { \ + THLongStorage *size = THCTensor_(newSizeOf)(state, src); \ + THTensor *srcf = THTensor_(newWithSize)(size, NULL); \ + \ + THTensor_(copyCuda)(state, srcf, src); \ + TH_CONCAT_4(TH,TYPEC,Tensor_copy,Real)(self, srcf); \ + \ + THLongStorage_free(size); \ + THTensor_(free)(srcf); \ + } \ } IMPLEMENT_TH_CUDA_TENSOR_COPY_TO(Byte) @@ -89,23 +91,25 @@ IMPLEMENT_TH_CUDA_TENSOR_COPY_TO(Char) IMPLEMENT_TH_CUDA_TENSOR_COPY_TO(Short) IMPLEMENT_TH_CUDA_TENSOR_COPY_TO(Int) IMPLEMENT_TH_CUDA_TENSOR_COPY_TO(Long) +IMPLEMENT_TH_CUDA_TENSOR_COPY_TO(Float) IMPLEMENT_TH_CUDA_TENSOR_COPY_TO(Double) -void THCudaTensor_copyCuda(THCState *state, THCudaTensor *self, THCudaTensor *src) +// FIXME: add within-CUDA conversions +void THCTensor_(copyCuda)(THCState *state, THCTensor *self, THCTensor *src) { - THCudaTensor_copy(state, self, src); + THCTensor_(copy)(state, self, src); } -void THCudaTensor_copyAsyncFloat(THCState *state, THCudaTensor *self, struct THFloatTensor *src) +void THCTensor_(copyAsyncCPU)(THCState *state, THCTensor *self, struct THTensor *src) { - THArgCheck(THCudaTensor_nElement(state, self) == THFloatTensor_nElement(src), 2, "sizes do not match"); - THArgCheck(THCudaTensor_isContiguous(state, self), 2, "Target tensor must be contiguous"); - THArgCheck(THFloatTensor_isContiguous(src), 3, "Source tensor must be contiguous"); + THArgCheck(THCTensor_(nElement)(state, self) == THTensor_(nElement)(src), 2, "sizes do not match"); + THArgCheck(THCTensor_(isContiguous)(state, self), 2, "Target tensor must be contiguous"); + THArgCheck(THTensor_(isContiguous)(src), 3, "Source tensor must be contiguous"); - if (THCudaTensor_nElement(state, self) == 0) return; + if (THCTensor_(nElement)(state, self) == 0) return; // Perform the copy wrt the current stream on the CudaTensor's device. - int tensorDevice = THCudaTensor_getDevice(state, self); + int tensorDevice = THCTensor_(getDevice)(state, self); int currentDevice; THCudaCheck(cudaGetDevice(¤tDevice)); @@ -113,9 +117,9 @@ void THCudaTensor_copyAsyncFloat(THCState *state, THCudaTensor *self, struct THF THCudaCheck(cudaSetDevice(tensorDevice)); } - THCudaCheck(cudaMemcpyAsync(THCudaTensor_data(state, self), - THFloatTensor_data(src), - THFloatTensor_nElement(src) * sizeof(float), + THCudaCheck(cudaMemcpyAsync(THCTensor_(data)(state, self), + THTensor_(data)(src), + THTensor_(nElement)(src) * sizeof(real), cudaMemcpyHostToDevice, THCState_getDeviceStream(state, tensorDevice, THCState_getCurrentStreamIndex(state)))); @@ -125,16 +129,16 @@ void THCudaTensor_copyAsyncFloat(THCState *state, THCudaTensor *self, struct THF } } -void THFloatTensor_copyAsyncCuda(THCState *state, THFloatTensor *self, struct THCudaTensor *src) +void THTensor_(copyAsyncCuda)(THCState *state, THTensor *self, struct THCTensor *src) { - THArgCheck(THFloatTensor_nElement(self) == THCudaTensor_nElement(state, src), 2, "sizes do not match"); - THArgCheck(THFloatTensor_isContiguous(self), 2, "Target tensor must be contiguous"); - THArgCheck(THCudaTensor_isContiguous(state, src), 3, "Source tensor must be contiguous"); + THArgCheck(THTensor_(nElement)(self) == THCudaTensor_nElement(state, src), 2, "sizes do not match"); + THArgCheck(THTensor_(isContiguous)(self), 2, "Target tensor must be contiguous"); + THArgCheck(THCTensor_(isContiguous)(state, src), 3, "Source tensor must be contiguous"); - if (THFloatTensor_nElement(self) == 0) return; + if (THTensor_(nElement)(self) == 0) return; // Perform the copy wrt the current stream on the CudaTensor's device. - int tensorDevice = THCudaTensor_getDevice(state, src); + int tensorDevice = THCTensor_(getDevice)(state, src); int currentDevice; THCudaCheck(cudaGetDevice(¤tDevice)); @@ -142,9 +146,9 @@ void THFloatTensor_copyAsyncCuda(THCState *state, THFloatTensor *self, struct TH THCudaCheck(cudaSetDevice(tensorDevice)); } - THCudaCheck(cudaMemcpyAsync(THFloatTensor_data(self), - THCudaTensor_data(state, src), - THCudaTensor_nElement(state, src) * sizeof(float), + THCudaCheck(cudaMemcpyAsync(THTensor_(data)(self), + THCTensor_(data)(state, src), + THCTensor_(nElement)(state, src) * sizeof(real), cudaMemcpyDeviceToHost, THCState_getDeviceStream(state, tensorDevice, THCState_getCurrentStreamIndex(state)))); @@ -153,3 +157,5 @@ void THFloatTensor_copyAsyncCuda(THCState *state, THFloatTensor *self, struct TH THCudaCheck(cudaSetDevice(currentDevice)); } } + +#endif diff --git a/lib/THC/generic/THCTensorCopy.cu b/lib/THC/generic/THCTensorCopy.cu index 5aa7ee5..304e52c 100644 --- a/lib/THC/generic/THCTensorCopy.cu +++ b/lib/THC/generic/THCTensorCopy.cu @@ -1,19 +1,15 @@ -#include "THCApply.cuh" - -static inline int curGPU() { - int curDev; - THCudaCheck(cudaGetDevice(&curDev)); - return curDev; -} +#ifndef THC_GENERIC_FILE +#define THC_GENERIC_FILE "generic/THCTensorCopy.cu" +#else THC_API void -THCudaTensor_copy(THCState* state, THCudaTensor* dst, THCudaTensor* src) { - long totalElements = THCudaTensor_nElement(state, dst); +THCTensor_(copy)(THCState* state, THCTensor* dst, THCTensor* src) { + long totalElements = THCTensor_(nElement)(state, dst); - THArgCheck(totalElements == THCudaTensor_nElement(state, src), 2, + THArgCheck(totalElements == THCTensor_(nElement)(state, src), 2, "sizes do not match"); - if (THCudaTensor_nDimension(state, dst) == 0) { + if (THCTensor_(nDimension)(state, dst) == 0) { // Zero-dim tensor; copy nothing return; } @@ -24,12 +20,12 @@ THCudaTensor_copy(THCState* state, THCudaTensor* dst, THCudaTensor* src) { // -FIXME: if both tensors have matching size and stride arrays, and no // holes within (in other words, there is some permutation that can be applied // to the size/strides such that the resulting tensor is contiguous). - bool srcContig = THCudaTensor_isContiguous(state, src); - bool dstContig = THCudaTensor_isContiguous(state, dst); + bool srcContig = THCTensor_(isContiguous)(state, src); + bool dstContig = THCTensor_(isContiguous)(state, dst); bool memcpyEligible = (srcContig && dstContig) || (totalElements == 1); - int srcDev = THCudaTensor_getDevice(state, src); - int dstDev = THCudaTensor_getDevice(state, dst); + int srcDev = THCTensor_(getDevice)(state, src); + int dstDev = THCTensor_(getDevice)(state, dst); int oldDev = curGPU(); // We always perform the copy on the source device, using the @@ -71,12 +67,13 @@ THCudaTensor_copy(THCState* state, THCudaTensor* dst, THCudaTensor* src) { // We are now on srcDev if (memcpyEligible) { // Perform the copy - THCudaCheck(cudaMemcpyAsync(THCudaTensor_data(state, dst), - THCudaTensor_data(state, src), - totalElements * sizeof(float), + THCudaCheck(cudaMemcpyAsync(THCTensor_(data)(state, dst), + THCTensor_(data)(state, src), + totalElements * sizeof(real), cudaMemcpyDeviceToDevice, copyStream)); } else { +#ifdef THC_REAL_IS_FLOAT // Non-contiguous copy // We avoid creating temporary memory copies if possible. @@ -139,6 +136,11 @@ THCudaTensor_copy(THCState* state, THCudaTensor* dst, THCudaTensor* src) { THCudaTensor_freeCopyTo(state, dstContig, dst); } } +#else +#define STRINGIFY(x) #x + THError("Non-contiguous copy not implemented for Cuda%sTensor", STRINGIFY(Real)); +#undef STRINGIFY +#endif } if (srcDev != dstDev && copyStreamIndex == 0) { @@ -170,3 +172,34 @@ THCudaTensor_copy(THCState* state, THCudaTensor* dst, THCudaTensor* src) { THError(cudaGetErrorString(errcode)); } } + +// conversions are mediated by the CPU +// yes, this is slow; feel free to write CUDA kernels for this +#define THC_CUDA_TENSOR_IMPLEMENT_COPY(TYPEC,TYPECUDA) \ + void THCTensor_(copyCuda##TYPEC)(THCState *state, THCTensor *self, struct THCuda##TYPECUDA##Tensor *src) \ + { \ + if(THCTypeIdx_(Real) == THCTypeIdx_(TYPEC)) { \ + THCTensor_(copy)(state, self, (THCTensor*) src); /* cast just removes compiler warning */ \ + } else { \ + THArgCheck(THCTensor_(nElement)(state, self) == THCuda##TYPECUDA##Tensor_nElement(state, src), 2, "size does not match"); \ + THLongStorage *size = THCuda##TYPECUDA##Tensor_newSizeOf(state, src); \ + TH##TYPEC##Tensor *buffer1 = TH##TYPEC##Tensor_newWithSize(size, NULL); \ + THTensor *buffer2 = THTensor_(newWithSize)(size, NULL); \ + TH##TYPEC##Tensor_copyCuda(state, buffer1, src); \ + THTensor_(copy##TYPEC)(buffer2, buffer1); \ + THCTensor_(copyCPU)(state, self, buffer2); \ + THLongStorage_free(size); \ + TH##TYPEC##Tensor_free(buffer1); \ + THTensor_(free)(buffer2); \ + } \ + } + +THC_CUDA_TENSOR_IMPLEMENT_COPY(Byte,Byte) +THC_CUDA_TENSOR_IMPLEMENT_COPY(Char,Char) +THC_CUDA_TENSOR_IMPLEMENT_COPY(Short,Short) +THC_CUDA_TENSOR_IMPLEMENT_COPY(Int,Int) +THC_CUDA_TENSOR_IMPLEMENT_COPY(Long,Long) +THC_CUDA_TENSOR_IMPLEMENT_COPY(Float,) // i.e. float +THC_CUDA_TENSOR_IMPLEMENT_COPY(Double,Double) + +#endif diff --git a/lib/THC/generic/THCTensorCopy.h b/lib/THC/generic/THCTensorCopy.h index d843213..b71fe0f 100644 --- a/lib/THC/generic/THCTensorCopy.h +++ b/lib/THC/generic/THCTensorCopy.h @@ -1,28 +1,36 @@ -#ifndef TH_CUDA_TENSOR_COPY_INC -#define TH_CUDA_TENSOR_COPY_INC +#ifndef THC_GENERIC_FILE +#define THC_GENERIC_FILE "generic/THCTensorCopy.h" +#else -#include "THCTensor.h" -#include "THCGeneral.h" +THC_API void THCTensor_(copy)(THCState *state, THCTensor *self, THCTensor *src); +THC_API void THCTensor_(copyByte)(THCState *state, THCTensor *self, THByteTensor *src); +THC_API void THCTensor_(copyChar)(THCState *state, THCTensor *self, THCharTensor *src); +THC_API void THCTensor_(copyShort)(THCState *state, THCTensor *self, THShortTensor *src); +THC_API void THCTensor_(copyInt)(THCState *state, THCTensor *self, THIntTensor *src); +THC_API void THCTensor_(copyLong)(THCState *state, THCTensor *self, THLongTensor *src); +THC_API void THCTensor_(copyFloat)(THCState *state, THCTensor *self, THFloatTensor *src); +THC_API void THCTensor_(copyDouble)(THCState *state, THCTensor *self, THDoubleTensor *src); -THC_API void THCudaTensor_copy(THCState *state, THCudaTensor *self, THCudaTensor *src); -THC_API void THCudaTensor_copyByte(THCState *state, THCudaTensor *self, THByteTensor *src); -THC_API void THCudaTensor_copyChar(THCState *state, THCudaTensor *self, THCharTensor *src); -THC_API void THCudaTensor_copyShort(THCState *state, THCudaTensor *self, THShortTensor *src); -THC_API void THCudaTensor_copyInt(THCState *state, THCudaTensor *self, THIntTensor *src); -THC_API void THCudaTensor_copyLong(THCState *state, THCudaTensor *self, THLongTensor *src); -THC_API void THCudaTensor_copyFloat(THCState *state, THCudaTensor *self, THFloatTensor *src); -THC_API void THCudaTensor_copyDouble(THCState *state, THCudaTensor *self, THDoubleTensor *src); +THC_API void THCTensor_(copyCudaByte)(THCState *state, THCTensor *storage, struct THCudaByteTensor *src); +THC_API void THCTensor_(copyCudaChar)(THCState *state, THCTensor *storage, struct THCudaCharTensor *src); +THC_API void THCTensor_(copyCudaShort)(THCState *state, THCTensor *storage, struct THCudaShortTensor *src); +THC_API void THCTensor_(copyCudaInt)(THCState *state, THCTensor *storage, struct THCudaIntTensor *src); +THC_API void THCTensor_(copyCudaLong)(THCState *state, THCTensor *storage, struct THCudaLongTensor *src); +THC_API void THCTensor_(copyCudaFloat)(THCState *state, THCTensor *storage, struct THCudaTensor *src); +THC_API void THCTensor_(copyCudaDouble)(THCState *state, THCTensor *storage, struct THCudaDoubleTensor *src); -THC_API void THByteTensor_copyCuda(THCState *state, THByteTensor *self, THCudaTensor *src); -THC_API void THCharTensor_copyCuda(THCState *state, THCharTensor *self, THCudaTensor *src); -THC_API void THShortTensor_copyCuda(THCState *state, THShortTensor *self, THCudaTensor *src); -THC_API void THIntTensor_copyCuda(THCState *state, THIntTensor *self, THCudaTensor *src); -THC_API void THLongTensor_copyCuda(THCState *state, THLongTensor *self, THCudaTensor *src); -THC_API void THFloatTensor_copyCuda(THCState *state, THFloatTensor *self, THCudaTensor *src); -THC_API void THDoubleTensor_copyCuda(THCState *state, THDoubleTensor *self, THCudaTensor *src); -THC_API void THCudaTensor_copyCuda(THCState *state, THCudaTensor *self, THCudaTensor *src); +THC_API void TH_CONCAT_2(THByteTensor_copyCuda , Real) (THCState *state, THByteTensor *self, THCTensor *src); +THC_API void TH_CONCAT_2(THCharTensor_copyCuda , Real) (THCState *state, THCharTensor *self, THCTensor *src); +THC_API void TH_CONCAT_2(THShortTensor_copyCuda , Real) (THCState *state, THShortTensor *self, THCTensor *src); +THC_API void TH_CONCAT_2(THIntTensor_copyCuda , Real) (THCState *state, THIntTensor *self, THCTensor *src); +THC_API void TH_CONCAT_2(THLongTensor_copyCuda , Real) (THCState *state, THLongTensor *self, THCTensor *src); +THC_API void TH_CONCAT_2(THFloatTensor_copyCuda , Real) (THCState *state, THFloatTensor *self, THCTensor *src); +THC_API void TH_CONCAT_2(THDoubleTensor_copyCuda, Real) (THCState *state, THDoubleTensor *self, THCTensor *src); +THC_API void THCTensor_(copyCuda) (THCState *state, THCTensor *self, THCTensor *src); +THC_API void THTensor_(copyCuda) (THCState *state, THTensor *self, THCTensor *src); +THC_API void THCTensor_(copyCPU) (THCState *state, THCTensor *self, THTensor *src); -THC_API void THCudaTensor_copyAsyncFloat(THCState *state, THCudaTensor *self, THFloatTensor *src); -THC_API void THFloatTensor_copyAsyncCuda(THCState *state, THFloatTensor *self, THCudaTensor *src); +THC_API void THCTensor_(copyAsyncCPU)(THCState *state, THCTensor *self, THTensor *src); +THC_API void THTensor_(copyAsyncCuda)(THCState *state, THTensor *self, THCTensor *src); #endif diff --git a/test/test.lua b/test/test.lua index 845ad95..27d1a15 100644 --- a/test/test.lua +++ b/test/test.lua @@ -2035,6 +2035,106 @@ function test.multi_gpu_copy_noncontig() end end +function test.cudaTypeCopy() + + local types = { + {'float', 'FloatTensor'}, + {'byte', 'ByteTensor'}, + {'char', 'CharTensor'}, + {'short', 'ShortTensor'}, + {'long', 'LongTensor'}, + {'double','DoubleTensor'}, + + {'cuda', 'CudaTensor'}, + {'cudaByte', 'CudaByteTensor'}, + {'cudaChar', 'CudaCharTensor'}, + {'cudaShort', 'CudaShortTensor'}, + {'cudaLong', 'CudaLongTensor'}, + {'cudaDouble','CudaDoubleTensor'}, + } + + local N = 100 + local t0 = torch.range(1,12):reshape(3,4) + + -- t carries over from one iteration to the next + local t = t0:clone() + for i = 1, N do + -- convert to a random (CPU or GPU) type) + local conversionFunc, tensorSubtype = unpack(types[torch.random(#types)]) + local tensorType = 'torch.' .. tensorSubtype + + if torch.random(0,1) ~= 0 then + -- this is equivalent to t = t:float() + t = t[conversionFunc](t) + else + -- this is equivalent to t = torch.XTensor():copy(t) + t = torch[tensorSubtype](3,4):copy(t) + end + + -- check the type + tester:assert(t:type() == tensorType, t:type() .. ' ~= ' .. tensorType) + + -- check metadata + tester:assert(t:isContiguous()) + tester:assert(t:size(1) == 3 and t:size(2) == 4) + tester:assert(t:nDimension() == 2) + + -- check data + tester:assertTensorEq(t:double(), t0, 0) + + + -- check indexing + -- FIXME: doesn't work yet + -- tester:assert(ct[{1,1}] == 1) + end + + -- check narrowing conversions + tester:assert(torch.Tensor(1):fill(500):cudaByte():float()[1] == 244) + tester:assert(torch.Tensor(1):fill(500):cudaChar():float()[1] == -12) +end + + +function test.cudaStorageTypeCopy() + + local types = { + {'float', 'FloatStorage'}, + {'byte', 'ByteStorage'}, + {'char', 'CharStorage'}, + {'short', 'ShortStorage'}, + {'long', 'LongStorage'}, + {'double','DoubleStorage'}, + + {'cuda', 'CudaStorage'}, + {'cudaByte', 'CudaByteStorage'}, + {'cudaChar', 'CudaCharStorage'}, + {'cudaShort', 'CudaShortStorage'}, + {'cudaLong', 'CudaLongStorage'}, + {'cudaDouble','CudaDoubleStorage'}, + } + + local N = 100 + local t0 = torch.range(1,12):reshape(3,4):storage() + + -- t carries over from one iteration to the next + local t = torch.DoubleStorage(t0:size()):copy(t0) + for i = 1, N do + -- convert to a random (CPU or GPU) type) + local conversionFunc, storageSubtype = unpack(types[torch.random(#types)]) + local storageType = 'torch.' .. storageSubtype + + -- this is equivalent to t = torch.XStorage():copy(t) + t = torch[storageSubtype](12):copy(t) + + -- check the type + tester:assert(torch.type(t) == storageType, torch.type(t) .. ' ~= ' .. storageType) + + local d = torch.DoubleStorage(12):copy(t) + for i = 1, t:size() do + tester:assert(d[i] == t0[i], storageSubtype .. ': ' .. i .. ': ' .. d[i] .. ' ~= ' .. t0[i]) + end + end +end + function test.maskedSelect() local n_row = math.random(minsize,maxsize) local n_col = math.random(minsize,maxsize) diff --git a/torch/generic/Storage.c b/torch/generic/Storage.c index 6f0188d..063704a 100644 --- a/torch/generic/Storage.c +++ b/torch/generic/Storage.c @@ -5,34 +5,34 @@ static int torch_Storage_(new)(lua_State *L) { THCState *state = cutorch_getstate(L); - THStorage *storage; + THCStorage *storage; if(lua_type(L, 1) == LUA_TSTRING) { const char *fileName = luaL_checkstring(L, 1); int isShared = luaT_optboolean(L, 2, 0); long size = luaL_optlong(L, 3, 0); - storage = THStorage_(newWithMapping)(state, fileName, size, isShared); + storage = THCStorage_(newWithMapping)(state, fileName, size, isShared); } else if(lua_type(L, 1) == LUA_TTABLE) { long size = lua_objlen(L, 1); long i; - storage = THStorage_(newWithSize)(state, size); + storage = THCStorage_(newWithSize)(state, size); for(i = 1; i <= size; i++) { lua_rawgeti(L, 1, i); if(!lua_isnumber(L, -1)) { - THStorage_(free)(state, storage); + THCStorage_(free)(state, storage); luaL_error(L, "element at index %d is not a number", i); } - THStorage_(set)(state, storage, i-1, (real)lua_tonumber(L, -1)); + THCStorage_(set)(state, storage, i-1, (real)lua_tonumber(L, -1)); lua_pop(L, 1); } } else if(lua_type(L, 1) == LUA_TUSERDATA) { - THStorage *src = luaT_checkudata(L, 1, torch_Storage); + THCStorage *src = luaT_checkudata(L, 1, torch_Storage); real *ptr = src->data; long offset = luaL_optlong(L, 2, 1) - 1; if (offset < 0 || offset >= src->size) { @@ -42,22 +42,22 @@ static int torch_Storage_(new)(lua_State *L) if (size < 1 || size > (src->size - offset)) { luaL_error(L, "size out of bounds"); } - storage = THStorage_(newWithData)(state, ptr + offset, size); + storage = THCStorage_(newWithData)(state, ptr + offset, size); storage->flag = TH_STORAGE_REFCOUNTED | TH_STORAGE_VIEW; storage->view = src; - THStorage_(retain)(state, storage->view); + THCStorage_(retain)(state, storage->view); } else if(lua_type(L, 2) == LUA_TNUMBER) { long size = luaL_optlong(L, 1, 0); real *ptr = (real *)luaL_optlong(L, 2, 0); - storage = THStorage_(newWithData)(state, ptr, size); + storage = THCStorage_(newWithData)(state, ptr, size); storage->flag = TH_STORAGE_REFCOUNTED; } else { long size = luaL_optlong(L, 1, 0); - storage = THStorage_(newWithSize)(state, size); + storage = THCStorage_(newWithSize)(state, size); } luaT_pushudata(L, storage, torch_Storage); return 1; @@ -65,24 +65,24 @@ static int torch_Storage_(new)(lua_State *L) static int torch_Storage_(retain)(lua_State *L) { - THStorage *storage = luaT_checkudata(L, 1, torch_Storage); - THStorage_(retain)(cutorch_getstate(L), storage); + THCStorage *storage = luaT_checkudata(L, 1, torch_Storage); + THCStorage_(retain)(cutorch_getstate(L), storage); return 0; } static int torch_Storage_(free)(lua_State *L) { - THStorage *storage = luaT_checkudata(L, 1, torch_Storage); - THStorage_(free)(cutorch_getstate(L), storage); + THCStorage *storage = luaT_checkudata(L, 1, torch_Storage); + THCStorage_(free)(cutorch_getstate(L), storage); return 0; } static int torch_Storage_(resize)(lua_State *L) { - THStorage *storage = luaT_checkudata(L, 1, torch_Storage); + THCStorage *storage = luaT_checkudata(L, 1, torch_Storage); long size = luaL_checklong(L, 2); /* int keepContent = luaT_optboolean(L, 3, 0); */ - THStorage_(resize)(cutorch_getstate(L), storage, size);/*, keepContent); */ + THCStorage_(resize)(cutorch_getstate(L), storage, size);/*, keepContent); */ lua_settop(L, 1); return 1; } @@ -90,24 +90,24 @@ static int torch_Storage_(resize)(lua_State *L) static int torch_Storage_(copy)(lua_State *L) { THCState *state = cutorch_getstate(L); - THStorage *storage = luaT_checkudata(L, 1, torch_Storage); + THCStorage *storage = luaT_checkudata(L, 1, torch_Storage); void *src; if( (src = luaT_toudata(L, 2, torch_Storage)) ) - THStorage_(copy)(state, storage, src); + THCStorage_(copy)(state, storage, src); else if( (src = luaT_toudata(L, 2, "torch.ByteStorage")) ) - THStorage_(copyByte)(state, storage, src); + THCStorage_(copyByte)(state, storage, src); else if( (src = luaT_toudata(L, 2, "torch.CharStorage")) ) - THStorage_(copyChar)(state, storage, src); + THCStorage_(copyChar)(state, storage, src); else if( (src = luaT_toudata(L, 2, "torch.ShortStorage")) ) - THStorage_(copyShort)(state, storage, src); + THCStorage_(copyShort)(state, storage, src); else if( (src = luaT_toudata(L, 2, "torch.IntStorage")) ) - THStorage_(copyInt)(state, storage, src); + THCStorage_(copyInt)(state, storage, src); else if( (src = luaT_toudata(L, 2, "torch.LongStorage")) ) - THStorage_(copyLong)(state, storage, src); + THCStorage_(copyLong)(state, storage, src); else if( (src = luaT_toudata(L, 2, "torch.FloatStorage")) ) - THStorage_(copyFloat)(state, storage, src); + THCStorage_(copyFloat)(state, storage, src); else if( (src = luaT_toudata(L, 2, "torch.DoubleStorage")) ) - THStorage_(copyDouble)(state, storage, src); + THCStorage_(copyDouble)(state, storage, src); else luaL_typerror(L, 2, "torch.*Storage"); lua_settop(L, 1); @@ -116,9 +116,9 @@ static int torch_Storage_(copy)(lua_State *L) static int torch_Storage_(fill)(lua_State *L) { - THStorage *storage = luaT_checkudata(L, 1, torch_Storage); + THCStorage *storage = luaT_checkudata(L, 1, torch_Storage); double value = luaL_checknumber(L, 2); - THStorage_(fill)(cutorch_getstate(L), storage, (real)value); + THCStorage_(fill)(cutorch_getstate(L), storage, (real)value); lua_settop(L, 1); return 1; } @@ -131,7 +131,7 @@ static int torch_Storage_(elementSize)(lua_State *L) static int torch_Storage_(__len__)(lua_State *L) { - THStorage *storage = luaT_checkudata(L, 1, torch_Storage); + THCStorage *storage = luaT_checkudata(L, 1, torch_Storage); lua_pushnumber(L, storage->size); return 1; } @@ -140,10 +140,10 @@ static int torch_Storage_(__newindex__)(lua_State *L) { if(lua_isnumber(L, 2)) { - THStorage *storage = luaT_checkudata(L, 1, torch_Storage); + THCStorage *storage = luaT_checkudata(L, 1, torch_Storage); long index = luaL_checklong(L, 2) - 1; double number = luaL_checknumber(L, 3); - THStorage_(set)(cutorch_getstate(L), storage, index, (real)number); + THCStorage_(set)(cutorch_getstate(L), storage, index, (real)number); lua_pushboolean(L, 1); } else @@ -156,9 +156,9 @@ static int torch_Storage_(__index__)(lua_State *L) { if(lua_isnumber(L, 2)) { - THStorage *storage = luaT_checkudata(L, 1, torch_Storage); + THCStorage *storage = luaT_checkudata(L, 1, torch_Storage); long index = luaL_checklong(L, 2) - 1; - lua_pushnumber(L, THStorage_(get)(cutorch_getstate(L), storage, index)); + lua_pushnumber(L, THCStorage_(get)(cutorch_getstate(L), storage, index)); lua_pushboolean(L, 1); return 2; } @@ -172,12 +172,12 @@ static int torch_Storage_(__index__)(lua_State *L) #if defined(TH_REAL_IS_CHAR) || defined(TH_REAL_IS_BYTE) static int torch_Storage_(string)(lua_State *L) { - THStorage *storage = luaT_checkudata(L, 1, torch_Storage); + THCStorage *storage = luaT_checkudata(L, 1, torch_Storage); if(lua_isstring(L, -1)) { size_t len = 0; const char *str = lua_tolstring(L, -1, &len); - THStorage_(resize)(cutorch_getstate(L), storage, len); + THCStorage_(resize)(cutorch_getstate(L), storage, len); memmove(storage->data, str, len); lua_settop(L, 1); } @@ -190,7 +190,7 @@ static int torch_Storage_(string)(lua_State *L) static int torch_Storage_(totable)(lua_State *L) { - THStorage *storage = luaT_checkudata(L, 1, torch_Storage); + THCStorage *storage = luaT_checkudata(L, 1, torch_Storage); long i; lua_newtable(L); @@ -204,14 +204,14 @@ static int torch_Storage_(totable)(lua_State *L) static int torch_Storage_(factory)(lua_State *L) { - THStorage *storage = THStorage_(new)(cutorch_getstate(L)); + THCStorage *storage = THCStorage_(new)(cutorch_getstate(L)); luaT_pushudata(L, storage, torch_Storage); return 1; } static int torch_Storage_(write)(lua_State *L) { - THStorage *storage = luaT_checkudata(L, 1, torch_Storage); + THCStorage *storage = luaT_checkudata(L, 1, torch_Storage); THFile *file = luaT_checkudata(L, 2, "torch.File"); THFile_writeLongScalar(file, storage->size); @@ -222,11 +222,11 @@ static int torch_Storage_(write)(lua_State *L) static int torch_Storage_(read)(lua_State *L) { - THStorage *storage = luaT_checkudata(L, 1, torch_Storage); + THCStorage *storage = luaT_checkudata(L, 1, torch_Storage); THFile *file = luaT_checkudata(L, 2, "torch.File"); long size = THFile_readLongScalar(file); - THStorage_(resize)(cutorch_getstate(L), storage, size); + THCStorage_(resize)(cutorch_getstate(L), storage, size); THFile_readRealRaw(file, storage->data, storage->size); return 0; diff --git a/torch/generic/Tensor.c b/torch/generic/Tensor.c index c837050..bbed718 100644 --- a/torch/generic/Tensor.c +++ b/torch/generic/Tensor.c @@ -3,13 +3,13 @@ #else static void torch_Tensor_(c_readTensorStorageSizeStride)(lua_State *L, int index, int allowNone, int allowTensor, int allowStorage, int allowStride, - THStorage **storage_, long *storageOffset_, THLongStorage **size_, THLongStorage **stride_); + THCStorage **storage_, long *storageOffset_, THLongStorage **size_, THLongStorage **stride_); static void torch_Tensor_(c_readSizeStride)(lua_State *L, int index, int allowStride, THLongStorage **size_, THLongStorage **stride_); static int torch_Tensor_(size)(lua_State *L) { - THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor); + THCTensor *tensor = luaT_checkudata(L, 1, torch_Tensor); if(lua_isnumber(L,2)) { int dim = luaL_checkint(L, 2)-1; @@ -33,7 +33,7 @@ static int torch_Tensor_(elementSize)(lua_State *L) static int torch_Tensor_(stride)(lua_State *L) { - THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor); + THCTensor *tensor = luaT_checkudata(L, 1, torch_Tensor); if(lua_isnumber(L,2)) { int dim = luaL_checkint(L, 2)-1; @@ -51,17 +51,17 @@ static int torch_Tensor_(stride)(lua_State *L) static int torch_Tensor_(nDimension)(lua_State *L) { - THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor); + THCTensor *tensor = luaT_checkudata(L, 1, torch_Tensor); lua_pushnumber(L, tensor->nDimension); return 1; } static int torch_Tensor_(storage)(lua_State *L) { - THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor); + THCTensor *tensor = luaT_checkudata(L, 1, torch_Tensor); if(tensor->storage) { - THStorage_(retain)(cutorch_getstate(L), tensor->storage); + THCStorage_(retain)(cutorch_getstate(L), tensor->storage); luaT_pushudata(L, tensor->storage, torch_Storage); } else @@ -72,7 +72,7 @@ static int torch_Tensor_(storage)(lua_State *L) static int torch_Tensor_(storageOffset)(lua_State *L) { - THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor); + THCTensor *tensor = luaT_checkudata(L, 1, torch_Tensor); lua_pushnumber(L, tensor->storageOffset+1); return 1; } @@ -80,7 +80,7 @@ static int torch_Tensor_(storageOffset)(lua_State *L) static int torch_Tensor_(new)(lua_State *L) { THCState *state = cutorch_getstate(L); - THTensor *tensor; + THCTensor *tensor; long storageOffset; THLongStorage *size, *stride; @@ -107,7 +107,7 @@ static int torch_Tensor_(new)(lua_State *L) counter = THLongStorage_newWithSize(size->size); THLongStorage_fill(counter, 0); - tensor = THTensor_(newWithSize)(state, size, NULL); + tensor = THCTensor_(newWithSize)(state, size, NULL); if(size->size == 0) is_finished = 1; @@ -118,7 +118,7 @@ static int torch_Tensor_(new)(lua_State *L) { THLongStorage_free(size); THLongStorage_free(counter); - THTensor_(free)(state, tensor); + THCTensor_(free)(state, tensor); luaL_error(L, "invalid tensor definition"); } @@ -126,7 +126,7 @@ static int torch_Tensor_(new)(lua_State *L) { THLongStorage_free(size); THLongStorage_free(counter); - THTensor_(free)(state, tensor); + THCTensor_(free)(state, tensor); luaL_error(L, "invalid tensor sizes"); } @@ -137,10 +137,10 @@ static int torch_Tensor_(new)(lua_State *L) { THLongStorage_free(size); THLongStorage_free(counter); - THTensor_(free)(state, tensor); + THCTensor_(free)(state, tensor); luaL_error(L, "invalid element (not a number)"); } - THStorage_(set)(state, THTensor_(storage)(state, tensor), si++, (real)lua_tonumber(L, -1)); + THCStorage_(set)(state, THCTensor_(storage)(state, tensor), si++, (real)lua_tonumber(L, -1)); lua_pop(L, 1); } @@ -171,14 +171,14 @@ static int torch_Tensor_(new)(lua_State *L) { THLongStorage_free(size); THLongStorage_free(counter); - THTensor_(free)(state, tensor); + THCTensor_(free)(state, tensor); luaL_error(L, "invalid tensor definition"); } if(lua_objlen(L, -1) != size->data[j]) { THLongStorage_free(size); THLongStorage_free(counter); - THTensor_(free)(state, tensor); + THCTensor_(free)(state, tensor); luaL_error(L, "invalid tensor sizes"); } lua_rawgeti(L, -1, counter->data[j]+1); @@ -193,12 +193,12 @@ static int torch_Tensor_(new)(lua_State *L) } else { - THStorage *storage; + THCStorage *storage; torch_Tensor_(c_readTensorStorageSizeStride)(L, 1, 1, 1, 1, 1, &storage, &storageOffset, &size, &stride); - tensor = THTensor_(newWithStorage)(state, storage, storageOffset, size, stride); + tensor = THCTensor_(newWithStorage)(state, storage, storageOffset, size, stride); THLongStorage_free(size); THLongStorage_free(stride); @@ -210,15 +210,15 @@ static int torch_Tensor_(new)(lua_State *L) static int torch_Tensor_(set)(lua_State *L) { - THTensor *self = luaT_checkudata(L, 1, torch_Tensor); - THStorage *storage; + THCTensor *self = luaT_checkudata(L, 1, torch_Tensor); + THCStorage *storage; long storageOffset; THLongStorage *size, *stride; torch_Tensor_(c_readTensorStorageSizeStride)(L, 2, 1, 1, 1, 1, &storage, &storageOffset, &size, &stride); - THTensor_(setStorage)(cutorch_getstate(L), self, storage, storageOffset, size, stride); + THCTensor_(setStorage)(cutorch_getstate(L), self, storage, storageOffset, size, stride); THLongStorage_free(size); THLongStorage_free(stride); @@ -229,16 +229,16 @@ static int torch_Tensor_(set)(lua_State *L) static int torch_Tensor_(clone)(lua_State *L) { - THTensor *self = luaT_checkudata(L, 1, torch_Tensor); - self = THTensor_(newClone)(cutorch_getstate(L), self); + THCTensor *self = luaT_checkudata(L, 1, torch_Tensor); + self = THCTensor_(newClone)(cutorch_getstate(L), self); luaT_pushudata(L, self, torch_Tensor); return 1; } static int torch_Tensor_(contiguous)(lua_State *L) { - THTensor *self = luaT_checkudata(L, 1, torch_Tensor); - self = THTensor_(newContiguous)(cutorch_getstate(L), self); + THCTensor *self = luaT_checkudata(L, 1, torch_Tensor); + self = THCTensor_(newContiguous)(cutorch_getstate(L), self); luaT_pushudata(L, self, torch_Tensor); return 1; } @@ -246,21 +246,21 @@ static int torch_Tensor_(contiguous)(lua_State *L) /* Resize */ static int torch_Tensor_(resizeAs)(lua_State *L) { - THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor); - THTensor *src = luaT_checkudata(L, 2, torch_Tensor); - THTensor_(resizeAs)(cutorch_getstate(L), tensor, src); + THCTensor *tensor = luaT_checkudata(L, 1, torch_Tensor); + THCTensor *src = luaT_checkudata(L, 2, torch_Tensor); + THCTensor_(resizeAs)(cutorch_getstate(L), tensor, src); lua_settop(L, 1); return 1; } static int torch_Tensor_(resize)(lua_State *L) { - THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor); + THCTensor *tensor = luaT_checkudata(L, 1, torch_Tensor); THLongStorage *size, *stride; torch_Tensor_(c_readSizeStride)(L, 2, 0, &size, &stride); - THTensor_(resize)(cutorch_getstate(L), tensor, size, stride); + THCTensor_(resize)(cutorch_getstate(L), tensor, size, stride); THLongStorage_free(size); THLongStorage_free(stride); @@ -272,7 +272,7 @@ static int torch_Tensor_(resize)(lua_State *L) static int torch_Tensor_(narrow)(lua_State *L) { THCState *state = cutorch_getstate(L); - THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor); + THCTensor *tensor = luaT_checkudata(L, 1, torch_Tensor); int dimension = luaL_checkint(L, 2)-1; long firstIndex = luaL_checklong(L, 3)-1; long size = luaL_checklong(L, 4); @@ -281,8 +281,8 @@ static int torch_Tensor_(narrow)(lua_State *L) THArgCheck( (firstIndex >= 0) && (firstIndex < tensor->size[dimension]), 3, "out of range"); THArgCheck( (size > 0) && (firstIndex+size <= tensor->size[dimension]), 4, "out of range"); */ - tensor = THTensor_(newWithTensor)(state, tensor); - THTensor_(narrow)(state, tensor, NULL, dimension, firstIndex, size); + tensor = THCTensor_(newWithTensor)(state, tensor); + THCTensor_(narrow)(state, tensor, NULL, dimension, firstIndex, size); luaT_pushudata(L, tensor, torch_Tensor); return 1; } @@ -290,7 +290,7 @@ static int torch_Tensor_(narrow)(lua_State *L) static int torch_Tensor_(sub)(lua_State *L) { THCState *state = cutorch_getstate(L); - THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor); + THCTensor *tensor = luaT_checkudata(L, 1, torch_Tensor); long d0s = -1, d0e = -1, d1s = -1, d1e = -1, d2s = -1, d2e = -1, d3s = -1, d3e = -1; d0s = luaL_checklong(L, 2)-1; @@ -346,14 +346,14 @@ static int torch_Tensor_(sub)(lua_State *L) } } - tensor = THTensor_(newWithTensor)(state, tensor); - THTensor_(narrow)(state, tensor, NULL, 0, d0s, d0e-d0s+1); + tensor = THCTensor_(newWithTensor)(state, tensor); + THCTensor_(narrow)(state, tensor, NULL, 0, d0s, d0e-d0s+1); if(d1s >= 0) - THTensor_(narrow)(state, tensor, NULL, 1, d1s, d1e-d1s+1); + THCTensor_(narrow)(state, tensor, NULL, 1, d1s, d1e-d1s+1); if(d2s >= 0) - THTensor_(narrow)(state, tensor, NULL, 2, d2s, d2e-d2s+1); + THCTensor_(narrow)(state, tensor, NULL, 2, d2s, d2e-d2s+1); if(d3s >= 0) - THTensor_(narrow)(state, tensor, NULL, 3, d3s, d3e-d3s+1); + THCTensor_(narrow)(state, tensor, NULL, 3, d3s, d3e-d3s+1); luaT_pushudata(L, tensor, torch_Tensor); return 1; } @@ -361,7 +361,7 @@ static int torch_Tensor_(sub)(lua_State *L) static int torch_Tensor_(select)(lua_State *L) { THCState *state = cutorch_getstate(L); - THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor); + THCTensor *tensor = luaT_checkudata(L, 1, torch_Tensor); int dimension = luaL_checkint(L, 2)-1; long sliceIndex = luaL_checklong(L, 3)-1; @@ -372,29 +372,30 @@ static int torch_Tensor_(select)(lua_State *L) if(tensor->nDimension > 1) { - tensor = THTensor_(newWithTensor)(state, tensor); - THTensor_(select)(state, tensor, NULL, dimension, sliceIndex); + tensor = THCTensor_(newWithTensor)(state, tensor); + THCTensor_(select)(state, tensor, NULL, dimension, sliceIndex); luaT_pushudata(L, tensor, torch_Tensor); } else { THArgCheck(tensor->nDimension == 1, 1, "empty Tensor"); - lua_pushnumber(L, THTensor_(get1d)(state, tensor, sliceIndex)); + lua_pushnumber(L, THCTensor_(get1d)(state, tensor, sliceIndex)); } return 1; } +#ifdef THC_REAL_IS_FLOAT static int torch_Tensor_(indexSelect)(lua_State *L) { THCState *state = cutorch_getstate(L); int narg = lua_gettop(L); - THTensor *tensor, *src, *index; + THCTensor *tensor, *src, *index; THLongTensor *longIndex; int dim; if (narg == 3) { - tensor = THTensor_(new)(state); + tensor = THCTensor_(new)(state); src = luaT_checkudata(L, 1, torch_Tensor); dim = luaL_checkint(L, 2) - 1; index = luaT_toudata(L, 3, torch_Tensor); @@ -418,9 +419,9 @@ static int torch_Tensor_(indexSelect)(lua_State *L) } if (index) - THTensor_(indexSelect)(state, tensor,src,dim,index); + THCTensor_(indexSelect)(state, tensor,src,dim,index); else - THTensor_(indexSelect_long)(state, tensor,src,dim,longIndex); + THCTensor_(indexSelect_long)(state, tensor,src,dim,longIndex); return 1; } @@ -428,7 +429,7 @@ static int torch_Tensor_(indexSelect)(lua_State *L) static int torch_Tensor_(indexCopy)(lua_State *L) { int narg = lua_gettop(L); - THTensor *tensor, *src, *index; + THCTensor *tensor, *src, *index; THLongTensor *longIndex; int dim; if(narg == 4) @@ -447,9 +448,9 @@ static int torch_Tensor_(indexCopy)(lua_State *L) } if (index) - THTensor_(indexCopy)(cutorch_getstate(L), tensor,dim,index,src); + THCTensor_(indexCopy)(cutorch_getstate(L), tensor,dim,index,src); else - THTensor_(indexCopy_long)(cutorch_getstate(L), tensor,dim,longIndex,src); + THCTensor_(indexCopy_long)(cutorch_getstate(L), tensor,dim,longIndex,src); return 1; } @@ -457,7 +458,7 @@ static int torch_Tensor_(indexCopy)(lua_State *L) static int torch_Tensor_(indexAdd)(lua_State *L) { int narg = lua_gettop(L); - THTensor *tensor, *src, *index; + THCTensor *tensor, *src, *index; THLongTensor *longIndex; int dim; if(narg == 4) @@ -476,9 +477,9 @@ static int torch_Tensor_(indexAdd)(lua_State *L) } if (index) - THTensor_(indexAdd)(cutorch_getstate(L), tensor,dim,index,src); + THCTensor_(indexAdd)(cutorch_getstate(L), tensor,dim,index,src); else - THTensor_(indexAdd_long)(cutorch_getstate(L), tensor,dim,longIndex,src); + THCTensor_(indexAdd_long)(cutorch_getstate(L), tensor,dim,longIndex,src); return 1; } @@ -486,7 +487,7 @@ static int torch_Tensor_(indexAdd)(lua_State *L) static int torch_Tensor_(indexFill)(lua_State *L) { int narg = lua_gettop(L); - THTensor *tensor, *index; + THCTensor *tensor, *index; THLongTensor *longIndex; real val; int dim; @@ -506,17 +507,19 @@ static int torch_Tensor_(indexFill)(lua_State *L) } if (index) - THTensor_(indexFill)(cutorch_getstate(L), tensor,dim,index,val); + THCTensor_(indexFill)(cutorch_getstate(L), tensor,dim,index,val); else - THTensor_(indexFill_long)(cutorch_getstate(L), tensor,dim,longIndex,val); + THCTensor_(indexFill_long)(cutorch_getstate(L), tensor,dim,longIndex,val); return 1; } +#endif + static int torch_Tensor_(transpose)(lua_State *L) { THCState *state = cutorch_getstate(L); - THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor); + THCTensor *tensor = luaT_checkudata(L, 1, torch_Tensor); int dimension1 = luaL_checkint(L, 2)-1; int dimension2 = luaL_checkint(L, 3)-1; @@ -525,8 +528,8 @@ static int torch_Tensor_(transpose)(lua_State *L) THArgCheck( (dimension2 >= 0) && (dimension2 < src->nDimension), 3, "out of range"); */ - tensor = THTensor_(newWithTensor)(state, tensor); - THTensor_(transpose)(state, tensor, NULL, dimension1, dimension2); + tensor = THCTensor_(newWithTensor)(state, tensor); + THCTensor_(transpose)(state, tensor, NULL, dimension1, dimension2); luaT_pushudata(L, tensor, torch_Tensor); return 1; } @@ -534,12 +537,12 @@ static int torch_Tensor_(transpose)(lua_State *L) static int torch_Tensor_(t)(lua_State *L) { THCState *state = cutorch_getstate(L); - THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor); + THCTensor *tensor = luaT_checkudata(L, 1, torch_Tensor); luaL_argcheck(L, tensor->nDimension == 2, 1, "Tensor must have 2 dimensions"); - tensor = THTensor_(newWithTensor)(state, tensor); - THTensor_(transpose)(state, tensor, NULL, 0, 1); + tensor = THCTensor_(newWithTensor)(state, tensor); + THCTensor_(transpose)(state, tensor, NULL, 0, 1); luaT_pushudata(L, tensor, torch_Tensor); return 1; } @@ -547,7 +550,7 @@ static int torch_Tensor_(t)(lua_State *L) static int torch_Tensor_(unfold)(lua_State *L) { THCState *state = cutorch_getstate(L); - THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor); + THCTensor *tensor = luaT_checkudata(L, 1, torch_Tensor); int dimension = luaL_checkint(L, 2)-1; long size = luaL_checklong(L, 3); long step = luaL_checklong(L, 4); @@ -558,8 +561,8 @@ static int torch_Tensor_(unfold)(lua_State *L) THArgCheck(size <= src->size[dimension], 3, "out of range"); */ - tensor = THTensor_(newWithTensor)(state, tensor); - THTensor_(unfold)(state, tensor, NULL, dimension, size, step); + tensor = THCTensor_(newWithTensor)(state, tensor); + THCTensor_(unfold)(state, tensor, NULL, dimension, size, step); luaT_pushudata(L, tensor, torch_Tensor); return 1; } @@ -567,73 +570,76 @@ static int torch_Tensor_(unfold)(lua_State *L) /* is contiguous? [a bit like in TnXIterator] */ static int torch_Tensor_(isContiguous)(lua_State *L) { - THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor); - lua_pushboolean(L, THTensor_(isContiguous)(cutorch_getstate(L), tensor)); + THCTensor *tensor = luaT_checkudata(L, 1, torch_Tensor); + lua_pushboolean(L, THCTensor_(isContiguous)(cutorch_getstate(L), tensor)); return 1; } static int torch_Tensor_(isSize)(lua_State *L) { - THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor); + THCTensor *tensor = luaT_checkudata(L, 1, torch_Tensor); THLongStorage *size = luaT_checkudata(L, 2, "torch.LongStorage"); - lua_pushboolean(L, THTensor_(isSize)(cutorch_getstate(L), tensor, size)); + lua_pushboolean(L, THCTensor_(isSize)(cutorch_getstate(L), tensor, size)); return 1; } static int torch_Tensor_(isSetTo)(lua_State *L) { - THTensor *self = luaT_checkudata(L, 1, torch_Tensor); - THTensor *src = luaT_checkudata(L, 2, torch_Tensor); - lua_pushboolean(L, THTensor_(isSetTo)(cutorch_getstate(L), self, src)); + THCTensor *self = luaT_checkudata(L, 1, torch_Tensor); + THCTensor *src = luaT_checkudata(L, 2, torch_Tensor); + lua_pushboolean(L, THCTensor_(isSetTo)(cutorch_getstate(L), self, src)); return 1; } static int torch_Tensor_(isSameSizeAs)(lua_State *L) { - THTensor *self = luaT_checkudata(L, 1, torch_Tensor); - THTensor *src = luaT_checkudata(L, 2, torch_Tensor); - lua_pushboolean(L, THTensor_(isSameSizeAs)(cutorch_getstate(L), self, src)); + THCTensor *self = luaT_checkudata(L, 1, torch_Tensor); + THCTensor *src = luaT_checkudata(L, 2, torch_Tensor); + lua_pushboolean(L, THCTensor_(isSameSizeAs)(cutorch_getstate(L), self, src)); return 1; } static int torch_Tensor_(nElement)(lua_State *L) { - THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor); - lua_pushnumber(L, THTensor_(nElement)(cutorch_getstate(L), tensor)); + THCTensor *tensor = luaT_checkudata(L, 1, torch_Tensor); + lua_pushnumber(L, THCTensor_(nElement)(cutorch_getstate(L), tensor)); return 1; } static int torch_Tensor_(copy)(lua_State *L) { THCState *state = cutorch_getstate(L); - THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor); + THCTensor *tensor = luaT_checkudata(L, 1, torch_Tensor); void *src; if( (src = luaT_toudata(L, 2, torch_Tensor)) ) - THTensor_(copy)(state, tensor, src); + THCTensor_(copy)(state, tensor, src); else if( (src = luaT_toudata(L, 2, "torch.ByteTensor")) ) - THTensor_(copyByte)(state, tensor, src); + THCTensor_(copyByte)(state, tensor, src); else if( (src = luaT_toudata(L, 2, "torch.CharTensor")) ) - THTensor_(copyChar)(state, tensor, src); + THCTensor_(copyChar)(state, tensor, src); else if( (src = luaT_toudata(L, 2, "torch.ShortTensor")) ) - THTensor_(copyShort)(state, tensor, src); + THCTensor_(copyShort)(state, tensor, src); else if( (src = luaT_toudata(L, 2, "torch.IntTensor")) ) - THTensor_(copyInt)(state, tensor, src); + THCTensor_(copyInt)(state, tensor, src); else if( (src = luaT_toudata(L, 2, "torch.LongTensor")) ) - THTensor_(copyLong)(state, tensor, src); + THCTensor_(copyLong)(state, tensor, src); else if( (src = luaT_toudata(L, 2, "torch.FloatTensor")) ) - THTensor_(copyFloat)(state, tensor, src); + THCTensor_(copyFloat)(state, tensor, src); else if( (src = luaT_toudata(L, 2, "torch.DoubleTensor")) ) - THTensor_(copyDouble)(state, tensor, src); + THCTensor_(copyDouble)(state, tensor, src); else luaL_typerror(L, 2, "torch.*Tensor"); lua_settop(L, 1); return 1; } + + +#ifdef THC_REAL_IS_FLOAT static int torch_Tensor_(__newindex__)(lua_State *L) { THCState *state = cutorch_getstate(L); - THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor); + THCTensor *tensor = luaT_checkudata(L, 1, torch_Tensor); THLongStorage *idx = NULL; THByteTensor *mask; THCudaTensor *maskCuda; @@ -649,53 +655,53 @@ static int torch_Tensor_(__newindex__)(lua_State *L) real value = (real)luaL_checknumber(L,3); if (tensor->nDimension == 1) { luaL_argcheck(L, index >= 0 && index < tensor->size[0], 2, "out of range"); - THStorage_(set)(state, tensor->storage, tensor->storageOffset+index*tensor->stride[0], value); + THCStorage_(set)(state, tensor->storage, tensor->storageOffset+index*tensor->stride[0], value); } else { - tensor = THTensor_(newWithTensor)(state, tensor); - THTensor_(narrow)(state, tensor, NULL, 0, index, 1); - THTensor_(fill)(state, tensor, value); - THTensor_(free)(state, tensor); + tensor = THCTensor_(newWithTensor)(state, tensor); + THCTensor_(narrow)(state, tensor, NULL, 0, index, 1); + THCTensor_(fill)(state, tensor, value); + THCTensor_(free)(state, tensor); } } else if( (src = luaT_toudata(L, 3, torch_Tensor)) ) { - tensor = THTensor_(newWithTensor)(state, tensor); - THTensor_(narrow)(state, tensor, NULL, 0, index, 1); - THTensor_(copy)(state, tensor, src); - THTensor_(free)(state, tensor); + tensor = THCTensor_(newWithTensor)(state, tensor); + THCTensor_(narrow)(state, tensor, NULL, 0, index, 1); + THCTensor_(copy)(state, tensor, src); + THCTensor_(free)(state, tensor); } else if( (src = luaT_toudata(L, 3, "torch.ByteTensor")) ) { - tensor = THTensor_(newWithTensor)(state, tensor); - THTensor_(narrow)(state, tensor, NULL, 0, index, 1); - THTensor_(copyByte)(state, tensor, src); - THTensor_(free)(state, tensor); + tensor = THCTensor_(newWithTensor)(state, tensor); + THCTensor_(narrow)(state, tensor, NULL, 0, index, 1); + THCTensor_(copyByte)(state, tensor, src); + THCTensor_(free)(state, tensor); } else if( (src = luaT_toudata(L, 3, "torch.CharTensor")) ) { - tensor = THTensor_(newWithTensor)(state, tensor); - THTensor_(narrow)(state, tensor, NULL, 0, index, 1); - THTensor_(copyChar)(state, tensor, src); - THTensor_(free)(state, tensor); + tensor = THCTensor_(newWithTensor)(state, tensor); + THCTensor_(narrow)(state, tensor, NULL, 0, index, 1); + THCTensor_(copyChar)(state, tensor, src); + THCTensor_(free)(state, tensor); } else if( (src = luaT_toudata(L, 3, "torch.ShortTensor")) ) { - tensor = THTensor_(newWithTensor)(state, tensor); - THTensor_(narrow)(state, tensor, NULL, 0, index, 1); - THTensor_(copyShort)(state, tensor, src); - THTensor_(free)(state, tensor); + tensor = THCTensor_(newWithTensor)(state, tensor); + THCTensor_(narrow)(state, tensor, NULL, 0, index, 1); + THCTensor_(copyShort)(state, tensor, src); + THCTensor_(free)(state, tensor); } else if( (src = luaT_toudata(L, 3, "torch.IntTensor")) ) { - tensor = THTensor_(newWithTensor)(state, tensor); - THTensor_(narrow)(state, tensor, NULL, 0, index, 1); - THTensor_(copyInt)(state, tensor, src); - THTensor_(free)(state, tensor); + tensor = THCTensor_(newWithTensor)(state, tensor); + THCTensor_(narrow)(state, tensor, NULL, 0, index, 1); + THCTensor_(copyInt)(state, tensor, src); + THCTensor_(free)(state, tensor); } else if( (src = luaT_toudata(L, 3, "torch.LongTensor")) ) { - tensor = THTensor_(newWithTensor)(state, tensor); - THTensor_(narrow)(state, tensor, NULL, 0, index, 1); - THTensor_(copyLong)(state, tensor, src); - THTensor_(free)(state, tensor); + tensor = THCTensor_(newWithTensor)(state, tensor); + THCTensor_(narrow)(state, tensor, NULL, 0, index, 1); + THCTensor_(copyLong)(state, tensor, src); + THCTensor_(free)(state, tensor); } else if( (src = luaT_toudata(L, 3, "torch.FloatTensor")) ) { - tensor = THTensor_(newWithTensor)(state, tensor); - THTensor_(narrow)(state, tensor, NULL, 0, index, 1); - THTensor_(copyFloat)(state, tensor, src); - THTensor_(free)(state, tensor); + tensor = THCTensor_(newWithTensor)(state, tensor); + THCTensor_(narrow)(state, tensor, NULL, 0, index, 1); + THCTensor_(copyFloat)(state, tensor, src); + THCTensor_(free)(state, tensor); } else if( (src = luaT_toudata(L, 3, "torch.DoubleTensor")) ) { - tensor = THTensor_(newWithTensor)(state, tensor); - THTensor_(narrow)(state, tensor, NULL, 0, index, 1); - THTensor_(copyDouble)(state, tensor, src); - THTensor_(free)(state, tensor); + tensor = THCTensor_(newWithTensor)(state, tensor); + THCTensor_(narrow)(state, tensor, NULL, 0, index, 1); + THCTensor_(copyDouble)(state, tensor, src); + THCTensor_(free)(state, tensor); } else { luaL_typerror(L, 3, "torch.*Tensor"); } @@ -703,7 +709,7 @@ static int torch_Tensor_(__newindex__)(lua_State *L) } else if((idx = luaT_toudata(L, 2, "torch.LongStorage"))) { - long index = THTensor_(storageOffset)(state, tensor); + long index = THCTensor_(storageOffset)(state, tensor); real value = (real)luaL_checknumber(L,3); int dim; @@ -717,7 +723,7 @@ static int torch_Tensor_(__newindex__)(lua_State *L) index += z*tensor->stride[dim]; } - THStorage_(set)(state, tensor->storage, index, value); + THCStorage_(set)(state, tensor->storage, index, value); lua_pushboolean(L, 1); } else if(lua_istable(L, 2)) @@ -728,7 +734,7 @@ static int torch_Tensor_(__newindex__)(lua_State *L) int done = 0; ndims = tensor->nDimension; luaL_argcheck(L, lua_objlen(L, 2) <= ndims, 2, "too many indices provided"); - tensor = THTensor_(newWithTensor)(state, tensor); + tensor = THCTensor_(newWithTensor)(state, tensor); for(dim = 0; dim < ndims; dim++) { lua_rawgeti(L, 2, dim+1); @@ -741,9 +747,9 @@ static int torch_Tensor_(__newindex__)(lua_State *L) if(tensor->nDimension == 1) { real value = (real)luaL_checknumber(L,3); done = 1; - THStorage_(set)(state, tensor->storage, tensor->storageOffset+z*tensor->stride[0], value); + THCStorage_(set)(state, tensor->storage, tensor->storageOffset+z*tensor->stride[0], value); } else { - THTensor_(select)(state, tensor, NULL, cdim, z); + THCTensor_(select)(state, tensor, NULL, cdim, z); } } else if (lua_istable(L, -1)) @@ -769,7 +775,7 @@ static int torch_Tensor_(__newindex__)(lua_State *L) luaL_argcheck(L, (end >= start), 2, "end index must be greater or equal to start index"); - THTensor_(narrow)(state, tensor, NULL, cdim++, start, end-start+1); + THCTensor_(narrow)(state, tensor, NULL, cdim++, start, end-start+1); } else { @@ -780,41 +786,41 @@ static int torch_Tensor_(__newindex__)(lua_State *L) /* doing a copy */ void *src; if (lua_isnumber(L,3)) { - THTensor_(fill)(state, tensor, lua_tonumber(L,3)); + THCTensor_(fill)(state, tensor, lua_tonumber(L,3)); } else if( (src = luaT_toudata(L, 3, torch_Tensor)) ) { - THTensor_(copy)(state, tensor, src); + THCTensor_(copy)(state, tensor, src); } else if( (src = luaT_toudata(L, 3, "torch.ByteTensor")) ) { - THTensor_(copyByte)(state, tensor, src); + THCTensor_(copyByte)(state, tensor, src); } else if( (src = luaT_toudata(L, 3, "torch.CharTensor")) ) { - THTensor_(copyChar)(state, tensor, src); + THCTensor_(copyChar)(state, tensor, src); } else if( (src = luaT_toudata(L, 3, "torch.ShortTensor")) ) { - THTensor_(copyShort)(state, tensor, src); + THCTensor_(copyShort)(state, tensor, src); } else if( (src = luaT_toudata(L, 3, "torch.IntTensor")) ) { - THTensor_(copyInt)(state, tensor, src); + THCTensor_(copyInt)(state, tensor, src); } else if( (src = luaT_toudata(L, 3, "torch.LongTensor")) ) { - THTensor_(copyLong)(state, tensor, src); + THCTensor_(copyLong)(state, tensor, src); } else if( (src = luaT_toudata(L, 3, "torch.FloatTensor")) ) { - THTensor_(copyFloat)(state, tensor, src); + THCTensor_(copyFloat)(state, tensor, src); } else if( (src = luaT_toudata(L, 3, "torch.DoubleTensor")) ) { - THTensor_(copyDouble)(state, tensor, src); + THCTensor_(copyDouble)(state, tensor, src); } else { luaL_typerror(L, 3, "torch.*Tensor"); } } - THTensor_(free)(state, tensor); + THCTensor_(free)(state, tensor); lua_pushboolean(L, 1); } else if((mask = luaT_toudata(L, 2, "torch.ByteTensor"))) { - THTensor *vals; + THCTensor *vals; if (lua_isnumber(L, 3)) { - THTensor_(maskedFillByte)(state, tensor, mask, + THCTensor_(maskedFillByte)(state, tensor, mask, (real)(luaL_checknumber(L,3))); } else if((vals = luaT_toudata(L, 3, torch_Tensor))) { - THTensor_(maskedCopyByte)(state, tensor, mask, vals); + THCTensor_(maskedCopyByte)(state, tensor, mask, vals); } else { @@ -823,15 +829,15 @@ static int torch_Tensor_(__newindex__)(lua_State *L) } else if((maskCuda = luaT_toudata(L, 2, "torch.CudaTensor"))) { - THTensor *vals; + THCTensor *vals; if (lua_isnumber(L, 3)) { - THTensor_(maskedFill)(state, tensor, maskCuda, + THCTensor_(maskedFill)(state, tensor, maskCuda, (real)(luaL_checknumber(L,3))); } else if((vals = luaT_toudata(L, 3, torch_Tensor))) { - THTensor_(maskedCopy)(state, tensor, maskCuda, vals); + THCTensor_(maskedCopy)(state, tensor, maskCuda, vals); } else { @@ -847,7 +853,7 @@ static int torch_Tensor_(__newindex__)(lua_State *L) static int torch_Tensor_(__index__)(lua_State *L) { THCState *state = cutorch_getstate(L); - THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor); + THCTensor *tensor = luaT_checkudata(L, 1, torch_Tensor); THLongStorage *idx = NULL; THByteTensor *mask; THCudaTensor *maskCuda; @@ -862,12 +868,12 @@ static int torch_Tensor_(__index__)(lua_State *L) if(tensor->nDimension == 1) { - lua_pushnumber(L, THStorage_(get)(state, tensor->storage, tensor->storageOffset+index*tensor->stride[0])); + lua_pushnumber(L, THCStorage_(get)(state, tensor->storage, tensor->storageOffset+index*tensor->stride[0])); } else { - tensor = THTensor_(newWithTensor)(state, tensor); - THTensor_(select)(state, tensor, NULL, 0, index); + tensor = THCTensor_(newWithTensor)(state, tensor); + THCTensor_(select)(state, tensor, NULL, 0, index); luaT_pushudata(L, tensor, torch_Tensor); } lua_pushboolean(L, 1); @@ -875,7 +881,7 @@ static int torch_Tensor_(__index__)(lua_State *L) } else if((idx = luaT_toudata(L, 2, "torch.LongStorage"))) { - long index = THTensor_(storageOffset)(state, tensor); + long index = THCTensor_(storageOffset)(state, tensor); int dim; luaL_argcheck(L, idx->size == tensor->nDimension, 2, "invalid size"); @@ -887,7 +893,7 @@ static int torch_Tensor_(__index__)(lua_State *L) luaL_argcheck(L, (z >= 0) && (z < tensor->size[dim]), 2, "index out of bound"); index += z*tensor->stride[dim]; } - lua_pushnumber(L, (double)THStorage_(get)(state, THTensor_(storage)(state, tensor), index)); + lua_pushnumber(L, (double)THCStorage_(get)(state, THCTensor_(storage)(state, tensor), index)); lua_pushboolean(L, 1); return 2; } @@ -900,7 +906,7 @@ static int torch_Tensor_(__index__)(lua_State *L) ndims = tensor->nDimension; luaL_argcheck(L, lua_objlen(L, 2) <= ndims, 2, "too many indices provided"); - tensor = THTensor_(newWithTensor)(state, tensor); + tensor = THCTensor_(newWithTensor)(state, tensor); for(dim = 0; dim < ndims; dim++) { @@ -913,9 +919,9 @@ static int torch_Tensor_(__index__)(lua_State *L) luaL_argcheck(L, (z >= 0) && (z < tensor->size[cdim]), 2, "index out of bound"); if(tensor->nDimension == 1) { done = 1; - lua_pushnumber(L, THStorage_(get)(state, tensor->storage, tensor->storageOffset+z*tensor->stride[0])); + lua_pushnumber(L, THCStorage_(get)(state, tensor->storage, tensor->storageOffset+z*tensor->stride[0])); } else { - THTensor_(select)(state, tensor, NULL, cdim, z); + THCTensor_(select)(state, tensor, NULL, cdim, z); } } else if (lua_istable(L, -1)) @@ -941,7 +947,7 @@ static int torch_Tensor_(__index__)(lua_State *L) luaL_argcheck(L, (end >= start), 2, "end index must be greater or equal to start index"); - THTensor_(narrow)(state, tensor, NULL, cdim++, start, end-start+1); + THCTensor_(narrow)(state, tensor, NULL, cdim++, start, end-start+1); } else { @@ -951,23 +957,23 @@ static int torch_Tensor_(__index__)(lua_State *L) if(!done) { luaT_pushudata(L, tensor, torch_Tensor); } else { - THTensor_(free)(state, tensor); + THCTensor_(free)(state, tensor); } lua_pushboolean(L, 1); return 2; } else if((mask = luaT_toudata(L, 2, "torch.ByteTensor"))) { - THTensor *vals = THTensor_(new)(state); - THTensor_(maskedSelectByte)(state, vals, tensor, mask); + THCTensor *vals = THCTensor_(new)(state); + THCTensor_(maskedSelectByte)(state, vals, tensor, mask); luaT_pushudata(L, vals, torch_Tensor); lua_pushboolean(L, 1); return 2; } else if((maskCuda = luaT_toudata(L, 2, "torch.CudaTensor"))) { - THTensor *vals = THTensor_(new)(state); - THTensor_(maskedSelect)(state, vals, tensor, maskCuda); + THCTensor *vals = THCTensor_(new)(state); + THCTensor_(maskedSelect)(state, vals, tensor, maskCuda); luaT_pushudata(L, vals, torch_Tensor); lua_pushboolean(L, 1); return 2; @@ -978,18 +984,19 @@ static int torch_Tensor_(__index__)(lua_State *L) return 1; } } +#endif static int torch_Tensor_(retain)(lua_State *L) { - THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor); - THTensor_(retain)(cutorch_getstate(L), tensor); + THCTensor *tensor = luaT_checkudata(L, 1, torch_Tensor); + THCTensor_(retain)(cutorch_getstate(L), tensor); return 0; } static int torch_Tensor_(free)(lua_State *L) { - THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor); - THTensor_(free)(cutorch_getstate(L), tensor); + THCTensor *tensor = luaT_checkudata(L, 1, torch_Tensor); + THCTensor_(free)(cutorch_getstate(L), tensor); return 0; } @@ -1050,11 +1057,11 @@ static void torch_Tensor_(c_readSizeStride)(lua_State *L, int index, int allowSt } static void torch_Tensor_(c_readTensorStorageSizeStride)(lua_State *L, int index, int allowNone, int allowTensor, int allowStorage, int allowStride, - THStorage **storage_, long *storageOffset_, THLongStorage **size_, THLongStorage **stride_) + THCStorage **storage_, long *storageOffset_, THLongStorage **size_, THLongStorage **stride_) { THCState *state = cutorch_getstate(L); - THTensor *src = NULL; - THStorage *storage = NULL; + THCTensor *src = NULL; + THCStorage *storage = NULL; int arg1Type = lua_type(L, index); @@ -1070,8 +1077,8 @@ static void torch_Tensor_(c_readTensorStorageSizeStride)(lua_State *L, int index { *storage_ = src->storage; *storageOffset_ = src->storageOffset; - *size_ = THTensor_(newSizeOf)(state, src); - *stride_ = THTensor_(newStrideOf)(state, src); + *size_ = THCTensor_(newSizeOf)(state, src); + *stride_ = THCTensor_(newStrideOf)(state, src); return; } else if( allowStorage && (arg1Type == LUA_TUSERDATA) && (storage = luaT_toudata(L, index, torch_Storage)) ) @@ -1114,14 +1121,14 @@ static void torch_Tensor_(c_readTensorStorageSizeStride)(lua_State *L, int index static int torch_Tensor_(factory)(lua_State *L) { - THTensor *tensor = THTensor_(new)(cutorch_getstate(L)); + THCTensor *tensor = THCTensor_(new)(cutorch_getstate(L)); luaT_pushudata(L, tensor, torch_Tensor); return 1; } static int torch_Tensor_(write)(lua_State *L) { - THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor); + THCTensor *tensor = luaT_checkudata(L, 1, torch_Tensor); THFile *file = luaT_checkudata(L, 2, "torch.File"); THFile_writeIntScalar(file, tensor->nDimension); @@ -1134,7 +1141,7 @@ static int torch_Tensor_(write)(lua_State *L) /* the storage */ if(tensor->storage) { - THStorage_(retain)(cutorch_getstate(L), tensor->storage); + THCStorage_(retain)(cutorch_getstate(L), tensor->storage); luaT_pushudata(L, tensor->storage, torch_Storage); } else @@ -1147,7 +1154,7 @@ static int torch_Tensor_(write)(lua_State *L) static int torch_Tensor_(read)(lua_State *L) { - THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor); + THCTensor *tensor = luaT_checkudata(L, 1, torch_Tensor); THFile *file = luaT_checkudata(L, 2, "torch.File"); tensor->nDimension = THFile_readIntScalar(file); @@ -1164,7 +1171,7 @@ static int torch_Tensor_(read)(lua_State *L) tensor->storage = luaT_toudata(L, -1, torch_Storage); if(tensor->storage) - THStorage_(retain)(cutorch_getstate(L), tensor->storage); + THCStorage_(retain)(cutorch_getstate(L), tensor->storage); return 0; } @@ -1189,10 +1196,12 @@ static const struct luaL_Reg torch_Tensor_(_) [] = { {"narrow", torch_Tensor_(narrow)}, {"sub", torch_Tensor_(sub)}, {"select", torch_Tensor_(select)}, +#ifdef THC_REAL_IS_FLOAT {"index", torch_Tensor_(indexSelect)}, {"indexCopy", torch_Tensor_(indexCopy)}, {"indexAdd", torch_Tensor_(indexAdd)}, {"indexFill", torch_Tensor_(indexFill)}, +#endif {"transpose", torch_Tensor_(transpose)}, {"t", torch_Tensor_(t)}, {"unfold", torch_Tensor_(unfold)}, @@ -1204,8 +1213,10 @@ static const struct luaL_Reg torch_Tensor_(_) [] = { {"copy", torch_Tensor_(copy)}, {"read", torch_Tensor_(read)}, {"write", torch_Tensor_(write)}, +#ifdef THC_REAL_IS_FLOAT {"__index__", torch_Tensor_(__index__)}, {"__newindex__", torch_Tensor_(__newindex__)}, +#endif {NULL, NULL} }; |