github.com/torch/cutorch.git
 FFI.lua                            |  70
 Storage.c                          |  12
 Tensor.c                           |  13
 Tensor.lua                         |  67
 generic/CStorage.c                 | 193
 generic/CTensor.c                  | 238
 init.c                             |  28
 init.lua                           |  20
 lib/THC/CMakeLists.txt             |  15
 lib/THC/THCGenerateAllTypes.h      | 114
 lib/THC/THCStorage.c               |   6
 lib/THC/THCStorage.cu              |  10
 lib/THC/THCStorage.h               |  17
 lib/THC/THCStorageCopy.c           |   5
 lib/THC/THCStorageCopy.cu          |   5
 lib/THC/THCStorageCopy.h           |  10
 lib/THC/THCTensor.c                |   7
 lib/THC/THCTensor.cu               |   4
 lib/THC/THCTensor.h                |  14
 lib/THC/THCTensorCopy.c            |   6
 lib/THC/THCTensorCopy.cu           |  10
 lib/THC/THCTensorCopy.h            |  10
 lib/THC/generic/THCStorage.c       |  95
 lib/THC/generic/THCStorage.cu      |  31
 lib/THC/generic/THCStorage.h       |  61
 lib/THC/generic/THCStorageCopy.c   |  59
 lib/THC/generic/THCStorageCopy.cu  |  46
 lib/THC/generic/THCStorageCopy.h   |  53
 lib/THC/generic/THCTensor.c        | 293
 lib/THC/generic/THCTensor.cu       |  20
 lib/THC/generic/THCTensor.h        | 155
 lib/THC/generic/THCTensorCopy.c    | 138
 lib/THC/generic/THCTensorCopy.cu   |  69
 lib/THC/generic/THCTensorCopy.h    |  52
 test/test.lua                      | 100
 torch/generic/Storage.c            |  78
 torch/generic/Tensor.c             | 359
 37 files changed, 1490 insertions(+), 993 deletions(-)
diff --git a/FFI.lua b/FFI.lua
index 0afa3db..58d9217 100644
--- a/FFI.lua
+++ b/FFI.lua
@@ -33,49 +33,71 @@ typedef struct THCState
cudaStream_t THCState_getCurrentStream(THCState *state);
-typedef struct THCudaStorage
+]]
+
+ local CudaTypes = {
+ {'float', ''},
+ {'unsigned char', 'Byte'},
+ {'char', 'Char'},
+ {'short', 'Short'},
+ {'int', 'Int'},
+ {'long','Long'},
+ {'double','Double'},
+ }
+
+ for _, typedata in ipairs(CudaTypes) do
+ local real, Real = unpack(typedata)
+ ctype_def = [[
+typedef struct THCStorage
{
- float *data;
+ real *data;
long size;
int refcount;
char flag;
THAllocator *allocator;
void *allocatorContext;
- struct THCudaStorage *view;
-} THCudaStorage;
+ struct THCStorage *view;
+} THCStorage;
-typedef struct THCudaTensor
+typedef struct THCTensor
{
long *size;
long *stride;
int nDimension;
- THCudaStorage *storage;
+ THCStorage *storage;
long storageOffset;
int refcount;
char flag;
-} THCudaTensor;
+} THCTensor;
]]
- ffi.cdef(cdefs)
- local Storage = torch.getmetatable('torch.CudaStorage')
- local Storage_tt = ffi.typeof('THCudaStorage**')
-
- rawset(Storage, "cdata", function(self) return Storage_tt(self)[0] end)
- rawset(Storage, "data", function(self) return Storage_tt(self)[0].data end)
- -- Tensor
- local Tensor = torch.getmetatable('torch.CudaTensor')
- local Tensor_tt = ffi.typeof('THCudaTensor**')
-
- rawset(Tensor, "cdata", function(self) return Tensor_tt(self)[0] end)
+ ctype_def = ctype_def:gsub('real',real):gsub('THCStorage','THCuda'..Real..'Storage'):gsub('THCTensor','THCuda'..Real..'Tensor')
+ cdefs = cdefs .. ctype_def
+ end
+ ffi.cdef(cdefs)
- rawset(Tensor, "data",
- function(self)
- self = Tensor_tt(self)[0]
- return self.storage ~= nil and self.storage.data + self.storageOffset or nil
- end
- )
+ for _, typedata in ipairs(CudaTypes) do
+ local real, Real = unpack(typedata)
+ local Storage = torch.getmetatable('torch.Cuda' .. Real .. 'Storage')
+ local Storage_tt = ffi.typeof('THCuda' .. Real .. 'Storage**')
+
+ rawset(Storage, "cdata", function(self) return Storage_tt(self)[0] end)
+ rawset(Storage, "data", function(self) return Storage_tt(self)[0].data end)
+ -- Tensor
+ local Tensor = torch.getmetatable('torch.Cuda' .. Real .. 'Tensor')
+ local Tensor_tt = ffi.typeof('THCuda' .. Real .. 'Tensor**')
+
+ rawset(Tensor, "cdata", function(self) return Tensor_tt(self)[0] end)
+
+ rawset(Tensor, "data",
+ function(self)
+ self = Tensor_tt(self)[0]
+ return self.storage ~= nil and self.storage.data + self.storageOffset or nil
+ end
+ )
+ end
end
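
For reference, after the gsub substitutions the Byte entry of the loop above hands ffi.cdef roughly these declarations (THAllocator is declared earlier in the same cdefs string):

typedef struct THCudaByteStorage
{
  unsigned char *data;
  long size;
  int refcount;
  char flag;
  THAllocator *allocator;
  void *allocatorContext;
  struct THCudaByteStorage *view;
} THCudaByteStorage;

typedef struct THCudaByteTensor
{
  long *size;
  long *stride;
  int nDimension;
  THCudaByteStorage *storage;
  long storageOffset;
  int refcount;
  char flag;
} THCudaByteTensor;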
diff --git a/Storage.c b/Storage.c
new file mode 100644
index 0000000..a7478a0
--- /dev/null
+++ b/Storage.c
@@ -0,0 +1,12 @@
+#include "torch/utils.h"
+#include "THC.h"
+#include "THFile.h"
+#include "luaT.h"
+
+#define torch_Storage_(NAME) TH_CONCAT_4(torch_,CReal,Storage_,NAME)
+#define torch_Storage TH_CONCAT_STRING_3(torch.,CReal,Storage)
+#define cutorch_Storage_(NAME) TH_CONCAT_4(cutorch_,CReal,Storage_,NAME)
+
+#include "generic/CStorage.c"
+#include "THCGenerateAllTypes.h"
+
diff --git a/Tensor.c b/Tensor.c
new file mode 100644
index 0000000..3adeddd
--- /dev/null
+++ b/Tensor.c
@@ -0,0 +1,13 @@
+#include "torch/utils.h"
+#include "THC.h"
+#include "THFile.h"
+#include "luaT.h"
+
+#define torch_Storage_(NAME) TH_CONCAT_4(torch_,CReal,Storage_,NAME)
+#define torch_Storage TH_CONCAT_STRING_3(torch.,CReal,Storage)
+#define torch_Tensor_(NAME) TH_CONCAT_4(torch_,CReal,Tensor_,NAME)
+#define torch_Tensor TH_CONCAT_STRING_3(torch.,CReal,Tensor)
+#define cutorch_Tensor_(NAME) TH_CONCAT_4(cutorch_,CReal,Tensor_,NAME)
+
+#include "generic/CTensor.c"
+#include "THCGenerateAllTypes.h"
diff --git a/Tensor.lua b/Tensor.lua
index f9c1ca4..32be5c7 100644
--- a/Tensor.lua
+++ b/Tensor.lua
@@ -21,55 +21,42 @@ end
local function Tensor__typeAs(self,tensor)
return self:type(tensor:type())
end
-local function Tensor__cuda(self)
- return self:type('torch.CudaTensor')
-end
-local function Tensor__double(self)
- return self:type('torch.DoubleTensor')
-end
-local function Tensor__float(self)
- return self:type('torch.FloatTensor')
-end
-local function Tensor__byte(self)
- return self:type('torch.ByteTensor')
-end
+local TensorTypes = {
+ float = 'torch.FloatTensor',
+ double = 'torch.DoubleTensor',
+ byte = 'torch.ByteTensor',
+ char = 'torch.CharTensor',
+ int = 'torch.IntTensor',
+ short = 'torch.ShortTensor',
+ long = 'torch.LongTensor',
+ cuda = 'torch.CudaTensor',
+ cudaDouble = 'torch.CudaDoubleTensor',
+ cudaByte = 'torch.CudaByteTensor',
+ cudaChar = 'torch.CudaCharTensor',
+ cudaInt = 'torch.CudaIntTensor',
+ cudaShort = 'torch.CudaShortTensor',
+ cudaLong = 'torch.CudaLongTensor'
+}
-local function Tensor__char(self)
- return self:type('torch.CharTensor')
-end
-local function Tensor__int(self)
- return self:type('torch.IntTensor')
+local function Tensor__converter(type)
+ return function(self)
+ return self:type(type)
+ end
end
-local function Tensor__short(self)
- return self:type('torch.ShortTensor')
+for _, SrcType in pairs(TensorTypes) do
+ for FuncName, DstType in pairs(TensorTypes) do
+ rawset(torch.getmetatable(SrcType), FuncName, Tensor__converter(DstType))
+ end
end
-local function Tensor__long(self)
- return self:type('torch.LongTensor')
+for _, CudaTensorType in pairs(TensorTypes) do
+ rawset(torch.getmetatable(CudaTensorType), 'type', Tensor__type)
+ rawset(torch.getmetatable(CudaTensorType), 'typeAs', Tensor__typeAs)
end
-rawset(torch.getmetatable('torch.DoubleTensor'), 'cuda', Tensor__cuda)
-rawset(torch.getmetatable('torch.FloatTensor'), 'cuda', Tensor__cuda)
-rawset(torch.getmetatable('torch.ByteTensor'), 'cuda', Tensor__cuda)
-rawset(torch.getmetatable('torch.CharTensor'), 'cuda', Tensor__cuda)
-rawset(torch.getmetatable('torch.IntTensor'), 'cuda', Tensor__cuda)
-rawset(torch.getmetatable('torch.ShortTensor'), 'cuda', Tensor__cuda)
-rawset(torch.getmetatable('torch.LongTensor'), 'cuda', Tensor__cuda)
-rawset(torch.getmetatable('torch.CudaTensor'), 'cuda', Tensor__cuda)
-
-rawset(torch.getmetatable('torch.CudaTensor'), 'type', Tensor__type)
-rawset(torch.getmetatable('torch.CudaTensor'), 'typeAs', Tensor__typeAs)
-rawset(torch.getmetatable('torch.CudaTensor'), 'double', Tensor__double)
-rawset(torch.getmetatable('torch.CudaTensor'), 'float', Tensor__float)
-rawset(torch.getmetatable('torch.CudaTensor'), 'byte', Tensor__byte)
-rawset(torch.getmetatable('torch.CudaTensor'), 'char', Tensor__char)
-rawset(torch.getmetatable('torch.CudaTensor'), 'int', Tensor__int)
-rawset(torch.getmetatable('torch.CudaTensor'), 'short', Tensor__short)
-rawset(torch.getmetatable('torch.CudaTensor'), 'long', Tensor__long)
-
do
local metatable = torch.getmetatable('torch.CudaTensor')
for _,func in pairs{'expand', 'expandAs', 'view', 'viewAs', 'repeatTensor',
diff --git a/generic/CStorage.c b/generic/CStorage.c
index 11ea696..c5626d4 100644
--- a/generic/CStorage.c
+++ b/generic/CStorage.c
@@ -1,65 +1,68 @@
-#include "torch/utils.h"
-#include "THC.h"
-#include "THFile.h"
-#include "luaT.h"
+#ifndef THC_GENERIC_FILE
+#define THC_GENERIC_FILE "generic/CStorage.c"
+#else
/* everything is as the generic Storage.c, except a few things (see below) */
-#define real float
-#define Real Cuda
-#define TH_GENERIC_FILE "generic/Storage.c"
-
-#define torch_Storage_(NAME) TH_CONCAT_4(torch_,Real,Storage_,NAME)
-
#define THFile_readRealRaw(file, data, size) \
{ \
- float *fdata = (float*)THAlloc(sizeof(float)*size); \
- THFile_readFloatRaw(file, fdata, size); \
- THCudaCheck(cudaMemcpy(data, fdata, size * sizeof(float), cudaMemcpyHostToDevice)); \
+ real *fdata = (real*)THAlloc(sizeof(real)*size); \
+ TH_CONCAT_3(THFile_read,Real,Raw)(file, fdata, size); \
+ THCudaCheck(cudaMemcpy(data, fdata, size * sizeof(real), cudaMemcpyHostToDevice)); \
THFree(fdata); \
}
#define THFile_writeRealRaw(file, data, size) \
{ \
- float *fdata = (float*)THAlloc(sizeof(float)*size); \
- THCudaCheck(cudaMemcpy(fdata, data, size * sizeof(float), cudaMemcpyDeviceToHost)); \
- THFile_writeFloatRaw(file, fdata, size); \
+ real *fdata = (real*)THAlloc(sizeof(real)*size); \
+ THCudaCheck(cudaMemcpy(fdata, data, size * sizeof(real), cudaMemcpyDeviceToHost)); \
+ TH_CONCAT_3(THFile_write,Real,Raw)(file, fdata, size); \
THFree(fdata); \
}
-#define torch_Storage TH_CONCAT_STRING_3(torch.,Real,Storage)
-
+#define TH_GENERIC_FILE "generic/Storage.c"
#include "generic/Storage.c"
-#undef real
-#undef Real
#undef TH_GENERIC_FILE
+#undef THFile_readRealRaw
+#undef THFile_writeRealRaw
/* now we overwrite some methods specific to CudaStorage */
-static int cutorch_CudaStorage_copy(lua_State *L)
+static int cutorch_Storage_(copy)(lua_State *L)
{
THCState *state = cutorch_getstate(L);
- THCudaStorage *storage = luaT_checkudata(L, 1, "torch.CudaStorage");
+ THCStorage *storage = luaT_checkudata(L, 1, torch_Storage);
void *src;
- if( (src = luaT_toudata(L, 2, "torch.CudaStorage")) )
- THCudaStorage_copy(state, storage, src);
+ if( (src = luaT_toudata(L, 2, "torch.CudaByteStorage")) )
+ THCStorage_(copyCudaByte)(state, storage, src);
+ else if( (src = luaT_toudata(L, 2, "torch.CudaCharStorage")) )
+ THCStorage_(copyCudaChar)(state, storage, src);
+ else if( (src = luaT_toudata(L, 2, "torch.CudaShortStorage")) )
+ THCStorage_(copyCudaShort)(state, storage, src);
+ else if( (src = luaT_toudata(L, 2, "torch.CudaIntStorage")) )
+ THCStorage_(copyCudaInt)(state, storage, src);
+ else if( (src = luaT_toudata(L, 2, "torch.CudaLongStorage")) )
+ THCStorage_(copyCudaLong)(state, storage, src);
+ else if( (src = luaT_toudata(L, 2, "torch.CudaStorage")) )
+ THCStorage_(copyCudaFloat)(state, storage, src);
+ else if( (src = luaT_toudata(L, 2, "torch.CudaDoubleStorage")) )
+ THCStorage_(copyCudaDouble)(state, storage, src);
+
else if( (src = luaT_toudata(L, 2, "torch.ByteStorage")) )
- THCudaStorage_copyByte(state, storage, src);
+ THCStorage_(copyByte)(state, storage, src);
else if( (src = luaT_toudata(L, 2, "torch.CharStorage")) )
- THCudaStorage_copyChar(state, storage, src);
+ THCStorage_(copyChar)(state, storage, src);
else if( (src = luaT_toudata(L, 2, "torch.ShortStorage")) )
- THCudaStorage_copyShort(state, storage, src);
+ THCStorage_(copyShort)(state, storage, src);
else if( (src = luaT_toudata(L, 2, "torch.IntStorage")) )
- THCudaStorage_copyInt(state, storage, src);
+ THCStorage_(copyInt)(state, storage, src);
else if( (src = luaT_toudata(L, 2, "torch.LongStorage")) )
- THCudaStorage_copyLong(state, storage, src);
+ THCStorage_(copyLong)(state, storage, src);
else if( (src = luaT_toudata(L, 2, "torch.FloatStorage")) )
- THCudaStorage_copyFloat(state, storage, src);
+ THCStorage_(copyFloat)(state, storage, src);
else if( (src = luaT_toudata(L, 2, "torch.DoubleStorage")) )
- THCudaStorage_copyDouble(state, storage, src);
- else if( (src = luaT_toudata(L, 2, "torch.CudaStorage")) )
- THCudaStorage_copyCuda(state, storage, src);
+ THCStorage_(copyDouble)(state, storage, src);
else
luaL_typerror(L, 2, "torch.*Storage");
@@ -67,77 +70,63 @@ static int cutorch_CudaStorage_copy(lua_State *L)
return 1;
}
-#define CUDA_IMPLEMENT_STORAGE_COPY(TYPEC) \
- static int cutorch_##TYPEC##Storage_copy(lua_State *L) \
- { \
- TH##TYPEC##Storage *storage = luaT_checkudata(L, 1, "torch." #TYPEC "Storage"); \
- void *src; \
- if( (src = luaT_toudata(L, 2, "torch." #TYPEC "Storage")) ) \
- TH##TYPEC##Storage_copy(storage, src); \
- else if( (src = luaT_toudata(L, 2, "torch.ByteStorage")) ) \
- TH##TYPEC##Storage_copyByte(storage, src); \
- else if( (src = luaT_toudata(L, 2, "torch.CharStorage")) ) \
- TH##TYPEC##Storage_copyChar(storage, src); \
- else if( (src = luaT_toudata(L, 2, "torch.ShortStorage")) ) \
- TH##TYPEC##Storage_copyShort(storage, src); \
- else if( (src = luaT_toudata(L, 2, "torch.IntStorage")) ) \
- TH##TYPEC##Storage_copyInt(storage, src); \
- else if( (src = luaT_toudata(L, 2, "torch.LongStorage")) ) \
- TH##TYPEC##Storage_copyLong(storage, src); \
- else if( (src = luaT_toudata(L, 2, "torch.FloatStorage")) ) \
- TH##TYPEC##Storage_copyFloat(storage, src); \
- else if( (src = luaT_toudata(L, 2, "torch.DoubleStorage")) ) \
- TH##TYPEC##Storage_copyDouble(storage, src); \
- else if( (src = luaT_toudata(L, 2, "torch.CudaStorage")) ) \
- TH##TYPEC##Storage_copyCuda(cutorch_getstate(L), storage, src); \
- else \
- luaL_typerror(L, 2, "torch.*Storage"); \
- \
- lua_settop(L, 1); \
- return 1; \
-}
+static int TH_CONCAT_3(cutorch_,Real,Storage_copy)(lua_State *L)
+{
+ THStorage *storage = luaT_checkudata(L, 1, TH_CONCAT_STRING_3(torch.,Real,Storage));
+ void *src;
+ if( (src = luaT_toudata(L, 2, TH_CONCAT_STRING_3(torch.,Real,Storage) )))
+ THStorage_(copy)(storage, src);
+ else if( (src = luaT_toudata(L, 2, "torch.ByteStorage")) )
+ THStorage_(copyByte)(storage, src);
+ else if( (src = luaT_toudata(L, 2, "torch.CharStorage")) )
+ THStorage_(copyChar)(storage, src);
+ else if( (src = luaT_toudata(L, 2, "torch.ShortStorage")) )
+ THStorage_(copyShort)(storage, src);
+ else if( (src = luaT_toudata(L, 2, "torch.IntStorage")) )
+ THStorage_(copyInt)(storage, src);
+ else if( (src = luaT_toudata(L, 2, "torch.LongStorage")) )
+ THStorage_(copyLong)(storage, src);
+ else if( (src = luaT_toudata(L, 2, "torch.FloatStorage")) )
+ THStorage_(copyFloat)(storage, src);
+ else if( (src = luaT_toudata(L, 2, "torch.DoubleStorage")) )
+ THStorage_(copyDouble)(storage, src);
+ else if( (src = luaT_toudata(L, 2, "torch.CudaStorage")) )
+ THStorage_(copyCudaFloat)(cutorch_getstate(L), storage, src);
+ else if( (src = luaT_toudata(L, 2, "torch.CudaLongStorage")) )
+ THStorage_(copyCudaLong)(cutorch_getstate(L), storage, src);
+ else if( (src = luaT_toudata(L, 2, "torch.CudaByteStorage")) )
+ THStorage_(copyCudaByte)(cutorch_getstate(L), storage, src);
+ else if( (src = luaT_toudata(L, 2, "torch.CudaCharStorage")) )
+ THStorage_(copyCudaChar)(cutorch_getstate(L), storage, src);
+ else if( (src = luaT_toudata(L, 2, "torch.CudaShortStorage")) )
+ THStorage_(copyCudaShort)(cutorch_getstate(L), storage, src);
+ else if( (src = luaT_toudata(L, 2, "torch.CudaIntStorage")) )
+ THStorage_(copyCudaInt)(cutorch_getstate(L), storage, src);
+ else if( (src = luaT_toudata(L, 2, "torch.CudaDoubleStorage")) )
+ THStorage_(copyCudaDouble)(cutorch_getstate(L), storage, src);
+ else
+ luaL_typerror(L, 2, "torch.*Storage");
-CUDA_IMPLEMENT_STORAGE_COPY(Byte)
-CUDA_IMPLEMENT_STORAGE_COPY(Char)
-CUDA_IMPLEMENT_STORAGE_COPY(Short)
-CUDA_IMPLEMENT_STORAGE_COPY(Int)
-CUDA_IMPLEMENT_STORAGE_COPY(Long)
-CUDA_IMPLEMENT_STORAGE_COPY(Float)
-CUDA_IMPLEMENT_STORAGE_COPY(Double)
+ lua_settop(L, 1);
+ return 1;
+}
-void cutorch_CudaStorage_init(lua_State* L)
+void cutorch_Storage_(init)(lua_State* L)
{
/* the standard stuff */
- torch_CudaStorage_init(L);
-
- /* the copy methods */
- {
- int i;
-
- const void* tnames[8] = {"torch.ByteStorage",
- "torch.CharStorage",
- "torch.ShortStorage",
- "torch.IntStorage",
- "torch.LongStorage",
- "torch.FloatStorage",
- "torch.DoubleStorage",
- "torch.CudaStorage"};
-
- static int (*funcs[8])(lua_State*) = {cutorch_ByteStorage_copy,
- cutorch_CharStorage_copy,
- cutorch_ShortStorage_copy,
- cutorch_IntStorage_copy,
- cutorch_LongStorage_copy,
- cutorch_FloatStorage_copy,
- cutorch_DoubleStorage_copy,
- cutorch_CudaStorage_copy};
-
- for(i = 0; i < 8; i++)
- {
- luaT_pushmetatable(L, tnames[i]);
- lua_pushcfunction(L, funcs[i]);
- lua_setfield(L, -2, "copy");
- lua_pop(L, 1);
- }
- }
+ torch_Storage_(init)(L);
+
+  // the torch_Storage macro (defined at the top of Storage.c) expands to the
+  // Cuda storage type names, so the plain torch.*Storage names are built by hand
+ luaT_pushmetatable(L, TH_CONCAT_STRING_3(torch.,Real,Storage));
+ lua_pushcfunction(L, TH_CONCAT_3(cutorch_,Real,Storage_copy));
+ lua_setfield(L, -2, "copy");
+ lua_pop(L, 1);
+
+ luaT_pushmetatable(L, torch_Storage);
+ lua_pushcfunction(L, cutorch_Storage_(copy));
+ lua_setfield(L, -2, "copy");
+ lua_pop(L, 1);
}
+
+#endif
diff --git a/generic/CTensor.c b/generic/CTensor.c
index 5666dec..79a8a48 100644
--- a/generic/CTensor.c
+++ b/generic/CTensor.c
@@ -1,49 +1,48 @@
-#include "torch/utils.h"
-#include "THC.h"
-#include "THFile.h"
-#include "luaT.h"
+#ifndef THC_GENERIC_FILE
+#define THC_GENERIC_FILE "generic/CTensor.c"
+#else
/* everything is as the generic Tensor.c, except a few things (see below) */
-#define real float
-#define Real Cuda
-
-#define torch_Storage_(NAME) TH_CONCAT_4(torch_,Real,Storage_,NAME)
-#define torch_Storage TH_CONCAT_STRING_3(torch.,Real,Storage)
-#define torch_Tensor_(NAME) TH_CONCAT_4(torch_,Real,Tensor_,NAME)
-#define torch_Tensor TH_CONCAT_STRING_3(torch.,Real,Tensor)
-
#define TH_GENERIC_FILE "generic/Tensor.c"
#include "generic/Tensor.c"
#undef TH_GENERIC_FILE
-#undef real
-#undef Real
-
/* now we overwrite some methods specific to CudaTensor */
-static int cutorch_CudaTensor_copy(lua_State *L)
+static int cutorch_Tensor_(copy)(lua_State *L)
{
THCState *state = cutorch_getstate(L);
- THCudaTensor *storage = luaT_checkudata(L, 1, "torch.CudaTensor");
+ THCTensor *tensor = luaT_checkudata(L, 1, torch_Tensor);
void *src;
if( (src = luaT_toudata(L, 2, "torch.CudaTensor")) )
- THCudaTensor_copy(state, storage, src);
+ THCTensor_(copyCudaFloat)(state, tensor, src);
+ else if( (src = luaT_toudata(L, 2, "torch.CudaByteTensor")) )
+ THCTensor_(copyCudaByte)(state, tensor, src);
+ else if( (src = luaT_toudata(L, 2, "torch.CudaCharTensor")) )
+ THCTensor_(copyCudaChar)(state, tensor, src);
+ else if( (src = luaT_toudata(L, 2, "torch.CudaShortTensor")) )
+ THCTensor_(copyCudaShort)(state, tensor, src);
+ else if( (src = luaT_toudata(L, 2, "torch.CudaIntTensor")) )
+ THCTensor_(copyCudaInt)(state, tensor, src);
+ else if( (src = luaT_toudata(L, 2, "torch.CudaLongTensor")) )
+ THCTensor_(copyCudaLong)(state, tensor, src);
+ else if( (src = luaT_toudata(L, 2, "torch.CudaDoubleTensor")) )
+ THCTensor_(copyCudaDouble)(state, tensor, src);
+
else if( (src = luaT_toudata(L, 2, "torch.ByteTensor")) )
- THCudaTensor_copyByte(state, storage, src);
+ THCTensor_(copyByte)(state, tensor, src);
else if( (src = luaT_toudata(L, 2, "torch.CharTensor")) )
- THCudaTensor_copyChar(state, storage, src);
+ THCTensor_(copyChar)(state, tensor, src);
else if( (src = luaT_toudata(L, 2, "torch.ShortTensor")) )
- THCudaTensor_copyShort(state, storage, src);
+ THCTensor_(copyShort)(state, tensor, src);
else if( (src = luaT_toudata(L, 2, "torch.IntTensor")) )
- THCudaTensor_copyInt(state, storage, src);
+ THCTensor_(copyInt)(state, tensor, src);
else if( (src = luaT_toudata(L, 2, "torch.LongTensor")) )
- THCudaTensor_copyLong(state, storage, src);
+ THCTensor_(copyLong)(state, tensor, src);
else if( (src = luaT_toudata(L, 2, "torch.FloatTensor")) )
- THCudaTensor_copyFloat(state, storage, src);
+ THCTensor_(copyFloat)(state, tensor, src);
else if( (src = luaT_toudata(L, 2, "torch.DoubleTensor")) )
- THCudaTensor_copyDouble(state, storage, src);
- else if( (src = luaT_toudata(L, 2, "torch.CudaTensor")) )
- THCudaTensor_copyCuda(state, storage, src);
+ THCTensor_(copyDouble)(state, tensor, src);
else
luaL_typerror(L, 2, "torch.*Tensor");
@@ -51,73 +50,84 @@ static int cutorch_CudaTensor_copy(lua_State *L)
return 1;
}
-static int cutorch_CudaTensor_copyAsync(lua_State *L)
+static int cutorch_Tensor_(copyAsyncCPU)(lua_State *L)
{
+#define STRINGIFY_TENSOR(x) TH_CONCAT_STRING_3(torch.,x,Tensor)
THCState *state = cutorch_getstate(L);
- THCudaTensor *storage = luaT_checkudata(L, 1, "torch.CudaTensor");
+ THCTensor *tensor = luaT_checkudata(L, 1, STRINGIFY_TENSOR(CReal));
void *src;
- if( (src = luaT_toudata(L, 2, "torch.CudaTensor")) )
- THCudaTensor_copy(state, storage, src);
- else if( (src = luaT_toudata(L, 2, "torch.FloatTensor")) )
- THCudaTensor_copyAsyncFloat(state, storage, src);
+ if( (src = luaT_toudata(L, 2, STRINGIFY_TENSOR(CReal))))
+ THCTensor_(copy)(state, tensor, src);
+ else if( (src = luaT_toudata(L, 2, STRINGIFY_TENSOR(Real))))
+ THCTensor_(copyAsyncCPU)(state, tensor, src);
else
- luaL_typerror(L, 2, "torch.FloatTensor or torch.CudaTensor");
+ luaL_typerror(L, 2, STRINGIFY_TENSOR(Real) " or " STRINGIFY_TENSOR(CReal));
lua_settop(L, 1);
return 1;
+#undef STRINGIFY_TENSOR
}
-#define CUDA_IMPLEMENT_TENSOR_COPY(TYPEC) \
- static int cutorch_##TYPEC##Tensor_copy(lua_State *L) \
- { \
- TH##TYPEC##Tensor *storage = luaT_checkudata(L, 1, "torch." #TYPEC "Tensor"); \
- void *src; \
- if( (src = luaT_toudata(L, 2, "torch." #TYPEC "Tensor")) ) \
- TH##TYPEC##Tensor_copy(storage, src); \
- else if( (src = luaT_toudata(L, 2, "torch.ByteTensor")) ) \
- TH##TYPEC##Tensor_copyByte(storage, src); \
- else if( (src = luaT_toudata(L, 2, "torch.CharTensor")) ) \
- TH##TYPEC##Tensor_copyChar(storage, src); \
- else if( (src = luaT_toudata(L, 2, "torch.ShortTensor")) ) \
- TH##TYPEC##Tensor_copyShort(storage, src); \
- else if( (src = luaT_toudata(L, 2, "torch.IntTensor")) ) \
- TH##TYPEC##Tensor_copyInt(storage, src); \
- else if( (src = luaT_toudata(L, 2, "torch.LongTensor")) ) \
- TH##TYPEC##Tensor_copyLong(storage, src); \
- else if( (src = luaT_toudata(L, 2, "torch.FloatTensor")) ) \
- TH##TYPEC##Tensor_copyFloat(storage, src); \
- else if( (src = luaT_toudata(L, 2, "torch.DoubleTensor")) ) \
- TH##TYPEC##Tensor_copyDouble(storage, src); \
- else if( (src = luaT_toudata(L, 2, "torch.CudaTensor")) ) \
- TH##TYPEC##Tensor_copyCuda(cutorch_getstate(L), storage, src); \
- else \
- luaL_typerror(L, 2, "torch.*Tensor"); \
- \
- lua_settop(L, 1); \
- return 1; \
- }
-CUDA_IMPLEMENT_TENSOR_COPY(Byte)
-CUDA_IMPLEMENT_TENSOR_COPY(Char)
-CUDA_IMPLEMENT_TENSOR_COPY(Short)
-CUDA_IMPLEMENT_TENSOR_COPY(Int)
-CUDA_IMPLEMENT_TENSOR_COPY(Long)
-CUDA_IMPLEMENT_TENSOR_COPY(Float)
-CUDA_IMPLEMENT_TENSOR_COPY(Double)
+static int TH_CONCAT_3(cutorch_,Real,Tensor_copy)(lua_State *L)
+{
+ THTensor *tensor = luaT_checkudata(L, 1, TH_CONCAT_STRING_3(torch.,Real,Tensor));
+ void *src;
+ if( (src = luaT_toudata(L, 2, TH_CONCAT_STRING_3(torch.,Real,Tensor)) ))
+ THTensor_(copy)(tensor, src);
+ else if( (src = luaT_toudata(L, 2, "torch.ByteTensor")) )
+ THTensor_(copyByte)(tensor, src);
+ else if( (src = luaT_toudata(L, 2, "torch.CharTensor")) )
+ THTensor_(copyChar)(tensor, src);
+ else if( (src = luaT_toudata(L, 2, "torch.ShortTensor")) )
+ THTensor_(copyShort)(tensor, src);
+ else if( (src = luaT_toudata(L, 2, "torch.IntTensor")) )
+ THTensor_(copyInt)(tensor, src);
+ else if( (src = luaT_toudata(L, 2, "torch.LongTensor")) )
+ THTensor_(copyLong)(tensor, src);
+ else if( (src = luaT_toudata(L, 2, "torch.FloatTensor")) )
+ THTensor_(copyFloat)(tensor, src);
+ else if( (src = luaT_toudata(L, 2, "torch.DoubleTensor")) )
+ THTensor_(copyDouble)(tensor, src);
+ else if( (src = luaT_toudata(L, 2, "torch.CudaByteTensor")) )
+ THTensor_(copyCudaByte)(cutorch_getstate(L), tensor, src);
+ else if( (src = luaT_toudata(L, 2, "torch.CudaCharTensor")) )
+ THTensor_(copyCudaChar)(cutorch_getstate(L), tensor, src);
+ else if( (src = luaT_toudata(L, 2, "torch.CudaShortTensor")) )
+ THTensor_(copyCudaShort)(cutorch_getstate(L), tensor, src);
+ else if( (src = luaT_toudata(L, 2, "torch.CudaIntTensor")) )
+ THTensor_(copyCudaInt)(cutorch_getstate(L), tensor, src);
+ else if( (src = luaT_toudata(L, 2, "torch.CudaLongTensor")) )
+ THTensor_(copyCudaLong)(cutorch_getstate(L), tensor, src);
+ else if( (src = luaT_toudata(L, 2, "torch.CudaTensor")) )
+ THTensor_(copyCudaFloat)(cutorch_getstate(L), tensor, src);
+ else if( (src = luaT_toudata(L, 2, "torch.CudaDoubleTensor")) )
+ THTensor_(copyCudaDouble)(cutorch_getstate(L), tensor, src);
+ else
+ luaL_typerror(L, 2, "torch.*Tensor");
+
+ lua_settop(L, 1);
+ return 1;
+}
-static int cutorch_FloatTensor_copyAsync(lua_State *L)
+static int TH_CONCAT_3(cutorch_,Real,Tensor_copyAsyncCuda)(lua_State *L)
{
- THFloatTensor *storage = luaT_checkudata(L, 1, "torch.FloatTensor");
+#define STRINGIFY_TENSOR(x) TH_CONCAT_STRING_3(torch.,x,Tensor)
+ THTensor *tensor = luaT_checkudata(L, 1, STRINGIFY_TENSOR(Real));
void *src;
- if( (src = luaT_toudata(L, 2, "torch.CudaTensor")) )
- THFloatTensor_copyAsyncCuda(cutorch_getstate(L), storage, src);
+ if( (src = luaT_toudata(L, 2, STRINGIFY_TENSOR(CReal))))
+ THTensor_(copyAsyncCuda)(cutorch_getstate(L), tensor, src);
else
- luaL_typerror(L, 2, "torch.CudaTensor");
+ luaL_typerror(L, 2, STRINGIFY_TENSOR(CReal));
lua_settop(L, 1);
return 1;
+#undef STRINGIFY_TENSOR
}
+
+
+#ifdef THC_REAL_IS_FLOAT
static void THFloatTensor_computesz(THFloatTensor *self, long **sz_, long **st_)
{
long *sz, *st, *szh;
@@ -201,69 +211,55 @@ static int cuda_FloatTensor_fakecopy(lua_State *L)
lua_settop(L, 1);
return 1;
}
+#endif
-static int cutorch_CudaTensor_getDevice(lua_State *L) {
- THCudaTensor *tensor = luaT_checkudata(L, 1, "torch.CudaTensor");
- lua_pushinteger(L, THCudaTensor_getDevice(cutorch_getstate(L), tensor) + 1);
+static int cutorch_Tensor_(getDevice)(lua_State *L) {
+ THCTensor *tensor = luaT_checkudata(L, 1, torch_Tensor);
+ lua_pushinteger(L, THCTensor_(getDevice)(cutorch_getstate(L), tensor) + 1);
return 1;
}
-void cutorch_CudaTensor_init(lua_State* L)
+void cutorch_Tensor_(init)(lua_State* L)
{
/* the standard stuff */
- torch_CudaTensor_init(L);
+ torch_Tensor_(init)(L);
/* additional methods */
+#ifdef THC_REAL_IS_FLOAT
luaT_pushmetatable(L, "torch.FloatTensor");
lua_pushcfunction(L, cuda_FloatTensor_fakecopy);
lua_setfield(L, -2, "fakecopy");
lua_pop(L, 1);
+#endif
- /* the copy methods */
- {
- int i;
-
- const void* tnames[8] = {"torch.ByteTensor",
- "torch.CharTensor",
- "torch.ShortTensor",
- "torch.IntTensor",
- "torch.LongTensor",
- "torch.FloatTensor",
- "torch.DoubleTensor",
- "torch.CudaTensor"};
-
- static int (*funcs[8])(lua_State*) = {cutorch_ByteTensor_copy,
- cutorch_CharTensor_copy,
- cutorch_ShortTensor_copy,
- cutorch_IntTensor_copy,
- cutorch_LongTensor_copy,
- cutorch_FloatTensor_copy,
- cutorch_DoubleTensor_copy,
- cutorch_CudaTensor_copy};
-
- for(i = 0; i < 8; i++)
- {
- luaT_pushmetatable(L, tnames[i]);
- lua_pushcfunction(L, funcs[i]);
- lua_setfield(L, -2, "copy");
- lua_pop(L, 1);
- }
+  // the torch_Tensor macro (defined at the top of Tensor.c) expands to the
+  // Cuda tensor type names, so the plain torch.*Tensor names are built by hand
+ luaT_pushmetatable(L, TH_CONCAT_STRING_3(torch.,Real,Tensor));
+ lua_pushcfunction(L, TH_CONCAT_3(cutorch_,Real,Tensor_copy));
+ lua_setfield(L, -2, "copy");
+ lua_pop(L, 1);
- // Register async copy methods.
- luaT_pushmetatable(L, "torch.CudaTensor");
- lua_pushcfunction(L, cutorch_CudaTensor_copyAsync);
- lua_setfield(L, -2, "copyAsync");
- lua_pop(L, 1);
+ luaT_pushmetatable(L, torch_Tensor);
+ lua_pushcfunction(L, cutorch_Tensor_(copy));
+ lua_setfield(L, -2, "copy");
+ lua_pop(L, 1);
- luaT_pushmetatable(L, "torch.FloatTensor");
- lua_pushcfunction(L, cutorch_FloatTensor_copyAsync);
- lua_setfield(L, -2, "copyAsync");
- lua_pop(L, 1);
- }
+ // Register async copy methods.
+ luaT_pushmetatable(L, TH_CONCAT_STRING_3(torch.,Real,Tensor));
+ lua_pushcfunction(L, TH_CONCAT_3(cutorch_,Real,Tensor_copyAsyncCuda));
+ lua_setfield(L, -2, "copyAsync");
+ lua_pop(L, 1);
+
+ luaT_pushmetatable(L, torch_Tensor);
+ lua_pushcfunction(L, cutorch_Tensor_(copyAsyncCPU));
+ lua_setfield(L, -2, "copyAsync");
+ lua_pop(L, 1);
- luaT_pushmetatable(L, "torch.CudaTensor");
- lua_pushcfunction(L, cutorch_CudaTensor_getDevice);
+ luaT_pushmetatable(L, torch_Tensor);
+ lua_pushcfunction(L, cutorch_Tensor_(getDevice));
lua_setfield(L, -2, "getDevice");
lua_pop(L, 1);
}
+
+#endif
diff --git a/init.c b/init.c
index 19dd3bb..44e1d75 100644
--- a/init.c
+++ b/init.c
@@ -3,8 +3,22 @@
#include "THCGeneral.h"
#include "THCTensorRandom.h"
+extern void cutorch_CudaByteStorage_init(lua_State* L);
+extern void cutorch_CudaCharStorage_init(lua_State* L);
+extern void cutorch_CudaShortStorage_init(lua_State* L);
+extern void cutorch_CudaIntStorage_init(lua_State* L);
+extern void cutorch_CudaLongStorage_init(lua_State* L);
extern void cutorch_CudaStorage_init(lua_State* L);
+extern void cutorch_CudaDoubleStorage_init(lua_State* L);
+
+extern void cutorch_CudaByteTensor_init(lua_State* L);
+extern void cutorch_CudaCharTensor_init(lua_State* L);
+extern void cutorch_CudaShortTensor_init(lua_State* L);
+extern void cutorch_CudaIntTensor_init(lua_State* L);
+extern void cutorch_CudaLongTensor_init(lua_State* L);
extern void cutorch_CudaTensor_init(lua_State* L);
+extern void cutorch_CudaDoubleTensor_init(lua_State* L);
+
extern void cutorch_CudaTensorMath_init(lua_State* L);
extern void cutorch_CudaTensorOperator_init(lua_State* L);
@@ -875,8 +889,22 @@ int luaopen_libcutorch(lua_State *L)
lua_setfield(L, -2, "magma");
#endif
+ cutorch_CudaByteStorage_init(L);
+ cutorch_CudaCharStorage_init(L);
+ cutorch_CudaShortStorage_init(L);
+ cutorch_CudaIntStorage_init(L);
+ cutorch_CudaLongStorage_init(L);
cutorch_CudaStorage_init(L);
+ cutorch_CudaDoubleStorage_init(L);
+
+ cutorch_CudaByteTensor_init(L);
+ cutorch_CudaCharTensor_init(L);
+ cutorch_CudaShortTensor_init(L);
+ cutorch_CudaIntTensor_init(L);
+ cutorch_CudaLongTensor_init(L);
cutorch_CudaTensor_init(L);
+ cutorch_CudaDoubleTensor_init(L);
+
cutorch_CudaTensorMath_init(L);
cutorch_CudaTensorOperator_init(L);
cutorch_Event_init(L);
diff --git a/init.lua b/init.lua
index bd8a0cb..df47f4e 100644
--- a/init.lua
+++ b/init.lua
@@ -1,8 +1,20 @@
require "torch"
paths.require "libcutorch"
-torch.CudaStorage.__tostring__ = torch.FloatStorage.__tostring__
-torch.CudaTensor.__tostring__ = torch.FloatTensor.__tostring__
+torch.CudaByteStorage.__tostring__ = torch.ByteStorage.__tostring__
+torch.CudaByteTensor.__tostring__ = torch.ByteTensor.__tostring__
+torch.CudaCharStorage.__tostring__ = torch.CharStorage.__tostring__
+torch.CudaCharTensor.__tostring__ = torch.CharTensor.__tostring__
+torch.CudaShortStorage.__tostring__ = torch.ShortStorage.__tostring__
+torch.CudaShortTensor.__tostring__ = torch.ShortTensor.__tostring__
+torch.CudaIntStorage.__tostring__ = torch.IntStorage.__tostring__
+torch.CudaIntTensor.__tostring__ = torch.IntTensor.__tostring__
+torch.CudaLongStorage.__tostring__ = torch.LongStorage.__tostring__
+torch.CudaLongTensor.__tostring__ = torch.LongTensor.__tostring__
+torch.CudaStorage.__tostring__ = torch.FloatStorage.__tostring__
+torch.CudaTensor.__tostring__ = torch.FloatTensor.__tostring__
+torch.CudaDoubleStorage.__tostring__ = torch.DoubleStorage.__tostring__
+torch.CudaDoubleTensor.__tostring__ = torch.DoubleTensor.__tostring__
include('Tensor.lua')
include('FFI.lua')
@@ -16,7 +28,7 @@ function cutorch.withDevice(newDeviceID, closure)
local vals = {pcall(closure)}
cutorch.setDevice(curDeviceID)
if vals[1] then
- return unpack(vals, 2)
+ return unpack(vals, 2)
end
error(unpack(vals, 2))
end
@@ -37,6 +49,4 @@ function cutorch.createCudaHostTensor(...)
return torch.FloatTensor(storage, 1, size:storage())
end
-cutorch.setHeapTracking(true)
-
return cutorch
diff --git a/lib/THC/CMakeLists.txt b/lib/THC/CMakeLists.txt
index a422789..c4591b8 100644
--- a/lib/THC/CMakeLists.txt
+++ b/lib/THC/CMakeLists.txt
@@ -135,3 +135,18 @@ INSTALL(FILES
THCDeviceTensorUtils.cuh
THCDeviceTensorUtils-inl.cuh
DESTINATION "${THC_INSTALL_INCLUDE_SUBDIR}/THC")
+
+INSTALL(FILES
+ generic/THCStorage.c
+ generic/THCStorage.cu
+ generic/THCStorage.h
+ generic/THCTensor.c
+ generic/THCTensor.cu
+ generic/THCTensor.h
+ generic/THCStorageCopy.c
+ generic/THCStorageCopy.cu
+ generic/THCStorageCopy.h
+ generic/THCTensorCopy.c
+ generic/THCTensorCopy.cu
+ generic/THCTensorCopy.h
+ DESTINATION "${THC_INSTALL_INCLUDE_SUBDIR}/THC/generic")
diff --git a/lib/THC/THCGenerateAllTypes.h b/lib/THC/THCGenerateAllTypes.h
new file mode 100644
index 0000000..4793d06
--- /dev/null
+++ b/lib/THC/THCGenerateAllTypes.h
@@ -0,0 +1,114 @@
+#ifndef THC_GENERIC_FILE
+#error "You must define THC_GENERIC_FILE before including THGenerateAllTypes.h"
+#endif
+
+#define THCTypeIdxByte 1
+#define THCTypeIdxChar 2
+#define THCTypeIdxShort 3
+#define THCTypeIdxInt 4
+#define THCTypeIdxLong 5
+#define THCTypeIdxFloat 6
+#define THCTypeIdxDouble 7
+#define THCTypeIdx_(T) TH_CONCAT_2(THCTypeIdx,T)
+
+#define real unsigned char
+#define accreal long
+#define Real Byte
+#define CReal CudaByte
+#define THC_REAL_IS_BYTE
+#line 1 THC_GENERIC_FILE
+/*#line 1 "THByteStorage.h"*/
+#include THC_GENERIC_FILE
+#undef real
+#undef accreal
+#undef Real
+#undef CReal
+#undef THC_REAL_IS_BYTE
+
+#define real char
+#define accreal long
+#define Real Char
+#define CReal CudaChar
+#define THC_REAL_IS_CHAR
+#line 1 THC_GENERIC_FILE
+#include THC_GENERIC_FILE
+#undef real
+#undef accreal
+#undef Real
+#undef CReal
+#undef THC_REAL_IS_CHAR
+
+#define real short
+#define accreal long
+#define Real Short
+#define CReal CudaShort
+#define THC_REAL_IS_SHORT
+#line 1 THC_GENERIC_FILE
+#include THC_GENERIC_FILE
+#undef real
+#undef accreal
+#undef Real
+#undef CReal
+#undef THC_REAL_IS_SHORT
+
+#define real int
+#define accreal long
+#define Real Int
+#define CReal CudaInt
+#define THC_REAL_IS_INT
+#line 1 THC_GENERIC_FILE
+#include THC_GENERIC_FILE
+#undef real
+#undef accreal
+#undef Real
+#undef CReal
+#undef THC_REAL_IS_INT
+
+#define real long
+#define accreal long
+#define Real Long
+#define CReal CudaLong
+#define THC_REAL_IS_LONG
+#line 1 THC_GENERIC_FILE
+#include THC_GENERIC_FILE
+#undef real
+#undef accreal
+#undef Real
+#undef CReal
+#undef THC_REAL_IS_LONG
+
+#define real float
+#define accreal double
+#define Real Float
+#define CReal Cuda
+#define THC_REAL_IS_FLOAT
+#line 1 THC_GENERIC_FILE
+#include THC_GENERIC_FILE
+#undef real
+#undef accreal
+#undef Real
+#undef CReal
+#undef THC_REAL_IS_FLOAT
+
+#define real double
+#define accreal double
+#define Real Double
+#define CReal CudaDouble
+#define THC_REAL_IS_DOUBLE
+#line 1 THC_GENERIC_FILE
+#include THC_GENERIC_FILE
+#undef real
+#undef accreal
+#undef Real
+#undef CReal
+#undef THC_REAL_IS_DOUBLE
+
+#undef THCTypeIdxByte
+#undef THCTypeIdxChar
+#undef THCTypeIdxShort
+#undef THCTypeIdxInt
+#undef THCTypeIdxLong
+#undef THCTypeIdxFloat
+#undef THCTypeIdxDouble
+
+#undef THC_GENERIC_FILE
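
This header stamps out one copy of a generic body per scalar type: each block defines real/accreal/Real/CReal, re-includes THC_GENERIC_FILE, then undefines them. A minimal self-contained sketch of the same token-pasting idea (hypothetical names, with a macro body standing in for the re-included file):

#include <stdio.h>

#define CONCAT3_(a,b,c) a##b##c
#define CONCAT3(a,b,c) CONCAT3_(a,b,c)

/* the "generic body": instantiated once per (Real, real) pair */
#define DEFINE_SUM(Real, real) \
  static real CONCAT3(TH, Real, Sum)(real a, real b) { return a + b; }

DEFINE_SUM(Byte, unsigned char)   /* defines THByteSum   */
DEFINE_SUM(Float, float)          /* defines THFloatSum  */
DEFINE_SUM(Double, double)        /* defines THDoubleSum */

int main(void) {
  printf("%d %f\n", THByteSum(3, 4), THFloatSum(1.5f, 2.5f));
  return 0;
}

The real machinery differs only in that the body lives in a separate file, re-included once per type via #include THC_GENERIC_FILE.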
diff --git a/lib/THC/THCStorage.c b/lib/THC/THCStorage.c
new file mode 100644
index 0000000..6fc9574
--- /dev/null
+++ b/lib/THC/THCStorage.c
@@ -0,0 +1,6 @@
+#include "THCStorage.h"
+#include "THCGeneral.h"
+#include "THAtomic.h"
+
+#include "generic/THCStorage.c"
+#include "THCGenerateAllTypes.h"
diff --git a/lib/THC/THCStorage.cu b/lib/THC/THCStorage.cu
new file mode 100644
index 0000000..2ceb0c7
--- /dev/null
+++ b/lib/THC/THCStorage.cu
@@ -0,0 +1,10 @@
+#include "THCStorage.h"
+
+#include <thrust/device_ptr.h>
+#include <thrust/fill.h>
+#if CUDA_VERSION >= 7000
+#include <thrust/system/cuda/execution_policy.h>
+#endif
+
+#include "generic/THCStorage.cu"
+#include "THCGenerateAllTypes.h"
diff --git a/lib/THC/THCStorage.h b/lib/THC/THCStorage.h
new file mode 100644
index 0000000..ac1cd70
--- /dev/null
+++ b/lib/THC/THCStorage.h
@@ -0,0 +1,17 @@
+#ifndef THC_STORAGE_INC
+#define THC_STORAGE_INC
+
+#include "THStorage.h"
+#include "THCGeneral.h"
+
+#define THCStorage TH_CONCAT_3(TH,CReal,Storage)
+#define THCStorage_(NAME) TH_CONCAT_4(TH,CReal,Storage_,NAME)
+
+/* fast access methods */
+#define THC_STORAGE_GET(storage, idx) ((storage)->data[(idx)])
+#define THC_STORAGE_SET(storage, idx, value) ((storage)->data[(idx)] = (value))
+
+#include "generic/THCStorage.h"
+#include "THCGenerateAllTypes.h"
+
+#endif
diff --git a/lib/THC/THCStorageCopy.c b/lib/THC/THCStorageCopy.c
new file mode 100644
index 0000000..76a8076
--- /dev/null
+++ b/lib/THC/THCStorageCopy.c
@@ -0,0 +1,5 @@
+#include "THCStorageCopy.h"
+#include "THCGeneral.h"
+
+#include "generic/THCStorageCopy.c"
+#include "THCGenerateAllTypes.h"
diff --git a/lib/THC/THCStorageCopy.cu b/lib/THC/THCStorageCopy.cu
new file mode 100644
index 0000000..b8b0417
--- /dev/null
+++ b/lib/THC/THCStorageCopy.cu
@@ -0,0 +1,5 @@
+#include "THCStorageCopy.h"
+#include "THCGeneral.h"
+
+#include "generic/THCStorageCopy.cu"
+#include "THCGenerateAllTypes.h"
diff --git a/lib/THC/THCStorageCopy.h b/lib/THC/THCStorageCopy.h
new file mode 100644
index 0000000..ec8011d
--- /dev/null
+++ b/lib/THC/THCStorageCopy.h
@@ -0,0 +1,10 @@
+#ifndef THC_STORAGE_COPY_INC
+#define THC_STORAGE_COPY_INC
+
+#include "THCStorage.h"
+#include "THCGeneral.h"
+
+#include "generic/THCStorageCopy.h"
+#include "THCGenerateAllTypes.h"
+
+#endif
diff --git a/lib/THC/THCTensor.c b/lib/THC/THCTensor.c
new file mode 100644
index 0000000..3bcf69d
--- /dev/null
+++ b/lib/THC/THCTensor.c
@@ -0,0 +1,7 @@
+#include "THCGeneral.h"
+#include "THCTensor.h"
+#include "THCTensorCopy.h"
+#include "THAtomic.h"
+
+#include "generic/THCTensor.c"
+#include "THCGenerateAllTypes.h"
diff --git a/lib/THC/THCTensor.cu b/lib/THC/THCTensor.cu
new file mode 100644
index 0000000..1e6fc20
--- /dev/null
+++ b/lib/THC/THCTensor.cu
@@ -0,0 +1,4 @@
+#include "THCTensor.h"
+
+#include "generic/THCTensor.cu"
+#include "THCGenerateAllTypes.h"
diff --git a/lib/THC/THCTensor.h b/lib/THC/THCTensor.h
new file mode 100644
index 0000000..d4eb49a
--- /dev/null
+++ b/lib/THC/THCTensor.h
@@ -0,0 +1,14 @@
+#ifndef THC_TENSOR_INC
+#define THC_TENSOR_INC
+
+#include "THTensor.h"
+#include "THCStorage.h"
+#include "THCGeneral.h"
+
+#define THCTensor TH_CONCAT_3(TH,CReal,Tensor)
+#define THCTensor_(NAME) TH_CONCAT_4(TH,CReal,Tensor_,NAME)
+
+#include "generic/THCTensor.h"
+#include "THCGenerateAllTypes.h"
+
+#endif
diff --git a/lib/THC/THCTensorCopy.c b/lib/THC/THCTensorCopy.c
new file mode 100644
index 0000000..6813530
--- /dev/null
+++ b/lib/THC/THCTensorCopy.c
@@ -0,0 +1,6 @@
+#include "THCTensorCopy.h"
+#include "THCGeneral.h"
+#include "THCTensor.h"
+
+#include "generic/THCTensorCopy.c"
+#include "THCGenerateAllTypes.h"
diff --git a/lib/THC/THCTensorCopy.cu b/lib/THC/THCTensorCopy.cu
new file mode 100644
index 0000000..9e59dd8
--- /dev/null
+++ b/lib/THC/THCTensorCopy.cu
@@ -0,0 +1,10 @@
+#include "THCApply.cuh"
+
+inline int curGPU() {
+ int curDev;
+ THCudaCheck(cudaGetDevice(&curDev));
+ return curDev;
+}
+
+#include "generic/THCTensorCopy.cu"
+#include "THCGenerateAllTypes.h"
diff --git a/lib/THC/THCTensorCopy.h b/lib/THC/THCTensorCopy.h
new file mode 100644
index 0000000..fc206cb
--- /dev/null
+++ b/lib/THC/THCTensorCopy.h
@@ -0,0 +1,10 @@
+#ifndef TH_CUDA_TENSOR_COPY_INC
+#define TH_CUDA_TENSOR_COPY_INC
+
+#include "THCTensor.h"
+#include "THCGeneral.h"
+
+#include "generic/THCTensorCopy.h"
+#include "THCGenerateAllTypes.h"
+
+#endif
diff --git a/lib/THC/generic/THCStorage.c b/lib/THC/generic/THCStorage.c
index e7c529c..946ebc1 100644
--- a/lib/THC/generic/THCStorage.c
+++ b/lib/THC/generic/THCStorage.c
@@ -1,39 +1,39 @@
-#include "THCStorage.h"
-#include "THCGeneral.h"
-#include "THAtomic.h"
+#ifndef THC_GENERIC_FILE
+#define THC_GENERIC_FILE "generic/THCStorage.c"
+#else
-float* THCudaStorage_data(THCState *state, const THCudaStorage *self)
+real* THCStorage_(data)(THCState *state, const THCStorage *self)
{
return self->data;
}
-long THCudaStorage_size(THCState *state, const THCudaStorage *self)
+long THCStorage_(size)(THCState *state, const THCStorage *self)
{
return self->size;
}
-int THCudaStorage_elementSize(THCState *state)
+int THCStorage_(elementSize)(THCState *state)
{
- return sizeof(float);
+ return sizeof(real);
}
-void THCudaStorage_set(THCState *state, THCudaStorage *self, long index, float value)
+void THCStorage_(set)(THCState *state, THCStorage *self, long index, real value)
{
THArgCheck((index >= 0) && (index < self->size), 2, "index out of bounds");
- THCudaCheck(cudaMemcpy(self->data + index, &value, sizeof(float), cudaMemcpyHostToDevice));
+ THCudaCheck(cudaMemcpy(self->data + index, &value, sizeof(real), cudaMemcpyHostToDevice));
}
-float THCudaStorage_get(THCState *state, const THCudaStorage *self, long index)
+real THCStorage_(get)(THCState *state, const THCStorage *self, long index)
{
- float value;
+ real value;
THArgCheck((index >= 0) && (index < self->size), 2, "index out of bounds");
- THCudaCheck(cudaMemcpy(&value, self->data + index, sizeof(float), cudaMemcpyDeviceToHost));
+ THCudaCheck(cudaMemcpy(&value, self->data + index, sizeof(real), cudaMemcpyDeviceToHost));
return value;
}
-THCudaStorage* THCudaStorage_new(THCState *state)
+THCStorage* THCStorage_(new)(THCState *state)
{
- THCudaStorage *storage = (THCudaStorage*)THAlloc(sizeof(THCudaStorage));
+ THCStorage *storage = (THCStorage*)THAlloc(sizeof(THCStorage));
storage->data = NULL;
storage->size = 0;
storage->refcount = 1;
@@ -41,20 +41,20 @@ THCudaStorage* THCudaStorage_new(THCState *state)
return storage;
}
-THCudaStorage* THCudaStorage_newWithSize(THCState *state, long size)
+THCStorage* THCStorage_(newWithSize)(THCState *state, long size)
{
THArgCheck(size >= 0, 2, "invalid size");
if(size > 0)
{
- THCudaStorage *storage = (THCudaStorage*)THAlloc(sizeof(THCudaStorage));
+ THCStorage *storage = (THCStorage*)THAlloc(sizeof(THCStorage));
// update heap *before* attempting malloc, to free space for the malloc
- THCHeapUpdate(state, size * sizeof(float));
+ THCHeapUpdate(state, size * sizeof(real));
cudaError_t err =
- THCudaMalloc(state, (void**)&(storage->data), size * sizeof(float));
+ THCudaMalloc(state, (void**)&(storage->data), size * sizeof(real));
if(err != cudaSuccess){
- THCHeapUpdate(state, -size * sizeof(float));
+ THCHeapUpdate(state, -size * sizeof(real));
}
THCudaCheck(err);
@@ -65,53 +65,53 @@ THCudaStorage* THCudaStorage_newWithSize(THCState *state, long size)
}
else
{
- return THCudaStorage_new(state);
+ return THCStorage_(new)(state);
}
}
-THCudaStorage* THCudaStorage_newWithSize1(THCState *state, float data0)
+THCStorage* THCStorage_(newWithSize1)(THCState *state, real data0)
{
- THCudaStorage *self = THCudaStorage_newWithSize(state, 1);
- THCudaStorage_set(state, self, 0, data0);
+ THCStorage *self = THCStorage_(newWithSize)(state, 1);
+ THCStorage_(set)(state, self, 0, data0);
return self;
}
-THCudaStorage* THCudaStorage_newWithSize2(THCState *state, float data0, float data1)
+THCStorage* THCStorage_(newWithSize2)(THCState *state, real data0, real data1)
{
- THCudaStorage *self = THCudaStorage_newWithSize(state, 2);
- THCudaStorage_set(state, self, 0, data0);
- THCudaStorage_set(state, self, 1, data1);
+ THCStorage *self = THCStorage_(newWithSize)(state, 2);
+ THCStorage_(set)(state, self, 0, data0);
+ THCStorage_(set)(state, self, 1, data1);
return self;
}
-THCudaStorage* THCudaStorage_newWithSize3(THCState *state, float data0, float data1, float data2)
+THCStorage* THCStorage_(newWithSize3)(THCState *state, real data0, real data1, real data2)
{
- THCudaStorage *self = THCudaStorage_newWithSize(state, 3);
- THCudaStorage_set(state, self, 0, data0);
- THCudaStorage_set(state, self, 1, data1);
- THCudaStorage_set(state, self, 2, data2);
+ THCStorage *self = THCStorage_(newWithSize)(state, 3);
+ THCStorage_(set)(state, self, 0, data0);
+ THCStorage_(set)(state, self, 1, data1);
+ THCStorage_(set)(state, self, 2, data2);
return self;
}
-THCudaStorage* THCudaStorage_newWithSize4(THCState *state, float data0, float data1, float data2, float data3)
+THCStorage* THCStorage_(newWithSize4)(THCState *state, real data0, real data1, real data2, real data3)
{
- THCudaStorage *self = THCudaStorage_newWithSize(state, 4);
- THCudaStorage_set(state, self, 0, data0);
- THCudaStorage_set(state, self, 1, data1);
- THCudaStorage_set(state, self, 2, data2);
- THCudaStorage_set(state, self, 3, data3);
+ THCStorage *self = THCStorage_(newWithSize)(state, 4);
+ THCStorage_(set)(state, self, 0, data0);
+ THCStorage_(set)(state, self, 1, data1);
+ THCStorage_(set)(state, self, 2, data2);
+ THCStorage_(set)(state, self, 3, data3);
return self;
}
-THCudaStorage* THCudaStorage_newWithMapping(THCState *state, const char *fileName, long size, int isShared)
+THCStorage* THCStorage_(newWithMapping)(THCState *state, const char *fileName, long size, int isShared)
{
- THError("not available yet for THCudaStorage");
+ THError("not available yet for THCStorage");
return NULL;
}
-THCudaStorage* THCudaStorage_newWithData(THCState *state, float *data, long size)
+THCStorage* THCStorage_(newWithData)(THCState *state, real *data, long size)
{
- THCudaStorage *storage = (THCudaStorage*)THAlloc(sizeof(THCudaStorage));
+ THCStorage *storage = (THCStorage*)THAlloc(sizeof(THCStorage));
storage->data = data;
storage->size = size;
storage->refcount = 1;
@@ -119,23 +119,23 @@ THCudaStorage* THCudaStorage_newWithData(THCState *state, float *data, long size
return storage;
}
-void THCudaStorage_setFlag(THCState *state, THCudaStorage *storage, const char flag)
+void THCStorage_(setFlag)(THCState *state, THCStorage *storage, const char flag)
{
storage->flag |= flag;
}
-void THCudaStorage_clearFlag(THCState *state, THCudaStorage *storage, const char flag)
+void THCStorage_(clearFlag)(THCState *state, THCStorage *storage, const char flag)
{
storage->flag &= ~flag;
}
-void THCudaStorage_retain(THCState *state, THCudaStorage *self)
+void THCStorage_(retain)(THCState *state, THCStorage *self)
{
if(self && (self->flag & TH_STORAGE_REFCOUNTED))
THAtomicIncrementRef(&self->refcount);
}
-void THCudaStorage_free(THCState *state, THCudaStorage *self)
+void THCStorage_(free)(THCState *state, THCStorage *self)
{
if(!(self->flag & TH_STORAGE_REFCOUNTED))
return;
@@ -143,9 +143,10 @@ void THCudaStorage_free(THCState *state, THCudaStorage *self)
if (THAtomicDecrementRef(&self->refcount))
{
if(self->flag & TH_STORAGE_FREEMEM) {
- THCHeapUpdate(state, -self->size * sizeof(float));
+ THCHeapUpdate(state, -self->size * sizeof(real));
THCudaCheck(THCudaFree(state, self->data));
}
THFree(self);
}
}
+#endif
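
Each instantiation of this generic file yields a complete per-type API. For example, in the CudaInt pass (CReal = CudaInt, real = int) the functions above resolve to calls like the following (usage sketch, assuming an initialized THCState *state):

THCudaIntStorage *s = THCudaIntStorage_newWithSize(state, 4); /* device alloc */
THCudaIntStorage_set(state, s, 0, 42);      /* one-int memcpy host -> device  */
int v = THCudaIntStorage_get(state, s, 0);  /* one-int memcpy device -> host  */
THCudaIntStorage_free(state, s);            /* refcounted free                */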
diff --git a/lib/THC/generic/THCStorage.cu b/lib/THC/generic/THCStorage.cu
index c61f08e..740b2bd 100644
--- a/lib/THC/generic/THCStorage.cu
+++ b/lib/THC/generic/THCStorage.cu
@@ -1,14 +1,10 @@
-#include "THCStorage.h"
+#ifndef THC_GENERIC_FILE
+#define THC_GENERIC_FILE "generic/THCStorage.cu"
+#else
-#include <thrust/device_ptr.h>
-#include <thrust/fill.h>
-#if CUDA_VERSION >= 7000
-#include <thrust/system/cuda/execution_policy.h>
-#endif
-
-void THCudaStorage_fill(THCState *state, THCudaStorage *self, float value)
+void THCStorage_(fill)(THCState *state, THCStorage *self, real value)
{
- thrust::device_ptr<float> self_data(self->data);
+ thrust::device_ptr<real> self_data(self->data);
thrust::fill(
#if CUDA_VERSION >= 7000
thrust::cuda::par.on(THCState_getCurrentStream(state)),
@@ -16,7 +12,7 @@ void THCudaStorage_fill(THCState *state, THCudaStorage *self, float value)
self_data, self_data+self->size, value);
}
-void THCudaStorage_resize(THCState *state, THCudaStorage *self, long size)
+void THCStorage_(resize)(THCState *state, THCStorage *self, long size)
{
THArgCheck(size >= 0, 2, "invalid size");
@@ -27,33 +23,34 @@ void THCudaStorage_resize(THCState *state, THCudaStorage *self, long size)
{
if(self->flag & TH_STORAGE_FREEMEM) {
THCudaCheck(THCudaFree(state, self->data));
- THCHeapUpdate(state, -self->size * sizeof(float));
+ THCHeapUpdate(state, -self->size * sizeof(real));
}
self->data = NULL;
self->size = 0;
}
else
{
- float *data = NULL;
+ real *data = NULL;
// update heap *before* attempting malloc, to free space for the malloc
- THCHeapUpdate(state, size * sizeof(float));
- cudaError_t err = THCudaMalloc(state, (void**)(&data), size * sizeof(float));
+ THCHeapUpdate(state, size * sizeof(real));
+ cudaError_t err = THCudaMalloc(state, (void**)(&data), size * sizeof(real));
if(err != cudaSuccess) {
- THCHeapUpdate(state, -size * sizeof(float));
+ THCHeapUpdate(state, -size * sizeof(real));
}
THCudaCheck(err);
if (self->data) {
THCudaCheck(cudaMemcpyAsync(data,
self->data,
- THMin(self->size, size) * sizeof(float),
+ THMin(self->size, size) * sizeof(real),
cudaMemcpyDeviceToDevice,
THCState_getCurrentStream(state)));
THCudaCheck(THCudaFree(state, self->data));
- THCHeapUpdate(state, -self->size * sizeof(float));
+ THCHeapUpdate(state, -self->size * sizeof(real));
}
self->data = data;
self->size = size;
}
}
+#endif
diff --git a/lib/THC/generic/THCStorage.h b/lib/THC/generic/THCStorage.h
index 6549e5c..a8c5f5f 100644
--- a/lib/THC/generic/THCStorage.h
+++ b/lib/THC/generic/THCStorage.h
@@ -1,57 +1,54 @@
-#ifndef THC_STORAGE_INC
-#define THC_STORAGE_INC
-
-#include "THStorage.h"
-#include "THCGeneral.h"
+#ifndef THC_GENERIC_FILE
+#define THC_GENERIC_FILE "generic/THCStorage.h"
+#else
#define TH_STORAGE_REFCOUNTED 1
#define TH_STORAGE_RESIZABLE 2
#define TH_STORAGE_FREEMEM 4
-
-typedef struct THCudaStorage
+typedef struct THCStorage
{
- float *data;
+ real *data;
long size;
int refcount;
char flag;
THAllocator *allocator;
void *allocatorContext;
- struct THCudaStorage *view;
-} THCudaStorage;
+ struct THCStorage *view;
+} THCStorage;
-THC_API float* THCudaStorage_data(THCState *state, const THCudaStorage*);
-THC_API long THCudaStorage_size(THCState *state, const THCudaStorage*);
-THC_API int THCudaStorage_elementSize(THCState *state);
+THC_API real* THCStorage_(data)(THCState *state, const THCStorage*);
+THC_API long THCStorage_(size)(THCState *state, const THCStorage*);
+THC_API int THCStorage_(elementSize)(THCState *state);
/* slow access -- checks everything */
-THC_API void THCudaStorage_set(THCState *state, THCudaStorage*, long, float);
-THC_API float THCudaStorage_get(THCState *state, const THCudaStorage*, long);
+THC_API void THCStorage_(set)(THCState *state, THCStorage*, long, real);
+THC_API real THCStorage_(get)(THCState *state, const THCStorage*, long);
-THC_API THCudaStorage* THCudaStorage_new(THCState *state);
-THC_API THCudaStorage* THCudaStorage_newWithSize(THCState *state, long size);
-THC_API THCudaStorage* THCudaStorage_newWithSize1(THCState *state, float);
-THC_API THCudaStorage* THCudaStorage_newWithSize2(THCState *state, float, float);
-THC_API THCudaStorage* THCudaStorage_newWithSize3(THCState *state, float, float, float);
-THC_API THCudaStorage* THCudaStorage_newWithSize4(THCState *state, float, float, float, float);
-THC_API THCudaStorage* THCudaStorage_newWithMapping(THCState *state, const char *filename, long size, int shared);
+THC_API THCStorage* THCStorage_(new)(THCState *state);
+THC_API THCStorage* THCStorage_(newWithSize)(THCState *state, long size);
+THC_API THCStorage* THCStorage_(newWithSize1)(THCState *state, real);
+THC_API THCStorage* THCStorage_(newWithSize2)(THCState *state, real, real);
+THC_API THCStorage* THCStorage_(newWithSize3)(THCState *state, real, real, real);
+THC_API THCStorage* THCStorage_(newWithSize4)(THCState *state, real, real, real, real);
+THC_API THCStorage* THCStorage_(newWithMapping)(THCState *state, const char *filename, long size, int shared);
/* takes ownership of data */
-THC_API THCudaStorage* THCudaStorage_newWithData(THCState *state, float *data, long size);
+THC_API THCStorage* THCStorage_(newWithData)(THCState *state, real *data, long size);
-THC_API THCudaStorage* THCudaStorage_newWithAllocator(THCState *state, long size,
+THC_API THCStorage* THCStorage_(newWithAllocator)(THCState *state, long size,
THAllocator* allocator,
void *allocatorContext);
-THC_API THCudaStorage* THCudaStorage_newWithDataAndAllocator(
- THCState *state, float* data, long size, THAllocator* allocator, void *allocatorContext);
+THC_API THCStorage* THCStorage_(newWithDataAndAllocator)(
+ THCState *state, real* data, long size, THAllocator* allocator, void *allocatorContext);
-THC_API void THCudaStorage_setFlag(THCState *state, THCudaStorage *storage, const char flag);
-THC_API void THCudaStorage_clearFlag(THCState *state, THCudaStorage *storage, const char flag);
-THC_API void THCudaStorage_retain(THCState *state, THCudaStorage *storage);
+THC_API void THCStorage_(setFlag)(THCState *state, THCStorage *storage, const char flag);
+THC_API void THCStorage_(clearFlag)(THCState *state, THCStorage *storage, const char flag);
+THC_API void THCStorage_(retain)(THCState *state, THCStorage *storage);
-THC_API void THCudaStorage_free(THCState *state, THCudaStorage *storage);
-THC_API void THCudaStorage_resize(THCState *state, THCudaStorage *storage, long size);
-THC_API void THCudaStorage_fill(THCState *state, THCudaStorage *storage, float value);
+THC_API void THCStorage_(free)(THCState *state, THCStorage *storage);
+THC_API void THCStorage_(resize)(THCState *state, THCStorage *storage, long size);
+THC_API void THCStorage_(fill)(THCState *state, THCStorage *storage, real value);
#endif
diff --git a/lib/THC/generic/THCStorageCopy.c b/lib/THC/generic/THCStorageCopy.c
index 3517b2c..1696307 100644
--- a/lib/THC/generic/THCStorageCopy.c
+++ b/lib/THC/generic/THCStorageCopy.c
@@ -1,21 +1,26 @@
-#include "THCStorageCopy.h"
-#include "THCGeneral.h"
+#ifndef THC_GENERIC_FILE
+#define THC_GENERIC_FILE "generic/THCStorageCopy.c"
+#else
-void THCudaStorage_copyFloat(THCState *state, THCudaStorage *self, struct THFloatStorage *src)
+void THCStorage_(copyCPU)(THCState *state, THCStorage *self, struct THStorage *src)
{
THArgCheck(self->size == src->size, 2, "size does not match");
- THCudaCheck(cudaMemcpy(self->data, src->data, self->size * sizeof(float), cudaMemcpyHostToDevice));
+ THCudaCheck(cudaMemcpy(self->data, src->data, self->size * sizeof(real), cudaMemcpyHostToDevice));
}
-#define TH_CUDA_STORAGE_IMPLEMENT_COPY(TYPEC) \
- void THCudaStorage_copy##TYPEC(THCState *state, THCudaStorage *self, struct TH##TYPEC##Storage *src) \
- { \
- THFloatStorage *buffer; \
- THArgCheck(self->size == src->size, 2, "size does not match"); \
- buffer = THFloatStorage_newWithSize(src->size); \
- THFloatStorage_copy##TYPEC(buffer, src); \
- THCudaStorage_copyFloat(state, self, buffer); \
- THFloatStorage_free(buffer); \
+#define TH_CUDA_STORAGE_IMPLEMENT_COPY(TYPEC) \
+ void THCStorage_(copy##TYPEC)(THCState *state, THCStorage *self, struct TH##TYPEC##Storage *src) \
+ { \
+ if(THCTypeIdx_(Real) == THCTypeIdx_(TYPEC)) { \
+ THCStorage_(copyCPU)(state, self, (THStorage*) src); /* cast just removes compiler warning */ \
+ } else { \
+ THStorage *buffer; \
+ THArgCheck(self->size == src->size, 2, "size does not match"); \
+ buffer = THStorage_(newWithSize)(src->size); \
+ THStorage_(copy##TYPEC)(buffer, src); \
+ THCStorage_(copyCPU)(state, self, buffer); \
+ THStorage_(free)(buffer); \
+ } \
}
TH_CUDA_STORAGE_IMPLEMENT_COPY(Byte)
@@ -23,23 +28,28 @@ TH_CUDA_STORAGE_IMPLEMENT_COPY(Char)
TH_CUDA_STORAGE_IMPLEMENT_COPY(Short)
TH_CUDA_STORAGE_IMPLEMENT_COPY(Int)
TH_CUDA_STORAGE_IMPLEMENT_COPY(Long)
+TH_CUDA_STORAGE_IMPLEMENT_COPY(Float)
TH_CUDA_STORAGE_IMPLEMENT_COPY(Double)
-void THFloatStorage_copyCuda(THCState *state, THFloatStorage *self, struct THCudaStorage *src)
+void THStorage_(copyCuda)(THCState *state, THStorage *self, struct THCStorage *src)
{
THArgCheck(self->size == src->size, 2, "size does not match");
- THCudaCheck(cudaMemcpy(self->data, src->data, self->size * sizeof(float), cudaMemcpyDeviceToHost));
+ THCudaCheck(cudaMemcpy(self->data, src->data, self->size * sizeof(real), cudaMemcpyDeviceToHost));
}
-#define TH_CUDA_STORAGE_IMPLEMENT_COPYTO(TYPEC) \
- void TH##TYPEC##Storage_copyCuda(THCState *state, TH##TYPEC##Storage *self, struct THCudaStorage *src) \
+#define TH_CUDA_STORAGE_IMPLEMENT_COPYTO(TYPEC) \
+ void TH_CONCAT_4(TH,TYPEC,Storage_copyCuda,Real)(THCState *state, TH##TYPEC##Storage *self, struct THCStorage *src) \
{ \
- THFloatStorage *buffer; \
- THArgCheck(self->size == src->size, 2, "size does not match"); \
- buffer = THFloatStorage_newWithSize(src->size); \
- THFloatStorage_copyCuda(state, buffer, src); \
- TH##TYPEC##Storage_copyFloat(self, buffer); \
- THFloatStorage_free(buffer); \
+ if(THCTypeIdx_(Real) == THCTypeIdx_(TYPEC)) { \
+ THStorage_(copyCuda)(state, (THStorage*) self, src); /* cast just removes compiler warnings */ \
+ } else { \
+ THStorage *buffer; \
+ THArgCheck(self->size == src->size, 2, "size does not match"); \
+ buffer = THStorage_(newWithSize)(src->size); \
+ THStorage_(copyCuda)(state, buffer, src); \
+ TH_CONCAT_4(TH,TYPEC,Storage_copy,Real)(self, buffer); \
+ THStorage_(free)(buffer); \
+ } \
}
TH_CUDA_STORAGE_IMPLEMENT_COPYTO(Byte)
@@ -47,4 +57,7 @@ TH_CUDA_STORAGE_IMPLEMENT_COPYTO(Char)
TH_CUDA_STORAGE_IMPLEMENT_COPYTO(Short)
TH_CUDA_STORAGE_IMPLEMENT_COPYTO(Int)
TH_CUDA_STORAGE_IMPLEMENT_COPYTO(Long)
+TH_CUDA_STORAGE_IMPLEMENT_COPYTO(Float)
TH_CUDA_STORAGE_IMPLEMENT_COPYTO(Double)
+
+#endif
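
Two things worth noting in the copy macros above. First, `THCTypeIdx_(Real) == THCTypeIdx_(TYPEC)` compares per-type integer indices (presumably constants pasted from the type tag), so the same-type instantiation degenerates to a straight `copyCPU`/`copyCuda` with no temporary. Second, the cross-type path stages the conversion in a host buffer. A standalone sketch of that staging pattern for one concrete pair (int source on the CPU, float destination on the GPU); the function name is hypothetical and error handling is elided:

    /* Hypothetical illustration of the host-staged conversion;
       not part of THC. */
    #include <cuda_runtime.h>
    #include <stdlib.h>

    int copy_int_to_device_float(float *dev_dst, const int *src, size_t n)
    {
        float *buffer = malloc(n * sizeof(float));   /* THStorage_(newWithSize) */
        if (!buffer) return -1;
        for (size_t i = 0; i < n; i++)               /* THStorage_(copyInt) */
            buffer[i] = (float)src[i];
        cudaError_t err = cudaMemcpy(dev_dst, buffer, n * sizeof(float),
                                     cudaMemcpyHostToDevice); /* copyCPU */
        free(buffer);                                /* THStorage_(free) */
        return err == cudaSuccess ? 0 : -1;
    }
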
diff --git a/lib/THC/generic/THCStorageCopy.cu b/lib/THC/generic/THCStorageCopy.cu
index c3205b5..ffe8332 100644
--- a/lib/THC/generic/THCStorageCopy.cu
+++ b/lib/THC/generic/THCStorageCopy.cu
@@ -1,19 +1,49 @@
-#include "THCStorageCopy.h"
-#include "THCGeneral.h"
+#ifndef THC_GENERIC_FILE
+#define THC_GENERIC_FILE "generic/THCStorageCopy.cu"
+#else
-void THCudaStorage_rawCopy(THCState *state, THCudaStorage *self, float *src)
+void THCStorage_(rawCopy)(THCState *state, THCStorage *self, real *src)
{
- THCudaCheck(cudaMemcpyAsync(self->data, src, self->size * sizeof(float), cudaMemcpyDeviceToDevice, THCState_getCurrentStream(state)));
+ THCudaCheck(cudaMemcpyAsync(self->data, src, self->size * sizeof(real), cudaMemcpyDeviceToDevice, THCState_getCurrentStream(state)));
}
-void THCudaStorage_copy(THCState *state, THCudaStorage *self, THCudaStorage *src)
+void THCStorage_(copy)(THCState *state, THCStorage *self, THCStorage *src)
{
THArgCheck(self->size == src->size, 2, "size does not match");
- THCudaCheck(cudaMemcpyAsync(self->data, src->data, self->size * sizeof(float), cudaMemcpyDeviceToDevice, THCState_getCurrentStream(state)));
+ THCudaCheck(cudaMemcpyAsync(self->data, src->data, self->size * sizeof(real), cudaMemcpyDeviceToDevice, THCState_getCurrentStream(state)));
}
-void THCudaStorage_copyCuda(THCState *state, THCudaStorage *self, THCudaStorage *src)
+void THCStorage_(copyCuda)(THCState *state, THCStorage *self, THCStorage *src)
{
THArgCheck(self->size == src->size, 2, "size does not match");
- THCudaCheck(cudaMemcpyAsync(self->data, src->data, self->size * sizeof(float), cudaMemcpyDeviceToDevice, THCState_getCurrentStream(state)));
+ THCudaCheck(cudaMemcpyAsync(self->data, src->data, self->size * sizeof(real), cudaMemcpyDeviceToDevice, THCState_getCurrentStream(state)));
}
+
+// conversions are mediated by the CPU
+// yes, this is slow; feel free to write CUDA kernels for this
+#define THC_CUDA_STORAGE_IMPLEMENT_COPY(TYPEC,TYPECUDA) \
+ void THCStorage_(copyCuda##TYPEC)(THCState *state, THCStorage *self, struct THCuda##TYPECUDA##Storage *src) \
+ { \
+ if(THCTypeIdx_(Real) == THCTypeIdx_(TYPEC)) { \
+ THCStorage_(copy)(state, self, (THCStorage*) src); /* cast just removes compiler warning */ \
+ } else { \
+ THArgCheck(self->size == src->size, 2, "size does not match"); \
+ TH##TYPEC##Storage *buffer1 = TH##TYPEC##Storage_newWithSize(src->size); \
+ THStorage *buffer2 = THStorage_(newWithSize)(src->size); \
+ TH##TYPEC##Storage_copyCuda(state, buffer1, src); \
+ THStorage_(copy##TYPEC)(buffer2, buffer1); \
+ THCStorage_(copyCPU)(state, self, buffer2); \
+ TH##TYPEC##Storage_free(buffer1); \
+ THStorage_(free)(buffer2); \
+ } \
+ }
+
+THC_CUDA_STORAGE_IMPLEMENT_COPY(Byte,Byte)
+THC_CUDA_STORAGE_IMPLEMENT_COPY(Char,Char)
+THC_CUDA_STORAGE_IMPLEMENT_COPY(Short,Short)
+THC_CUDA_STORAGE_IMPLEMENT_COPY(Int,Int)
+THC_CUDA_STORAGE_IMPLEMENT_COPY(Long,Long)
+THC_CUDA_STORAGE_IMPLEMENT_COPY(Float,) // i.e. float
+THC_CUDA_STORAGE_IMPLEMENT_COPY(Double,Double)
+
+#endif
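
The new `THC_CUDA_STORAGE_IMPLEMENT_COPY` makes the cost explicit: a GPU-to-GPU copy between different element types takes three hops (device-to-host into a buffer of the source type, host-side conversion, host-to-device), exactly as the in-code comment warns. The `(Float,)` instantiation passes an empty TYPECUDA token because the float storage keeps its historical name `THCudaStorage` rather than `THCudaFloatStorage`. A compressed sketch of the three hops for a byte-to-float device copy; the function name is hypothetical and error handling is elided:

    /* Hypothetical illustration; dev_dst and dev_src are GPU buffers. */
    #include <cuda_runtime.h>
    #include <stdlib.h>

    void device_byte_to_float(float *dev_dst, const unsigned char *dev_src, size_t n)
    {
        unsigned char *h1 = malloc(n);                 /* buffer1 (source type) */
        float         *h2 = malloc(n * sizeof(float)); /* buffer2 (dest type)   */
        cudaMemcpy(h1, dev_src, n, cudaMemcpyDeviceToHost);
        for (size_t i = 0; i < n; i++)                 /* CPU-side conversion   */
            h2[i] = (float)h1[i];
        cudaMemcpy(dev_dst, h2, n * sizeof(float), cudaMemcpyHostToDevice);
        free(h1); free(h2);
    }
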
diff --git a/lib/THC/generic/THCStorageCopy.h b/lib/THC/generic/THCStorageCopy.h
index 4c2f8df..d80661d 100644
--- a/lib/THC/generic/THCStorageCopy.h
+++ b/lib/THC/generic/THCStorageCopy.h
@@ -1,28 +1,37 @@
-#ifndef THC_STORAGE_COPY_INC
-#define THC_STORAGE_COPY_INC
-
-#include "THCStorage.h"
-#include "THCGeneral.h"
+#ifndef THC_GENERIC_FILE
+#define THC_GENERIC_FILE "generic/THCStorageCopy.h"
+#else
/* Support for copy between different Storage types */
-THC_API void THCudaStorage_rawCopy(THCState *state, THCudaStorage *storage, float *src);
-THC_API void THCudaStorage_copy(THCState *state, THCudaStorage *storage, THCudaStorage *src);
-THC_API void THCudaStorage_copyByte(THCState *state, THCudaStorage *storage, struct THByteStorage *src);
-THC_API void THCudaStorage_copyChar(THCState *state, THCudaStorage *storage, struct THCharStorage *src);
-THC_API void THCudaStorage_copyShort(THCState *state, THCudaStorage *storage, struct THShortStorage *src);
-THC_API void THCudaStorage_copyInt(THCState *state, THCudaStorage *storage, struct THIntStorage *src);
-THC_API void THCudaStorage_copyLong(THCState *state, THCudaStorage *storage, struct THLongStorage *src);
-THC_API void THCudaStorage_copyFloat(THCState *state, THCudaStorage *storage, struct THFloatStorage *src);
-THC_API void THCudaStorage_copyDouble(THCState *state, THCudaStorage *storage, struct THDoubleStorage *src);
+THC_API void THCStorage_(rawCopy)(THCState *state, THCStorage *storage, real *src);
+THC_API void THCStorage_(copy)(THCState *state, THCStorage *storage, THCStorage *src);
+THC_API void THCStorage_(copyByte)(THCState *state, THCStorage *storage, struct THByteStorage *src);
+THC_API void THCStorage_(copyChar)(THCState *state, THCStorage *storage, struct THCharStorage *src);
+THC_API void THCStorage_(copyShort)(THCState *state, THCStorage *storage, struct THShortStorage *src);
+THC_API void THCStorage_(copyInt)(THCState *state, THCStorage *storage, struct THIntStorage *src);
+THC_API void THCStorage_(copyLong)(THCState *state, THCStorage *storage, struct THLongStorage *src);
+THC_API void THCStorage_(copyFloat)(THCState *state, THCStorage *storage, struct THFloatStorage *src);
+THC_API void THCStorage_(copyDouble)(THCState *state, THCStorage *storage, struct THDoubleStorage *src);
+
+THC_API void THCStorage_(copyCudaByte)(THCState *state, THCStorage *storage, struct THCudaByteStorage *src);
+THC_API void THCStorage_(copyCudaChar)(THCState *state, THCStorage *storage, struct THCudaCharStorage *src);
+THC_API void THCStorage_(copyCudaShort)(THCState *state, THCStorage *storage, struct THCudaShortStorage *src);
+THC_API void THCStorage_(copyCudaInt)(THCState *state, THCStorage *storage, struct THCudaIntStorage *src);
+THC_API void THCStorage_(copyCudaLong)(THCState *state, THCStorage *storage, struct THCudaLongStorage *src);
+THC_API void THCStorage_(copyCudaFloat)(THCState *state, THCStorage *storage, struct THCudaStorage *src);
+THC_API void THCStorage_(copyCudaDouble)(THCState *state, THCStorage *storage, struct THCudaDoubleStorage *src);
+
-THC_API void THByteStorage_copyCuda(THCState *state, THByteStorage *self, struct THCudaStorage *src);
-THC_API void THCharStorage_copyCuda(THCState *state, THCharStorage *self, struct THCudaStorage *src);
-THC_API void THShortStorage_copyCuda(THCState *state, THShortStorage *self, struct THCudaStorage *src);
-THC_API void THIntStorage_copyCuda(THCState *state, THIntStorage *self, struct THCudaStorage *src);
-THC_API void THLongStorage_copyCuda(THCState *state, THLongStorage *self, struct THCudaStorage *src);
-THC_API void THFloatStorage_copyCuda(THCState *state, THFloatStorage *self, struct THCudaStorage *src);
-THC_API void THDoubleStorage_copyCuda(THCState *state, THDoubleStorage *self, struct THCudaStorage *src);
-THC_API void THCudaStorage_copyCuda(THCState *state, THCudaStorage *self, THCudaStorage *src);
+THC_API void TH_CONCAT_2(THByteStorage_copyCuda , Real)(THCState *state, THByteStorage *self, struct THCStorage *src);
+THC_API void TH_CONCAT_2(THCharStorage_copyCuda , Real)(THCState *state, THCharStorage *self, struct THCStorage *src);
+THC_API void TH_CONCAT_2(THShortStorage_copyCuda , Real)(THCState *state, THShortStorage *self, struct THCStorage *src);
+THC_API void TH_CONCAT_2(THIntStorage_copyCuda , Real)(THCState *state, THIntStorage *self, struct THCStorage *src);
+THC_API void TH_CONCAT_2(THLongStorage_copyCuda , Real)(THCState *state, THLongStorage *self, struct THCStorage *src);
+THC_API void TH_CONCAT_2(THFloatStorage_copyCuda , Real)(THCState *state, THFloatStorage *self, struct THCStorage *src);
+THC_API void TH_CONCAT_2(THDoubleStorage_copyCuda, Real)(THCState *state, THDoubleStorage *self, struct THCStorage *src);
+THC_API void THStorage_(copyCuda)(THCState *state, THStorage *self, THCStorage *src);
+THC_API void THCStorage_(copyCuda)(THCState *state, THCStorage *self, THCStorage *src);
+THC_API void THCStorage_(copyCPU)(THCState *state, THCStorage *self, THStorage *src);
#endif
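
The `TH_CONCAT_2(TH<Type>Storage_copyCuda, Real)` declarations are what keep the CPU-destination copies unambiguous once every CUDA type exists: the generic suffix is pasted onto the name, so for `Real = Int` the header declares `THByteStorage_copyCudaInt`, `THFloatStorage_copyCudaInt`, and so on. A two-line preprocessor check, assuming TH's usual expand-then-paste helper:

    #define TH_CONCAT_2_EXPAND(a,b) a ## b
    #define TH_CONCAT_2(a,b)        TH_CONCAT_2_EXPAND(a,b)
    #define Real Int
    /* TH_CONCAT_2(THByteStorage_copyCuda, Real) -> THByteStorage_copyCudaInt */
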
diff --git a/lib/THC/generic/THCTensor.c b/lib/THC/generic/THCTensor.c
index fa325e3..6645c0b 100644
--- a/lib/THC/generic/THCTensor.c
+++ b/lib/THC/generic/THCTensor.c
@@ -1,51 +1,50 @@
-#include "THCGeneral.h"
-#include "THCTensor.h"
-#include "THCTensorCopy.h"
-#include "THAtomic.h"
+#ifndef THC_GENERIC_FILE
+#define THC_GENERIC_FILE "generic/THCTensor.c"
+#else
/**** access methods ****/
-THCudaStorage *THCudaTensor_storage(THCState *state, const THCudaTensor *self)
+THCStorage *THCTensor_(storage)(THCState *state, const THCTensor *self)
{
return self->storage;
}
-long THCudaTensor_storageOffset(THCState *state, const THCudaTensor *self)
+long THCTensor_(storageOffset)(THCState *state, const THCTensor *self)
{
return self->storageOffset;
}
-int THCudaTensor_nDimension(THCState *state, const THCudaTensor *self)
+int THCTensor_(nDimension)(THCState *state, const THCTensor *self)
{
return self->nDimension;
}
-long THCudaTensor_size(THCState *state, const THCudaTensor *self, int dim)
+long THCTensor_(size)(THCState *state, const THCTensor *self, int dim)
{
THArgCheck((dim >= 0) && (dim < self->nDimension), 2, "out of range");
return self->size[dim];
}
-long THCudaTensor_stride(THCState *state, const THCudaTensor *self, int dim)
+long THCTensor_(stride)(THCState *state, const THCTensor *self, int dim)
{
THArgCheck((dim >= 0) && (dim < self->nDimension), 2, "out of range");
return self->stride[dim];
}
-THLongStorage *THCudaTensor_newSizeOf(THCState *state, THCudaTensor *self)
+THLongStorage *THCTensor_(newSizeOf)(THCState *state, THCTensor *self)
{
THLongStorage *size = THLongStorage_newWithSize(self->nDimension);
THLongStorage_rawCopy(size, self->size);
return size;
}
-THLongStorage *THCudaTensor_newStrideOf(THCState *state, THCudaTensor *self)
+THLongStorage *THCTensor_(newStrideOf)(THCState *state, THCTensor *self)
{
THLongStorage *stride = THLongStorage_newWithSize(self->nDimension);
THLongStorage_rawCopy(stride, self->stride);
return stride;
}
-float *THCudaTensor_data(THCState *state, const THCudaTensor *self)
+real *THCTensor_(data)(THCState *state, const THCTensor *self)
{
if(self->storage)
return (self->storage->data+self->storageOffset);
@@ -53,36 +52,36 @@ float *THCudaTensor_data(THCState *state, const THCudaTensor *self)
return NULL;
}
-void THCudaTensor_setFlag(THCState *state, THCudaTensor *self, const char flag)
+void THCTensor_(setFlag)(THCState *state, THCTensor *self, const char flag)
{
self->flag |= flag;
}
-void THCudaTensor_clearFlag(THCState *state, THCudaTensor *self, const char flag)
+void THCTensor_(clearFlag)(THCState *state, THCTensor *self, const char flag)
{
self->flag &= ~flag;
}
/**** creation methods ****/
-static void THCudaTensor_rawInit(THCState *state, THCudaTensor *self);
-static void THCudaTensor_rawSet(THCState *state, THCudaTensor *self, THCudaStorage *storage, long storageOffset, int nDimension, long *size, long *stride);
+static void THCTensor_(rawInit)(THCState *state, THCTensor *self);
+static void THCTensor_(rawSet)(THCState *state, THCTensor *self, THCStorage *storage, long storageOffset, int nDimension, long *size, long *stride);
/* Empty init */
-THCudaTensor *THCudaTensor_new(THCState *state)
+THCTensor *THCTensor_(new)(THCState *state)
{
- THCudaTensor *self = (THCudaTensor*)THAlloc(sizeof(THCudaTensor));
- THCudaTensor_rawInit(state, self);
+ THCTensor *self = (THCTensor*)THAlloc(sizeof(THCTensor));
+ THCTensor_(rawInit)(state, self);
return self;
}
/* Pointer-copy init */
-THCudaTensor *THCudaTensor_newWithTensor(THCState *state, THCudaTensor *tensor)
+THCTensor *THCTensor_(newWithTensor)(THCState *state, THCTensor *tensor)
{
- THCudaTensor *self = (THCudaTensor*)THAlloc(sizeof(THCudaTensor));
- THCudaTensor_rawInit(state, self);
- THCudaTensor_rawSet(state,
+ THCTensor *self = (THCTensor*)THAlloc(sizeof(THCTensor));
+ THCTensor_(rawInit)(state, self);
+ THCTensor_(rawSet)(state,
self,
tensor->storage,
tensor->storageOffset,
@@ -93,14 +92,14 @@ THCudaTensor *THCudaTensor_newWithTensor(THCState *state, THCudaTensor *tensor)
}
/* Storage init */
-THCudaTensor *THCudaTensor_newWithStorage(THCState *state, THCudaStorage *storage, long storageOffset, THLongStorage *size, THLongStorage *stride)
+THCTensor *THCTensor_(newWithStorage)(THCState *state, THCStorage *storage, long storageOffset, THLongStorage *size, THLongStorage *stride)
{
- THCudaTensor *self = (THCudaTensor*)THAlloc(sizeof(THCudaTensor));
+ THCTensor *self = (THCTensor*)THAlloc(sizeof(THCTensor));
if(size && stride)
THArgCheck(size->size == stride->size, 4, "inconsistent size");
- THCudaTensor_rawInit(state, self);
- THCudaTensor_rawSet(state,
+ THCTensor_(rawInit)(state, self);
+ THCTensor_(rawSet)(state,
self,
storage,
storageOffset,
@@ -110,28 +109,28 @@ THCudaTensor *THCudaTensor_newWithStorage(THCState *state, THCudaStorage *storag
return self;
}
-THCudaTensor *THCudaTensor_newWithStorage1d(THCState *state, THCudaStorage *storage, long storageOffset,
+THCTensor *THCTensor_(newWithStorage1d)(THCState *state, THCStorage *storage, long storageOffset,
long size0, long stride0)
{
- return THCudaTensor_newWithStorage4d(state, storage, storageOffset, size0, stride0, -1, -1, -1, -1, -1, -1);
+ return THCTensor_(newWithStorage4d)(state, storage, storageOffset, size0, stride0, -1, -1, -1, -1, -1, -1);
}
-THCudaTensor *THCudaTensor_newWithStorage2d(THCState *state, THCudaStorage *storage, long storageOffset,
+THCTensor *THCTensor_(newWithStorage2d)(THCState *state, THCStorage *storage, long storageOffset,
long size0, long stride0,
long size1, long stride1)
{
- return THCudaTensor_newWithStorage4d(state, storage, storageOffset, size0, stride0, size1, stride1, -1, -1, -1, -1);
+ return THCTensor_(newWithStorage4d)(state, storage, storageOffset, size0, stride0, size1, stride1, -1, -1, -1, -1);
}
-THCudaTensor *THCudaTensor_newWithStorage3d(THCState *state, THCudaStorage *storage, long storageOffset,
+THCTensor *THCTensor_(newWithStorage3d)(THCState *state, THCStorage *storage, long storageOffset,
long size0, long stride0,
long size1, long stride1,
long size2, long stride2)
{
- return THCudaTensor_newWithStorage4d(state, storage, storageOffset, size0, stride0, size1, stride1, size2, stride2, -1, -1);
+ return THCTensor_(newWithStorage4d)(state, storage, storageOffset, size0, stride0, size1, stride1, size2, stride2, -1, -1);
}
-THCudaTensor *THCudaTensor_newWithStorage4d(THCState *state, THCudaStorage *storage, long storageOffset,
+THCTensor *THCTensor_(newWithStorage4d)(THCState *state, THCStorage *storage, long storageOffset,
long size0, long stride0,
long size1, long stride1,
long size2, long stride2,
@@ -140,102 +139,102 @@ THCudaTensor *THCudaTensor_newWithStorage4d(THCState *state, THCudaStorage *stor
long size[4] = {size0, size1, size2, size3};
long stride[4] = {stride0, stride1, stride2, stride3};
- THCudaTensor *self = (THCudaTensor*)THAlloc(sizeof(THCudaTensor));
- THCudaTensor_rawInit(state, self);
- THCudaTensor_rawSet(state, self, storage, storageOffset, 4, size, stride);
+ THCTensor *self = (THCTensor*)THAlloc(sizeof(THCTensor));
+ THCTensor_(rawInit)(state, self);
+ THCTensor_(rawSet)(state, self, storage, storageOffset, 4, size, stride);
return self;
}
-THCudaTensor *THCudaTensor_newWithSize(THCState *state, THLongStorage *size, THLongStorage *stride)
+THCTensor *THCTensor_(newWithSize)(THCState *state, THLongStorage *size, THLongStorage *stride)
{
- return THCudaTensor_newWithStorage(state, NULL, 0, size, stride);
+ return THCTensor_(newWithStorage)(state, NULL, 0, size, stride);
}
-THCudaTensor *THCudaTensor_newWithSize1d(THCState *state, long size0)
+THCTensor *THCTensor_(newWithSize1d)(THCState *state, long size0)
{
- return THCudaTensor_newWithSize4d(state, size0, -1, -1, -1);
+ return THCTensor_(newWithSize4d)(state, size0, -1, -1, -1);
}
-THCudaTensor *THCudaTensor_newWithSize2d(THCState *state, long size0, long size1)
+THCTensor *THCTensor_(newWithSize2d)(THCState *state, long size0, long size1)
{
- return THCudaTensor_newWithSize4d(state, size0, size1, -1, -1);
+ return THCTensor_(newWithSize4d)(state, size0, size1, -1, -1);
}
-THCudaTensor *THCudaTensor_newWithSize3d(THCState *state, long size0, long size1, long size2)
+THCTensor *THCTensor_(newWithSize3d)(THCState *state, long size0, long size1, long size2)
{
- return THCudaTensor_newWithSize4d(state, size0, size1, size2, -1);
+ return THCTensor_(newWithSize4d)(state, size0, size1, size2, -1);
}
-THCudaTensor *THCudaTensor_newWithSize4d(THCState *state, long size0, long size1, long size2, long size3)
+THCTensor *THCTensor_(newWithSize4d)(THCState *state, long size0, long size1, long size2, long size3)
{
long size[4] = {size0, size1, size2, size3};
- THCudaTensor *self = (THCudaTensor*)THAlloc(sizeof(THCudaTensor));
- THCudaTensor_rawInit(state, self);
- THCudaTensor_rawResize(state, self, 4, size, NULL);
+ THCTensor *self = (THCTensor*)THAlloc(sizeof(THCTensor));
+ THCTensor_(rawInit)(state, self);
+ THCTensor_(rawResize)(state, self, 4, size, NULL);
return self;
}
-THCudaTensor *THCudaTensor_newClone(THCState *state, THCudaTensor *self)
+THCTensor *THCTensor_(newClone)(THCState *state, THCTensor *self)
{
- THCudaTensor *tensor = THCudaTensor_new(state);
- THCudaTensor_resizeAs(state, tensor, self);
- THCudaTensor_copy(state, tensor, self);
+ THCTensor *tensor = THCTensor_(new)(state);
+ THCTensor_(resizeAs)(state, tensor, self);
+ THCTensor_(copy)(state, tensor, self);
return tensor;
}
-THCudaTensor *THCudaTensor_newContiguous(THCState *state, THCudaTensor *self)
+THCTensor *THCTensor_(newContiguous)(THCState *state, THCTensor *self)
{
- if(!THCudaTensor_isContiguous(state, self))
- return THCudaTensor_newClone(state, self);
+ if(!THCTensor_(isContiguous)(state, self))
+ return THCTensor_(newClone)(state, self);
else
{
- THCudaTensor_retain(state, self);
+ THCTensor_(retain)(state, self);
return self;
}
}
-THCudaTensor *THCudaTensor_newSelect(THCState *state, THCudaTensor *tensor, int dimension_, long sliceIndex_)
+THCTensor *THCTensor_(newSelect)(THCState *state, THCTensor *tensor, int dimension_, long sliceIndex_)
{
- THCudaTensor *self = THCudaTensor_newWithTensor(state, tensor);
- THCudaTensor_select(state, self, NULL, dimension_, sliceIndex_);
+ THCTensor *self = THCTensor_(newWithTensor)(state, tensor);
+ THCTensor_(select)(state, self, NULL, dimension_, sliceIndex_);
return self;
}
-THCudaTensor *THCudaTensor_newNarrow(THCState *state, THCudaTensor *tensor, int dimension_, long firstIndex_, long size_)
+THCTensor *THCTensor_(newNarrow)(THCState *state, THCTensor *tensor, int dimension_, long firstIndex_, long size_)
{
- THCudaTensor *self = THCudaTensor_newWithTensor(state, tensor);
- THCudaTensor_narrow(state, self, NULL, dimension_, firstIndex_, size_);
+ THCTensor *self = THCTensor_(newWithTensor)(state, tensor);
+ THCTensor_(narrow)(state, self, NULL, dimension_, firstIndex_, size_);
return self;
}
-THCudaTensor *THCudaTensor_newTranspose(THCState *state, THCudaTensor *tensor, int dimension1_, int dimension2_)
+THCTensor *THCTensor_(newTranspose)(THCState *state, THCTensor *tensor, int dimension1_, int dimension2_)
{
- THCudaTensor *self = THCudaTensor_newWithTensor(state, tensor);
- THCudaTensor_transpose(state, self, NULL, dimension1_, dimension2_);
+ THCTensor *self = THCTensor_(newWithTensor)(state, tensor);
+ THCTensor_(transpose)(state, self, NULL, dimension1_, dimension2_);
return self;
}
-THCudaTensor *THCudaTensor_newUnfold(THCState *state, THCudaTensor *tensor, int dimension_, long size_, long step_)
+THCTensor *THCTensor_(newUnfold)(THCState *state, THCTensor *tensor, int dimension_, long size_, long step_)
{
- THCudaTensor *self = THCudaTensor_newWithTensor(state, tensor);
- THCudaTensor_unfold(state, self, NULL, dimension_, size_, step_);
+ THCTensor *self = THCTensor_(newWithTensor)(state, tensor);
+ THCTensor_(unfold)(state, self, NULL, dimension_, size_, step_);
return self;
}
/* Resize */
-void THCudaTensor_resize(THCState *state, THCudaTensor *self, THLongStorage *size, THLongStorage *stride)
+void THCTensor_(resize)(THCState *state, THCTensor *self, THLongStorage *size, THLongStorage *stride)
{
THArgCheck(size != NULL, 2, "invalid size");
if(stride)
THArgCheck(stride->size == size->size, 3, "invalid stride");
- THCudaTensor_rawResize(state, self, size->size, size->data, (stride ? stride->data : NULL));
+ THCTensor_(rawResize)(state, self, size->size, size->data, (stride ? stride->data : NULL));
}
-void THCudaTensor_resizeAs(THCState *state, THCudaTensor *self, THCudaTensor *src)
+void THCTensor_(resizeAs)(THCState *state, THCTensor *self, THCTensor *src)
{
int isSame = 0;
int d;
@@ -253,42 +252,42 @@ void THCudaTensor_resizeAs(THCState *state, THCudaTensor *self, THCudaTensor *sr
}
if(!isSame)
- THCudaTensor_rawResize(state, self, src->nDimension, src->size, NULL);
+ THCTensor_(rawResize)(state, self, src->nDimension, src->size, NULL);
}
-void THCudaTensor_resize1d(THCState *state, THCudaTensor *tensor, long size0)
+void THCTensor_(resize1d)(THCState *state, THCTensor *tensor, long size0)
{
- THCudaTensor_resize4d(state, tensor, size0, -1, -1, -1);
+ THCTensor_(resize4d)(state, tensor, size0, -1, -1, -1);
}
-void THCudaTensor_resize2d(THCState *state, THCudaTensor *tensor, long size0, long size1)
+void THCTensor_(resize2d)(THCState *state, THCTensor *tensor, long size0, long size1)
{
- THCudaTensor_resize4d(state, tensor, size0, size1, -1, -1);
+ THCTensor_(resize4d)(state, tensor, size0, size1, -1, -1);
}
-void THCudaTensor_resize3d(THCState *state, THCudaTensor *tensor, long size0, long size1, long size2)
+void THCTensor_(resize3d)(THCState *state, THCTensor *tensor, long size0, long size1, long size2)
{
- THCudaTensor_resize4d(state, tensor, size0, size1, size2, -1);
+ THCTensor_(resize4d)(state, tensor, size0, size1, size2, -1);
}
-void THCudaTensor_resize4d(THCState *state, THCudaTensor *self, long size0, long size1, long size2, long size3)
+void THCTensor_(resize4d)(THCState *state, THCTensor *self, long size0, long size1, long size2, long size3)
{
long size[4] = {size0, size1, size2, size3};
- THCudaTensor_rawResize(state, self, 4, size, NULL);
+ THCTensor_(rawResize)(state, self, 4, size, NULL);
}
-void THCudaTensor_resize5d(THCState *state, THCudaTensor *self, long size0, long size1, long size2, long size3, long size4)
+void THCTensor_(resize5d)(THCState *state, THCTensor *self, long size0, long size1, long size2, long size3, long size4)
{
long size[5] = {size0, size1, size2, size3, size4};
- THCudaTensor_rawResize(state, self, 5, size, NULL);
+ THCTensor_(rawResize)(state, self, 5, size, NULL);
}
-void THCudaTensor_set(THCState *state, THCudaTensor *self, THCudaTensor *src)
+void THCTensor_(set)(THCState *state, THCTensor *self, THCTensor *src)
{
if(self != src)
- THCudaTensor_rawSet(state,
+ THCTensor_(rawSet)(state,
self,
src->storage,
src->storageOffset,
@@ -297,12 +296,12 @@ void THCudaTensor_set(THCState *state, THCudaTensor *self, THCudaTensor *src)
src->stride);
}
-void THCudaTensor_setStorage(THCState *state, THCudaTensor *self, THCudaStorage *storage_, long storageOffset_, THLongStorage *size_, THLongStorage *stride_)
+void THCTensor_(setStorage)(THCState *state, THCTensor *self, THCStorage *storage_, long storageOffset_, THLongStorage *size_, THLongStorage *stride_)
{
if(size_ && stride_)
THArgCheck(size_->size == stride_->size, 5, "inconsistent size/stride sizes");
- THCudaTensor_rawSet(state,
+ THCTensor_(rawSet)(state,
self,
storage_,
storageOffset_,
@@ -311,40 +310,40 @@ void THCudaTensor_setStorage(THCState *state, THCudaTensor *self, THCudaStorage
(stride_ ? stride_->data : NULL));
}
-void THCudaTensor_setStorage1d(THCState *state, THCudaTensor *self, THCudaStorage *storage_, long storageOffset_,
+void THCTensor_(setStorage1d)(THCState *state, THCTensor *self, THCStorage *storage_, long storageOffset_,
long size0_, long stride0_)
{
- THCudaTensor_setStorage4d(state, self, storage_, storageOffset_,
+ THCTensor_(setStorage4d)(state, self, storage_, storageOffset_,
size0_, stride0_,
-1, -1,
-1, -1,
-1, -1);
}
-void THCudaTensor_setStorage2d(THCState *state, THCudaTensor *self, THCudaStorage *storage_, long storageOffset_,
+void THCTensor_(setStorage2d)(THCState *state, THCTensor *self, THCStorage *storage_, long storageOffset_,
long size0_, long stride0_,
long size1_, long stride1_)
{
- THCudaTensor_setStorage4d(state, self, storage_, storageOffset_,
+ THCTensor_(setStorage4d)(state, self, storage_, storageOffset_,
size0_, stride0_,
size1_, stride1_,
-1, -1,
-1, -1);
}
-void THCudaTensor_setStorage3d(THCState *state, THCudaTensor *self, THCudaStorage *storage_, long storageOffset_,
+void THCTensor_(setStorage3d)(THCState *state, THCTensor *self, THCStorage *storage_, long storageOffset_,
long size0_, long stride0_,
long size1_, long stride1_,
long size2_, long stride2_)
{
- THCudaTensor_setStorage4d(state, self, storage_, storageOffset_,
+ THCTensor_(setStorage4d)(state, self, storage_, storageOffset_,
size0_, stride0_,
size1_, stride1_,
size2_, stride2_,
-1, -1);
}
-void THCudaTensor_setStorage4d(THCState *state, THCudaTensor *self, THCudaStorage *storage_, long storageOffset_,
+void THCTensor_(setStorage4d)(THCState *state, THCTensor *self, THCStorage *storage_, long storageOffset_,
long size0_, long stride0_,
long size1_, long stride1_,
long size2_, long stride2_,
@@ -354,11 +353,11 @@ void THCudaTensor_setStorage4d(THCState *state, THCudaTensor *self, THCudaStorag
long size[4] = {size0_, size1_, size2_, size3_};
long stride[4] = {stride0_, stride1_, stride2_, stride3_};
- THCudaTensor_rawSet(state, self, storage_, storageOffset_, 4, size, stride);
+ THCTensor_(rawSet)(state, self, storage_, storageOffset_, 4, size, stride);
}
-void THCudaTensor_narrow(THCState *state, THCudaTensor *self, THCudaTensor *src, int dimension, long firstIndex, long size)
+void THCTensor_(narrow)(THCState *state, THCTensor *self, THCTensor *src, int dimension, long firstIndex, long size)
{
if(!src)
src = self;
@@ -367,7 +366,7 @@ void THCudaTensor_narrow(THCState *state, THCudaTensor *self, THCudaTensor *src,
THArgCheck( (firstIndex >= 0) && (firstIndex < src->size[dimension]), 4, "out of range");
THArgCheck( (size > 0) && (firstIndex+size <= src->size[dimension]), 5, "out of range");
- THCudaTensor_set(state, self, src);
+ THCTensor_(set)(state, self, src);
if(firstIndex > 0)
self->storageOffset += firstIndex*self->stride[dimension];
@@ -375,7 +374,7 @@ void THCudaTensor_narrow(THCState *state, THCudaTensor *self, THCudaTensor *src,
self->size[dimension] = size;
}
-void THCudaTensor_select(THCState *state, THCudaTensor *self, THCudaTensor *src, int dimension, long sliceIndex)
+void THCTensor_(select)(THCState *state, THCTensor *self, THCTensor *src, int dimension, long sliceIndex)
{
int d;
@@ -386,8 +385,8 @@ void THCudaTensor_select(THCState *state, THCudaTensor *self, THCudaTensor *src,
THArgCheck((dimension >= 0) && (dimension < src->nDimension), 3, "out of range");
THArgCheck((sliceIndex >= 0) && (sliceIndex < src->size[dimension]), 4, "out of range");
- THCudaTensor_set(state, self, src);
- THCudaTensor_narrow(state, self, NULL, dimension, sliceIndex, 1);
+ THCTensor_(set)(state, self, src);
+ THCTensor_(narrow)(state, self, NULL, dimension, sliceIndex, 1);
for(d = dimension; d < self->nDimension-1; d++)
{
self->size[d] = self->size[d+1];
@@ -396,7 +395,7 @@ void THCudaTensor_select(THCState *state, THCudaTensor *self, THCudaTensor *src,
self->nDimension--;
}
-void THCudaTensor_transpose(THCState *state, THCudaTensor *self, THCudaTensor *src, int dimension1, int dimension2)
+void THCTensor_(transpose)(THCState *state, THCTensor *self, THCTensor *src, int dimension1, int dimension2)
{
long z;
@@ -406,7 +405,7 @@ void THCudaTensor_transpose(THCState *state, THCudaTensor *self, THCudaTensor *s
THArgCheck( (dimension1 >= 0) && (dimension1 < src->nDimension), 1, "out of range");
THArgCheck( (dimension2 >= 0) && (dimension2 < src->nDimension), 2, "out of range");
- THCudaTensor_set(state, self, src);
+ THCTensor_(set)(state, self, src);
if(dimension1 == dimension2)
return;
@@ -419,7 +418,7 @@ void THCudaTensor_transpose(THCState *state, THCudaTensor *self, THCudaTensor *s
self->size[dimension2] = z;
}
-void THCudaTensor_unfold(THCState *state, THCudaTensor *self, THCudaTensor *src, int dimension, long size, long step)
+void THCTensor_(unfold)(THCState *state, THCTensor *self, THCTensor *src, int dimension, long size, long step)
{
long *newSize;
long *newStride;
@@ -433,7 +432,7 @@ void THCudaTensor_unfold(THCState *state, THCudaTensor *self, THCudaTensor *src,
THArgCheck(size <= src->size[dimension], 3, "out of range");
THArgCheck(step > 0, 4, "invalid step");
- THCudaTensor_set(state, self, src);
+ THCTensor_(set)(state, self, src);
newSize = (long*)THAlloc(sizeof(long)*(self->nDimension+1));
newStride = (long*)THAlloc(sizeof(long)*(self->nDimension+1));
@@ -463,7 +462,7 @@ void THCudaTensor_unfold(THCState *state, THCudaTensor *self, THCudaTensor *src,
}
/* we have to handle the case where the result is a number */
-void THCudaTensor_squeeze(THCState *state, THCudaTensor *self, THCudaTensor *src)
+void THCTensor_(squeeze)(THCState *state, THCTensor *self, THCTensor *src)
{
int ndim = 0;
int d;
@@ -471,7 +470,7 @@ void THCudaTensor_squeeze(THCState *state, THCudaTensor *self, THCudaTensor *src
if(!src)
src = self;
- THCudaTensor_set(state, self, src);
+ THCTensor_(set)(state, self, src);
for(d = 0; d < src->nDimension; d++)
{
@@ -496,7 +495,7 @@ void THCudaTensor_squeeze(THCState *state, THCudaTensor *self, THCudaTensor *src
self->nDimension = ndim;
}
-void THCudaTensor_squeeze1d(THCState *state, THCudaTensor *self, THCudaTensor *src, int dimension)
+void THCTensor_(squeeze1d)(THCState *state, THCTensor *self, THCTensor *src, int dimension)
{
int d;
@@ -505,7 +504,7 @@ void THCudaTensor_squeeze1d(THCState *state, THCudaTensor *self, THCudaTensor *s
THArgCheck(dimension < src->nDimension, 3, "dimension out of range");
- THCudaTensor_set(state, self, src);
+ THCTensor_(set)(state, self, src);
if(src->size[dimension] == 1 && src->nDimension > 1)
{
@@ -518,7 +517,7 @@ void THCudaTensor_squeeze1d(THCState *state, THCudaTensor *self, THCudaTensor *s
}
}
-int THCudaTensor_isContiguous(THCState *state, const THCudaTensor *self)
+int THCTensor_(isContiguous)(THCState *state, const THCTensor *self)
{
long z = 1;
int d;
@@ -535,7 +534,7 @@ int THCudaTensor_isContiguous(THCState *state, const THCudaTensor *self)
return 1;
}
-int THCudaTensor_isSize(THCState *state, const THCudaTensor *self, const THLongStorage *dims)
+int THCTensor_(isSize)(THCState *state, const THCTensor *self, const THLongStorage *dims)
{
int d;
if (self->nDimension != dims->size)
@@ -549,7 +548,7 @@ int THCudaTensor_isSize(THCState *state, const THCudaTensor *self, const THLongS
return 1;
}
-int THCudaTensor_isSetTo(THCState *state, const THCudaTensor *self, const THCudaTensor *src)
+int THCTensor_(isSetTo)(THCState *state, const THCTensor *self, const THCTensor *src)
{
if (self->storage == src->storage &&
self->storageOffset == src->storageOffset &&
@@ -566,7 +565,7 @@ int THCudaTensor_isSetTo(THCState *state, const THCudaTensor *self, const THCuda
return 0;
}
-int THCudaTensor_isSameSizeAs(THCState *state, const THCudaTensor *self, const THCudaTensor* src)
+int THCTensor_(isSameSizeAs)(THCState *state, const THCTensor *self, const THCTensor* src)
{
int d;
if (self->nDimension != src->nDimension)
@@ -579,7 +578,7 @@ int THCudaTensor_isSameSizeAs(THCState *state, const THCudaTensor *self, const T
return 1;
}
-long THCudaTensor_nElement(THCState *state, const THCudaTensor *self)
+long THCTensor_(nElement)(THCState *state, const THCTensor *self)
{
if(self->nDimension == 0)
return 0;
@@ -593,13 +592,13 @@ long THCudaTensor_nElement(THCState *state, const THCudaTensor *self)
}
}
-void THCudaTensor_retain(THCState *state, THCudaTensor *self)
+void THCTensor_(retain)(THCState *state, THCTensor *self)
{
if(self->flag & TH_TENSOR_REFCOUNTED)
THAtomicIncrementRef(&self->refcount);
}
-void THCudaTensor_free(THCState *state, THCudaTensor *self)
+void THCTensor_(free)(THCState *state, THCTensor *self)
{
if(!self)
return;
@@ -611,23 +610,23 @@ void THCudaTensor_free(THCState *state, THCudaTensor *self)
THFree(self->size);
THFree(self->stride);
if(self->storage)
- THCudaStorage_free(state, self->storage);
+ THCStorage_(free)(state, self->storage);
THFree(self);
}
}
}
-void THCudaTensor_freeCopyTo(THCState *state, THCudaTensor *self, THCudaTensor *dst)
+void THCTensor_(freeCopyTo)(THCState *state, THCTensor *self, THCTensor *dst)
{
if(self != dst)
- THCudaTensor_copy(state, dst, self);
+ THCTensor_(copy)(state, dst, self);
- THCudaTensor_free(state, self);
+ THCTensor_(free)(state, self);
}
/*******************************************************************************/
-static void THCudaTensor_rawInit(THCState *state, THCudaTensor *self)
+static void THCTensor_(rawInit)(THCState *state, THCTensor *self)
{
self->refcount = 1;
self->storage = NULL;
@@ -638,18 +637,18 @@ static void THCudaTensor_rawInit(THCState *state, THCudaTensor *self)
self->flag = TH_TENSOR_REFCOUNTED;
}
-static void THCudaTensor_rawSet(THCState *state, THCudaTensor *self, THCudaStorage *storage, long storageOffset, int nDimension, long *size, long *stride)
+static void THCTensor_(rawSet)(THCState *state, THCTensor *self, THCStorage *storage, long storageOffset, int nDimension, long *size, long *stride)
{
/* storage */
if(self->storage != storage)
{
if(self->storage)
- THCudaStorage_free(state, self->storage);
+ THCStorage_(free)(state, self->storage);
if(storage)
{
self->storage = storage;
- THCudaStorage_retain(state, self->storage);
+ THCStorage_(retain)(state, self->storage);
}
else
self->storage = NULL;
@@ -661,10 +660,10 @@ static void THCudaTensor_rawSet(THCState *state, THCudaTensor *self, THCudaStora
self->storageOffset = storageOffset;
/* size and stride */
- THCudaTensor_rawResize(state, self, nDimension, size, stride);
+ THCTensor_(rawResize)(state, self, nDimension, size, stride);
}
-void THCudaTensor_rawResize(THCState *state, THCudaTensor *self, int nDimension, long *size, long *stride)
+void THCTensor_(rawResize)(THCState *state, THCTensor *self, int nDimension, long *size, long *stride)
{
int d;
int nDimension_;
@@ -722,72 +721,72 @@ void THCudaTensor_rawResize(THCState *state, THCudaTensor *self, int nDimension,
if(totalSize+self->storageOffset > 0)
{
if(!self->storage)
- self->storage = THCudaStorage_new(state);
+ self->storage = THCStorage_(new)(state);
if(totalSize+self->storageOffset > self->storage->size)
- THCudaStorage_resize(state, self->storage, totalSize+self->storageOffset);
+ THCStorage_(resize)(state, self->storage, totalSize+self->storageOffset);
}
}
else
self->nDimension = 0;
}
-void THCudaTensor_set1d(THCState *state, THCudaTensor *tensor, long x0, float value)
+void THCTensor_(set1d)(THCState *state, THCTensor *tensor, long x0, real value)
{
THArgCheck(tensor->nDimension == 1, 1, "tensor must have one dimension");
THArgCheck( (x0 >= 0) && (x0 < tensor->size[0]), 2, "out of range");
- THCudaStorage_set(state, tensor->storage, tensor->storageOffset+x0*tensor->stride[0], value);
+ THCStorage_(set)(state, tensor->storage, tensor->storageOffset+x0*tensor->stride[0], value);
}
-float THCudaTensor_get1d(THCState *state, const THCudaTensor *tensor, long x0)
+real THCTensor_(get1d)(THCState *state, const THCTensor *tensor, long x0)
{
THArgCheck(tensor->nDimension == 1, 1, "tensor must have one dimension");
THArgCheck( (x0 >= 0) && (x0 < tensor->size[0]), 2, "out of range");
- return THCudaStorage_get(state, tensor->storage, tensor->storageOffset+x0*tensor->stride[0]);
+ return THCStorage_(get)(state, tensor->storage, tensor->storageOffset+x0*tensor->stride[0]);
}
-void THCudaTensor_set2d(THCState *state, THCudaTensor *tensor, long x0, long x1, float value)
+void THCTensor_(set2d)(THCState *state, THCTensor *tensor, long x0, long x1, real value)
{
THArgCheck(tensor->nDimension == 2, 1, "tensor must have two dimensions");
THArgCheck((x0 >= 0) && (x0 < tensor->size[0]) && (x1 >= 0) && (x1 < tensor->size[1]), 2, "out of range");
- THCudaStorage_set(state, tensor->storage, tensor->storageOffset+x0*tensor->stride[0]+x1*tensor->stride[1], value);
+ THCStorage_(set)(state, tensor->storage, tensor->storageOffset+x0*tensor->stride[0]+x1*tensor->stride[1], value);
}
-float THCudaTensor_get2d(THCState *state, const THCudaTensor *tensor, long x0, long x1)
+real THCTensor_(get2d)(THCState *state, const THCTensor *tensor, long x0, long x1)
{
THArgCheck(tensor->nDimension == 2, 1, "tensor must have two dimensions");
THArgCheck((x0 >= 0) && (x0 < tensor->size[0]) && (x1 >= 0) && (x1 < tensor->size[1]), 2, "out of range");
- return THCudaStorage_get(state, tensor->storage, tensor->storageOffset+x0*tensor->stride[0]+x1*tensor->stride[1]);
+ return THCStorage_(get)(state, tensor->storage, tensor->storageOffset+x0*tensor->stride[0]+x1*tensor->stride[1]);
}
-void THCudaTensor_set3d(THCState *state, THCudaTensor *tensor, long x0, long x1, long x2, float value)
+void THCTensor_(set3d)(THCState *state, THCTensor *tensor, long x0, long x1, long x2, real value)
{
THArgCheck(tensor->nDimension == 3, 1, "tensor must have three dimensions");
THArgCheck( (x0 >= 0) && (x0 < tensor->size[0]) && (x1 >= 0) && (x1 < tensor->size[1]) && (x2 >= 0) && (x2 < tensor->size[2]), 2, "out of range");
- THCudaStorage_set(state, tensor->storage, tensor->storageOffset+x0*tensor->stride[0]+x1*tensor->stride[1]+x2*tensor->stride[2], value);
+ THCStorage_(set)(state, tensor->storage, tensor->storageOffset+x0*tensor->stride[0]+x1*tensor->stride[1]+x2*tensor->stride[2], value);
}
-float THCudaTensor_get3d(THCState *state, const THCudaTensor *tensor, long x0, long x1, long x2)
+real THCTensor_(get3d)(THCState *state, const THCTensor *tensor, long x0, long x1, long x2)
{
THArgCheck(tensor->nDimension == 3, 1, "tensor must have three dimensions");
THArgCheck( (x0 >= 0) && (x0 < tensor->size[0]) && (x1 >= 0) && (x1 < tensor->size[1]) && (x2 >= 0) && (x2 < tensor->size[2]), 2, "out of range");
- return THCudaStorage_get(state, tensor->storage, tensor->storageOffset+x0*tensor->stride[0]+x1*tensor->stride[1]+x2*tensor->stride[2]);
+ return THCStorage_(get)(state, tensor->storage, tensor->storageOffset+x0*tensor->stride[0]+x1*tensor->stride[1]+x2*tensor->stride[2]);
}
-void THCudaTensor_set4d(THCState *state, THCudaTensor *tensor, long x0, long x1, long x2, long x3, float value)
+void THCTensor_(set4d)(THCState *state, THCTensor *tensor, long x0, long x1, long x2, long x3, real value)
{
THArgCheck(tensor->nDimension == 4, 1, "tensor must have four dimensions");
THArgCheck((x0 >= 0) && (x0 < tensor->size[0]) && (x1 >= 0) && (x1 < tensor->size[1]) && (x2 >= 0) && (x2 < tensor->size[2]) && (x3 >= 0) && (x3 < tensor->size[3]), 2, "out of range");
- THCudaStorage_set(state, tensor->storage, tensor->storageOffset+x0*tensor->stride[0]+x1*tensor->stride[1]+x2*tensor->stride[2]+x3*tensor->stride[3], value);
+ THCStorage_(set)(state, tensor->storage, tensor->storageOffset+x0*tensor->stride[0]+x1*tensor->stride[1]+x2*tensor->stride[2]+x3*tensor->stride[3], value);
}
-float THCudaTensor_get4d(THCState *state, const THCudaTensor *tensor, long x0, long x1, long x2, long x3)
+real THCTensor_(get4d)(THCState *state, const THCTensor *tensor, long x0, long x1, long x2, long x3)
{
THArgCheck(tensor->nDimension == 4, 1, "tensor must have four dimensions");
THArgCheck((x0 >= 0) && (x0 < tensor->size[0]) && (x1 >= 0) && (x1 < tensor->size[1]) && (x2 >= 0) && (x2 < tensor->size[2]) && (x3 >= 0) && (x3 < tensor->size[3]), 2, "out of range");
- return THCudaStorage_get(state, tensor->storage, tensor->storageOffset+x0*tensor->stride[0]+x1*tensor->stride[1]+x2*tensor->stride[2]+x3*tensor->stride[3]);
+ return THCStorage_(get)(state, tensor->storage, tensor->storageOffset+x0*tensor->stride[0]+x1*tensor->stride[1]+x2*tensor->stride[2]+x3*tensor->stride[3]);
}
-int THCudaTensor_checkGPU(THCState *state, unsigned int nTensors, ...)
+int THCTensor_(checkGPU)(THCState *state, unsigned int nTensors, ...)
{
#ifdef DISABLE_CHECK_GPU
return 1; // Disable GPU checks.
@@ -798,11 +797,11 @@ int THCudaTensor_checkGPU(THCState *state, unsigned int nTensors, ...)
va_start(args, nTensors);
int valid = 1;
for (unsigned int i = 0; i < nTensors; i++) {
- THCudaTensor* tensor = va_arg(args, THCudaTensor*);
+ THCTensor* tensor = va_arg(args, THCTensor*);
if (tensor == NULL) {
continue;
}
- int tensorDev = THCudaTensor_getDevice(state, tensor);
+ int tensorDev = THCTensor_(getDevice)(state, tensor);
if (tensorDev != -1 && tensorDev != curDev) {
valid = 0;
break;
@@ -812,3 +811,5 @@ int THCudaTensor_checkGPU(THCState *state, unsigned int nTensors, ...)
return valid;
#endif
}
+
+#endif
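
Aside from the `real` value type in `set*d`/`get*d`, the tensor bookkeeping above is type-independent: every accessor reduces an index tuple to a flat storage position `storageOffset + sum(x_i * stride[i])`. A small sketch of that reduction generalized to any dimension count (hypothetical helper, not part of THC; bounds checks elided):

    /* Flat storage index for coordinates x[0..nDim-1], mirroring the
       arithmetic in THCTensor_(set4d)/THCTensor_(get4d) above. */
    long flat_index(long storageOffset, const long *stride,
                    const long *x, int nDim)
    {
        long idx = storageOffset;
        for (int d = 0; d < nDim; d++)
            idx += x[d] * stride[d];
        return idx;
    }
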
diff --git a/lib/THC/generic/THCTensor.cu b/lib/THC/generic/THCTensor.cu
index 3972ee3..beb96ec 100644
--- a/lib/THC/generic/THCTensor.cu
+++ b/lib/THC/generic/THCTensor.cu
@@ -1,14 +1,16 @@
-#include "THCTensor.h"
+#ifndef THC_GENERIC_FILE
+#define THC_GENERIC_FILE "generic/THCTensor.cu"
+#else
-cudaTextureObject_t THCudaTensor_getTextureObject(THCState *state, THCudaTensor *self)
+cudaTextureObject_t THCTensor_(getTextureObject)(THCState *state, THCTensor *self)
{
- THAssert(THCudaTensor_checkGPU(state, 1, self));
+ THAssert(THCTensor_(checkGPU)(state, 1, self));
cudaTextureObject_t texObj;
struct cudaResourceDesc resDesc;
memset(&resDesc, 0, sizeof(resDesc));
resDesc.resType = cudaResourceTypeLinear;
- resDesc.res.linear.devPtr = THCudaTensor_data(state, self);
- resDesc.res.linear.sizeInBytes = THCudaTensor_nElement(state, self) * 4;
+ resDesc.res.linear.devPtr = THCTensor_(data)(state, self);
+ resDesc.res.linear.sizeInBytes = THCTensor_(nElement)(state, self) * 4;
resDesc.res.linear.desc = cudaCreateChannelDesc(32, 0, 0, 0,
cudaChannelFormatKindFloat);
struct cudaTextureDesc texDesc;
@@ -16,19 +18,21 @@ cudaTextureObject_t THCudaTensor_getTextureObject(THCState *state, THCudaTensor
cudaCreateTextureObject(&texObj, &resDesc, &texDesc, NULL);
cudaError errcode = cudaGetLastError();
if(errcode != cudaSuccess) {
- if (THCudaTensor_nElement(state, self) > 2>>27)
+ if (THCTensor_(nElement)(state, self) > 2>>27)
THError("Failed to create texture object, "
"nElement:%ld exceeds 27-bit addressing required for tex1Dfetch. Cuda Error: %s",
- THCudaTensor_nElement(state, self), cudaGetErrorString(errcode));
+ THCTensor_(nElement)(state, self), cudaGetErrorString(errcode));
else
THError("Failed to create texture object: %s", cudaGetErrorString(errcode));
}
return texObj;
}
-THC_API int THCudaTensor_getDevice(THCState* state, const THCudaTensor* thc) {
+THC_API int THCTensor_(getDevice)(THCState* state, const THCTensor* thc) {
if (!thc->storage) return -1;
cudaPointerAttributes attr;
THCudaCheck(cudaPointerGetAttributes(&attr, thc->storage->data));
return attr.device;
}
+
+#endif
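
Two details in this file read as float-era leftovers after the rename: `sizeInBytes` is still computed as `nElement * 4` with a 32-bit float channel descriptor (i.e. `sizeof(float)`, not `sizeof(real)`), and `2>>27` in the error branch evaluates to 0, so the large-tensor message fires for any nonzero size; the surrounding text about 27-bit `tex1Dfetch` addressing suggests `1<<27` was intended. `getDevice`, by contrast, is fully generic. A standalone equivalent of its pointer query, assuming only the CUDA runtime API; the function name is hypothetical:

    #include <cuda_runtime.h>

    /* Returns the device owning devPtr, or -1 on failure (mirroring
       THCTensor_(getDevice), which returns -1 for a tensor with no storage). */
    int device_of(const void *devPtr)
    {
        cudaPointerAttributes attr;
        if (cudaPointerGetAttributes(&attr, devPtr) != cudaSuccess)
            return -1;
        return attr.device;
    }
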
diff --git a/lib/THC/generic/THCTensor.h b/lib/THC/generic/THCTensor.h
index 27689e8..175eaee 100644
--- a/lib/THC/generic/THCTensor.h
+++ b/lib/THC/generic/THCTensor.h
@@ -1,133 +1,130 @@
-#ifndef THC_TENSOR_INC
-#define THC_TENSOR_INC
-
-#include "THTensor.h"
-#include "THCStorage.h"
-#include "THCGeneral.h"
+#ifndef THC_GENERIC_FILE
+#define THC_GENERIC_FILE "generic/THCTensor.h"
+#else
#define TH_TENSOR_REFCOUNTED 1
-typedef struct THCudaTensor
+typedef struct THCTensor
{
long *size;
long *stride;
int nDimension;
- THCudaStorage *storage;
+ THCStorage *storage;
long storageOffset;
int refcount;
char flag;
-} THCudaTensor;
+} THCTensor;
/**** access methods ****/
-THC_API THCudaStorage* THCudaTensor_storage(THCState *state, const THCudaTensor *self);
-THC_API long THCudaTensor_storageOffset(THCState *state, const THCudaTensor *self);
-THC_API int THCudaTensor_nDimension(THCState *state, const THCudaTensor *self);
-THC_API long THCudaTensor_size(THCState *state, const THCudaTensor *self, int dim);
-THC_API long THCudaTensor_stride(THCState *state, const THCudaTensor *self, int dim);
-THC_API THLongStorage *THCudaTensor_newSizeOf(THCState *state, THCudaTensor *self);
-THC_API THLongStorage *THCudaTensor_newStrideOf(THCState *state, THCudaTensor *self);
-THC_API float *THCudaTensor_data(THCState *state, const THCudaTensor *self);
+THC_API THCStorage* THCTensor_(storage)(THCState *state, const THCTensor *self);
+THC_API long THCTensor_(storageOffset)(THCState *state, const THCTensor *self);
+THC_API int THCTensor_(nDimension)(THCState *state, const THCTensor *self);
+THC_API long THCTensor_(size)(THCState *state, const THCTensor *self, int dim);
+THC_API long THCTensor_(stride)(THCState *state, const THCTensor *self, int dim);
+THC_API THLongStorage *THCTensor_(newSizeOf)(THCState *state, THCTensor *self);
+THC_API THLongStorage *THCTensor_(newStrideOf)(THCState *state, THCTensor *self);
+THC_API real *THCTensor_(data)(THCState *state, const THCTensor *self);
-THC_API void THCudaTensor_setFlag(THCState *state, THCudaTensor *self, const char flag);
-THC_API void THCudaTensor_clearFlag(THCState *state, THCudaTensor *self, const char flag);
+THC_API void THCTensor_(setFlag)(THCState *state, THCTensor *self, const char flag);
+THC_API void THCTensor_(clearFlag)(THCState *state, THCTensor *self, const char flag);
/**** creation methods ****/
-THC_API THCudaTensor *THCudaTensor_new(THCState *state);
-THC_API THCudaTensor *THCudaTensor_newWithTensor(THCState *state, THCudaTensor *tensor);
+THC_API THCTensor *THCTensor_(new)(THCState *state);
+THC_API THCTensor *THCTensor_(newWithTensor)(THCState *state, THCTensor *tensor);
/* stride might be NULL */
-THC_API THCudaTensor *THCudaTensor_newWithStorage(THCState *state, THCudaStorage *storage_, long storageOffset_, THLongStorage *size_, THLongStorage *stride_);
-THC_API THCudaTensor *THCudaTensor_newWithStorage1d(THCState *state, THCudaStorage *storage_, long storageOffset_,
+THC_API THCTensor *THCTensor_(newWithStorage)(THCState *state, THCStorage *storage_, long storageOffset_, THLongStorage *size_, THLongStorage *stride_);
+THC_API THCTensor *THCTensor_(newWithStorage1d)(THCState *state, THCStorage *storage_, long storageOffset_,
long size0_, long stride0_);
-THC_API THCudaTensor *THCudaTensor_newWithStorage2d(THCState *state, THCudaStorage *storage_, long storageOffset_,
+THC_API THCTensor *THCTensor_(newWithStorage2d)(THCState *state, THCStorage *storage_, long storageOffset_,
long size0_, long stride0_,
long size1_, long stride1_);
-THC_API THCudaTensor *THCudaTensor_newWithStorage3d(THCState *state, THCudaStorage *storage_, long storageOffset_,
+THC_API THCTensor *THCTensor_(newWithStorage3d)(THCState *state, THCStorage *storage_, long storageOffset_,
long size0_, long stride0_,
long size1_, long stride1_,
long size2_, long stride2_);
-THC_API THCudaTensor *THCudaTensor_newWithStorage4d(THCState *state, THCudaStorage *storage_, long storageOffset_,
+THC_API THCTensor *THCTensor_(newWithStorage4d)(THCState *state, THCStorage *storage_, long storageOffset_,
long size0_, long stride0_,
long size1_, long stride1_,
long size2_, long stride2_,
long size3_, long stride3_);
/* stride might be NULL */
-THC_API THCudaTensor *THCudaTensor_newWithSize(THCState *state, THLongStorage *size_, THLongStorage *stride_);
-THC_API THCudaTensor *THCudaTensor_newWithSize1d(THCState *state, long size0_);
-THC_API THCudaTensor *THCudaTensor_newWithSize2d(THCState *state, long size0_, long size1_);
-THC_API THCudaTensor *THCudaTensor_newWithSize3d(THCState *state, long size0_, long size1_, long size2_);
-THC_API THCudaTensor *THCudaTensor_newWithSize4d(THCState *state, long size0_, long size1_, long size2_, long size3_);
-
-THC_API THCudaTensor *THCudaTensor_newClone(THCState *state, THCudaTensor *self);
-THC_API THCudaTensor *THCudaTensor_newContiguous(THCState *state, THCudaTensor *tensor);
-THC_API THCudaTensor *THCudaTensor_newSelect(THCState *state, THCudaTensor *tensor, int dimension_, long sliceIndex_);
-THC_API THCudaTensor *THCudaTensor_newNarrow(THCState *state, THCudaTensor *tensor, int dimension_, long firstIndex_, long size_);
-THC_API THCudaTensor *THCudaTensor_newTranspose(THCState *state, THCudaTensor *tensor, int dimension1_, int dimension2_);
-THC_API THCudaTensor *THCudaTensor_newUnfold(THCState *state, THCudaTensor *tensor, int dimension_, long size_, long step_);
-
-THC_API void THCudaTensor_resize(THCState *state, THCudaTensor *tensor, THLongStorage *size, THLongStorage *stride);
-THC_API void THCudaTensor_resizeAs(THCState *state, THCudaTensor *tensor, THCudaTensor *src);
-THC_API void THCudaTensor_resize1d(THCState *state, THCudaTensor *tensor, long size0_);
-THC_API void THCudaTensor_resize2d(THCState *state, THCudaTensor *tensor, long size0_, long size1_);
-THC_API void THCudaTensor_resize3d(THCState *state, THCudaTensor *tensor, long size0_, long size1_, long size2_);
-THC_API void THCudaTensor_resize4d(THCState *state, THCudaTensor *tensor, long size0_, long size1_, long size2_, long size3_);
-THC_API void THCudaTensor_resize5d(THCState *state, THCudaTensor *tensor, long size0_, long size1_, long size2_, long size3_, long size4_);
-THC_API void THCudaTensor_rawResize(THCState *state, THCudaTensor *self, int nDimension, long *size, long *stride);
-
-THC_API void THCudaTensor_set(THCState *state, THCudaTensor *self, THCudaTensor *src);
-THC_API void THCudaTensor_setStorage(THCState *state, THCudaTensor *self, THCudaStorage *storage_, long storageOffset_, THLongStorage *size_, THLongStorage *stride_);
-THC_API void THCudaTensor_setStorage1d(THCState *state, THCudaTensor *self, THCudaStorage *storage_, long storageOffset_,
+THC_API THCTensor *THCTensor_(newWithSize)(THCState *state, THLongStorage *size_, THLongStorage *stride_);
+THC_API THCTensor *THCTensor_(newWithSize1d)(THCState *state, long size0_);
+THC_API THCTensor *THCTensor_(newWithSize2d)(THCState *state, long size0_, long size1_);
+THC_API THCTensor *THCTensor_(newWithSize3d)(THCState *state, long size0_, long size1_, long size2_);
+THC_API THCTensor *THCTensor_(newWithSize4d)(THCState *state, long size0_, long size1_, long size2_, long size3_);
+
+THC_API THCTensor *THCTensor_(newClone)(THCState *state, THCTensor *self);
+THC_API THCTensor *THCTensor_(newContiguous)(THCState *state, THCTensor *tensor);
+THC_API THCTensor *THCTensor_(newSelect)(THCState *state, THCTensor *tensor, int dimension_, long sliceIndex_);
+THC_API THCTensor *THCTensor_(newNarrow)(THCState *state, THCTensor *tensor, int dimension_, long firstIndex_, long size_);
+THC_API THCTensor *THCTensor_(newTranspose)(THCState *state, THCTensor *tensor, int dimension1_, int dimension2_);
+THC_API THCTensor *THCTensor_(newUnfold)(THCState *state, THCTensor *tensor, int dimension_, long size_, long step_);
+
+THC_API void THCTensor_(resize)(THCState *state, THCTensor *tensor, THLongStorage *size, THLongStorage *stride);
+THC_API void THCTensor_(resizeAs)(THCState *state, THCTensor *tensor, THCTensor *src);
+THC_API void THCTensor_(resize1d)(THCState *state, THCTensor *tensor, long size0_);
+THC_API void THCTensor_(resize2d)(THCState *state, THCTensor *tensor, long size0_, long size1_);
+THC_API void THCTensor_(resize3d)(THCState *state, THCTensor *tensor, long size0_, long size1_, long size2_);
+THC_API void THCTensor_(resize4d)(THCState *state, THCTensor *tensor, long size0_, long size1_, long size2_, long size3_);
+THC_API void THCTensor_(resize5d)(THCState *state, THCTensor *tensor, long size0_, long size1_, long size2_, long size3_, long size4_);
+THC_API void THCTensor_(rawResize)(THCState *state, THCTensor *self, int nDimension, long *size, long *stride);
+
+THC_API void THCTensor_(set)(THCState *state, THCTensor *self, THCTensor *src);
+THC_API void THCTensor_(setStorage)(THCState *state, THCTensor *self, THCStorage *storage_, long storageOffset_, THLongStorage *size_, THLongStorage *stride_);
+THC_API void THCTensor_(setStorage1d)(THCState *state, THCTensor *self, THCStorage *storage_, long storageOffset_,
long size0_, long stride0_);
-THC_API void THCudaTensor_setStorage2d(THCState *state, THCudaTensor *self, THCudaStorage *storage_, long storageOffset_,
+THC_API void THCTensor_(setStorage2d)(THCState *state, THCTensor *self, THCStorage *storage_, long storageOffset_,
long size0_, long stride0_,
long size1_, long stride1_);
-THC_API void THCudaTensor_setStorage3d(THCState *state, THCudaTensor *self, THCudaStorage *storage_, long storageOffset_,
+THC_API void THCTensor_(setStorage3d)(THCState *state, THCTensor *self, THCStorage *storage_, long storageOffset_,
long size0_, long stride0_,
long size1_, long stride1_,
long size2_, long stride2_);
-THC_API void THCudaTensor_setStorage4d(THCState *state, THCudaTensor *self, THCudaStorage *storage_, long storageOffset_,
+THC_API void THCTensor_(setStorage4d)(THCState *state, THCTensor *self, THCStorage *storage_, long storageOffset_,
long size0_, long stride0_,
long size1_, long stride1_,
long size2_, long stride2_,
long size3_, long stride3_);
-THC_API void THCudaTensor_narrow(THCState *state, THCudaTensor *self, THCudaTensor *src, int dimension_, long firstIndex_, long size_);
-THC_API void THCudaTensor_select(THCState *state, THCudaTensor *self, THCudaTensor *src, int dimension_, long sliceIndex_);
-THC_API void THCudaTensor_transpose(THCState *state, THCudaTensor *self, THCudaTensor *src, int dimension1_, int dimension2_);
-THC_API void THCudaTensor_unfold(THCState *state, THCudaTensor *self, THCudaTensor *src, int dimension_, long size_, long step_);
+THC_API void THCTensor_(narrow)(THCState *state, THCTensor *self, THCTensor *src, int dimension_, long firstIndex_, long size_);
+THC_API void THCTensor_(select)(THCState *state, THCTensor *self, THCTensor *src, int dimension_, long sliceIndex_);
+THC_API void THCTensor_(transpose)(THCState *state, THCTensor *self, THCTensor *src, int dimension1_, int dimension2_);
+THC_API void THCTensor_(unfold)(THCState *state, THCTensor *self, THCTensor *src, int dimension_, long size_, long step_);
-THC_API void THCudaTensor_squeeze(THCState *state, THCudaTensor *self, THCudaTensor *src);
-THC_API void THCudaTensor_squeeze1d(THCState *state, THCudaTensor *self, THCudaTensor *src, int dimension_);
+THC_API void THCTensor_(squeeze)(THCState *state, THCTensor *self, THCTensor *src);
+THC_API void THCTensor_(squeeze1d)(THCState *state, THCTensor *self, THCTensor *src, int dimension_);
-THC_API int THCudaTensor_isContiguous(THCState *state, const THCudaTensor *self);
-THC_API int THCudaTensor_isSameSizeAs(THCState *state, const THCudaTensor *self, const THCudaTensor *src);
-THC_API int THCudaTensor_isSetTo(THCState *state, const THCudaTensor *self, const THCudaTensor *src);
-THC_API int THCudaTensor_isSize(THCState *state, const THCudaTensor *self, const THLongStorage *dims);
-THC_API long THCudaTensor_nElement(THCState *state, const THCudaTensor *self);
+THC_API int THCTensor_(isContiguous)(THCState *state, const THCTensor *self);
+THC_API int THCTensor_(isSameSizeAs)(THCState *state, const THCTensor *self, const THCTensor *src);
+THC_API int THCTensor_(isSetTo)(THCState *state, const THCTensor *self, const THCTensor *src);
+THC_API int THCTensor_(isSize)(THCState *state, const THCTensor *self, const THLongStorage *dims);
+THC_API long THCTensor_(nElement)(THCState *state, const THCTensor *self);
-THC_API void THCudaTensor_retain(THCState *state, THCudaTensor *self);
-THC_API void THCudaTensor_free(THCState *state, THCudaTensor *self);
-THC_API void THCudaTensor_freeCopyTo(THCState *state, THCudaTensor *self, THCudaTensor *dst);
+THC_API void THCTensor_(retain)(THCState *state, THCTensor *self);
+THC_API void THCTensor_(free)(THCState *state, THCTensor *self);
+THC_API void THCTensor_(freeCopyTo)(THCState *state, THCTensor *self, THCTensor *dst);
/* Slow access methods [check everything] */
-THC_API void THCudaTensor_set1d(THCState *state, THCudaTensor *tensor, long x0, float value);
-THC_API void THCudaTensor_set2d(THCState *state, THCudaTensor *tensor, long x0, long x1, float value);
-THC_API void THCudaTensor_set3d(THCState *state, THCudaTensor *tensor, long x0, long x1, long x2, float value);
-THC_API void THCudaTensor_set4d(THCState *state, THCudaTensor *tensor, long x0, long x1, long x2, long x3, float value);
+THC_API void THCTensor_(set1d)(THCState *state, THCTensor *tensor, long x0, real value);
+THC_API void THCTensor_(set2d)(THCState *state, THCTensor *tensor, long x0, long x1, real value);
+THC_API void THCTensor_(set3d)(THCState *state, THCTensor *tensor, long x0, long x1, long x2, real value);
+THC_API void THCTensor_(set4d)(THCState *state, THCTensor *tensor, long x0, long x1, long x2, long x3, real value);
-THC_API float THCudaTensor_get1d(THCState *state, const THCudaTensor *tensor, long x0);
-THC_API float THCudaTensor_get2d(THCState *state, const THCudaTensor *tensor, long x0, long x1);
-THC_API float THCudaTensor_get3d(THCState *state, const THCudaTensor *tensor, long x0, long x1, long x2);
-THC_API float THCudaTensor_get4d(THCState *state, const THCudaTensor *tensor, long x0, long x1, long x2, long x3);
+THC_API real THCTensor_(get1d)(THCState *state, const THCTensor *tensor, long x0);
+THC_API real THCTensor_(get2d)(THCState *state, const THCTensor *tensor, long x0, long x1);
+THC_API real THCTensor_(get3d)(THCState *state, const THCTensor *tensor, long x0, long x1, long x2);
+THC_API real THCTensor_(get4d)(THCState *state, const THCTensor *tensor, long x0, long x1, long x2, long x3);
/* CUDA-specific functions */
-THC_API cudaTextureObject_t THCudaTensor_getTextureObject(THCState *state, THCudaTensor *self);
-THC_API int THCudaTensor_getDevice(THCState *state, const THCudaTensor *self);
-THC_API int THCudaTensor_checkGPU(THCState *state, unsigned int nTensors, ...);
+THC_API cudaTextureObject_t THCTensor_(getTextureObject)(THCState *state, THCTensor *self);
+THC_API int THCTensor_(getDevice)(THCState *state, const THCTensor *self);
+THC_API int THCTensor_(checkGPU)(THCState *state, unsigned int nTensors, ...);
#endif
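
The declarations above complete the genericization of the tensor API: every former THCudaTensor_* entry point is now a THCTensor_(name) macro, stamped out once per element type by the THC_GENERIC_FILE machinery. A minimal Lua-level sketch of the surface this produces (assuming the Tensor.lua/FFI wiring elsewhere in this commit registers the typed constructors, as the tests below exercise):

require 'cutorch'
local t = torch.range(1, 12):reshape(3, 4)    -- CPU DoubleTensor
local b = torch.CudaByteTensor(3, 4):copy(t)  -- per-type GPU tensor, filled via copy
print(b:size(1), b:size(2), b:nElement())     -- metadata methods exist for every type
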
diff --git a/lib/THC/generic/THCTensorCopy.c b/lib/THC/generic/THCTensorCopy.c
index 5fbacca..c1813f9 100644
--- a/lib/THC/generic/THCTensorCopy.c
+++ b/lib/THC/generic/THCTensorCopy.c
@@ -1,42 +1,42 @@
-#include "THCTensorCopy.h"
-#include "THCGeneral.h"
-#include "THCTensor.h"
+#ifndef THC_GENERIC_FILE
+#define THC_GENERIC_FILE "generic/THCTensorCopy.c"
+#else
/* specific methods */
-void THCudaTensor_copyFloat(THCState *state, THCudaTensor *self, struct THFloatTensor *src)
+void THCTensor_(copyCPU)(THCState *state, THCTensor *self, struct THTensor *src)
{
- THArgCheck(THCudaTensor_nElement(state, self) == THFloatTensor_nElement(src), 2, "sizes do not match");
+ THArgCheck(THCTensor_(nElement)(state, self) == THTensor_(nElement)(src), 2, "sizes do not match");
{
- THCudaTensor *selfc = THCudaTensor_newContiguous(state, self);
- src = THFloatTensor_newContiguous(src);
-
- THCudaCheck(cudaMemcpy(THCudaTensor_data(state, selfc),
- THFloatTensor_data(src),
- THFloatTensor_nElement(src) * sizeof(float),
+ THCTensor *selfc = THCTensor_(newContiguous)(state, self);
+ src = THTensor_(newContiguous)(src);
+
+ THCudaCheck(cudaMemcpy(THCTensor_(data)(state,selfc),
+ THTensor_(data)(src),
+ THTensor_(nElement)(src) * sizeof(real),
cudaMemcpyHostToDevice));
- THFloatTensor_free(src);
- THCudaTensor_freeCopyTo(state, selfc, self);
+ THTensor_(free)(src);
+ THCTensor_(freeCopyTo)(state, selfc, self);
}
}
-/* everything comes down to copy to a tensor of floats */
#define IMPLEMENT_TH_CUDA_TENSOR_COPY(TYPEC) \
-void THCudaTensor_copy##TYPEC(THCState *state, THCudaTensor *self, struct TH##TYPEC##Tensor *src) \
+void THCTensor_(copy##TYPEC)(THCState *state, THCTensor *self, struct TH##TYPEC##Tensor *src) \
{ \
- THArgCheck(THCudaTensor_nElement(state, self) == TH##TYPEC##Tensor_nElement(src), 2, "sizes do not match"); \
- \
- { \
+ THArgCheck(THCTensor_(nElement)(state, self) == TH##TYPEC##Tensor_nElement(src), 2, "sizes do not match"); \
+ if(THCTypeIdx_(Real) == THCTypeIdx_(TYPEC)) { \
+ THCTensor_(copyCPU)(state, self, (THTensor*) src); /* cast just removes warnings */ \
+ } else { \
THLongStorage *size = TH##TYPEC##Tensor_newSizeOf(src); \
- THFloatTensor *srcf = THFloatTensor_newWithSize(size, NULL); \
+ THTensor *srcf = THTensor_(newWithSize)(size, NULL); \
\
- THFloatTensor_copy##TYPEC(srcf, src); \
- THCudaTensor_copyFloat(state, self, srcf); \
+ THTensor_(copy##TYPEC)(srcf, src); \
+ THCTensor_(copyCPU)(state, self, srcf); \
\
THLongStorage_free(size); \
- THFloatTensor_free(srcf); \
+ THTensor_(free)(srcf); \
} \
}
@@ -45,43 +45,45 @@ IMPLEMENT_TH_CUDA_TENSOR_COPY(Char)
IMPLEMENT_TH_CUDA_TENSOR_COPY(Short)
IMPLEMENT_TH_CUDA_TENSOR_COPY(Int)
IMPLEMENT_TH_CUDA_TENSOR_COPY(Long)
+IMPLEMENT_TH_CUDA_TENSOR_COPY(Float)
IMPLEMENT_TH_CUDA_TENSOR_COPY(Double)
/* copyCuda */
-void THFloatTensor_copyCuda(THCState *state, THFloatTensor *self, struct THCudaTensor *src)
+void THTensor_(copyCuda)(THCState *state, THTensor *self, struct THCTensor *src)
{
- THArgCheck(THFloatTensor_nElement(self) == THCudaTensor_nElement(state, src), 2, "sizes do not match");
+ THArgCheck(THTensor_(nElement)(self) == THCTensor_(nElement)(state, src), 2, "sizes do not match");
{
- THFloatTensor *selfc = THFloatTensor_newContiguous(self);
- src = THCudaTensor_newContiguous(state, src);
+ THTensor *selfc = THTensor_(newContiguous)(self);
+ src = THCTensor_(newContiguous)(state, src);
- THCudaCheck(cudaMemcpy(THFloatTensor_data(selfc),
- THCudaTensor_data(state, src),
- THCudaTensor_nElement(state, src) * sizeof(float),
+ THCudaCheck(cudaMemcpy(THTensor_(data)(selfc),
+ THCTensor_(data)(state, src),
+ THCTensor_(nElement)(state, src) * sizeof(real),
cudaMemcpyDeviceToHost));
- THCudaTensor_free(state, src);
- THFloatTensor_freeCopyTo(selfc, self);
+ THCTensor_(free)(state, src);
+ THTensor_(freeCopyTo)(selfc, self);
}
}
-#define IMPLEMENT_TH_CUDA_TENSOR_COPY_TO(TYPEC) \
- void TH##TYPEC##Tensor_copyCuda(THCState *state, TH##TYPEC##Tensor *self, struct THCudaTensor *src) \
- { \
- THArgCheck(TH##TYPEC##Tensor_nElement(self) == THCudaTensor_nElement(state, src), 2, "sizes do not match"); \
- \
- { \
- THLongStorage *size = THCudaTensor_newSizeOf(state, src); \
- THFloatTensor *srcf = THFloatTensor_newWithSize(size, NULL); \
- \
- THFloatTensor_copyCuda(state, srcf, src); \
- TH##TYPEC##Tensor_copyFloat(self, srcf); \
- \
- THLongStorage_free(size); \
- THFloatTensor_free(srcf); \
- } \
+#define IMPLEMENT_TH_CUDA_TENSOR_COPY_TO(TYPEC) \
+ void TH_CONCAT_4(TH,TYPEC,Tensor_copyCuda,Real)(THCState *state, TH##TYPEC##Tensor *self, struct THCTensor *src) \
+ { \
+ THArgCheck(TH##TYPEC##Tensor_nElement(self) == THCTensor_(nElement)(state, src), 2, "sizes do not match"); \
+ if(THCTypeIdx_(Real) == THCTypeIdx_(TYPEC)) { \
+ THTensor_(copyCuda)(state, (THTensor*) self, src); /* cast just removes compiler warning */ \
+ } else { \
+ THLongStorage *size = THCTensor_(newSizeOf)(state, src); \
+ THTensor *srcf = THTensor_(newWithSize)(size, NULL); \
+ \
+ THTensor_(copyCuda)(state, srcf, src); \
+ TH_CONCAT_4(TH,TYPEC,Tensor_copy,Real)(self, srcf); \
+ \
+ THLongStorage_free(size); \
+ THTensor_(free)(srcf); \
+ } \
}
IMPLEMENT_TH_CUDA_TENSOR_COPY_TO(Byte)
@@ -89,23 +91,25 @@ IMPLEMENT_TH_CUDA_TENSOR_COPY_TO(Char)
IMPLEMENT_TH_CUDA_TENSOR_COPY_TO(Short)
IMPLEMENT_TH_CUDA_TENSOR_COPY_TO(Int)
IMPLEMENT_TH_CUDA_TENSOR_COPY_TO(Long)
+IMPLEMENT_TH_CUDA_TENSOR_COPY_TO(Float)
IMPLEMENT_TH_CUDA_TENSOR_COPY_TO(Double)
-void THCudaTensor_copyCuda(THCState *state, THCudaTensor *self, THCudaTensor *src)
+// FIXME: add within-CUDA conversions
+void THCTensor_(copyCuda)(THCState *state, THCTensor *self, THCTensor *src)
{
- THCudaTensor_copy(state, self, src);
+ THCTensor_(copy)(state, self, src);
}
-void THCudaTensor_copyAsyncFloat(THCState *state, THCudaTensor *self, struct THFloatTensor *src)
+void THCTensor_(copyAsyncCPU)(THCState *state, THCTensor *self, struct THTensor *src)
{
- THArgCheck(THCudaTensor_nElement(state, self) == THFloatTensor_nElement(src), 2, "sizes do not match");
- THArgCheck(THCudaTensor_isContiguous(state, self), 2, "Target tensor must be contiguous");
- THArgCheck(THFloatTensor_isContiguous(src), 3, "Source tensor must be contiguous");
+ THArgCheck(THCTensor_(nElement)(state, self) == THTensor_(nElement)(src), 2, "sizes do not match");
+ THArgCheck(THCTensor_(isContiguous)(state, self), 2, "Target tensor must be contiguous");
+ THArgCheck(THTensor_(isContiguous)(src), 3, "Source tensor must be contiguous");
- if (THCudaTensor_nElement(state, self) == 0) return;
+ if (THCTensor_(nElement)(state, self) == 0) return;
// Perform the copy wrt the current stream on the CudaTensor's device.
- int tensorDevice = THCudaTensor_getDevice(state, self);
+ int tensorDevice = THCTensor_(getDevice)(state, self);
int currentDevice;
THCudaCheck(cudaGetDevice(&currentDevice));
@@ -113,9 +117,9 @@ void THCudaTensor_copyAsyncFloat(THCState *state, THCudaTensor *self, struct THF
THCudaCheck(cudaSetDevice(tensorDevice));
}
- THCudaCheck(cudaMemcpyAsync(THCudaTensor_data(state, self),
- THFloatTensor_data(src),
- THFloatTensor_nElement(src) * sizeof(float),
+ THCudaCheck(cudaMemcpyAsync(THCTensor_(data)(state, self),
+ THTensor_(data)(src),
+ THTensor_(nElement)(src) * sizeof(real),
cudaMemcpyHostToDevice,
THCState_getDeviceStream(state, tensorDevice,
THCState_getCurrentStreamIndex(state))));
@@ -125,16 +129,16 @@ void THCudaTensor_copyAsyncFloat(THCState *state, THCudaTensor *self, struct THF
}
}
-void THFloatTensor_copyAsyncCuda(THCState *state, THFloatTensor *self, struct THCudaTensor *src)
+void THTensor_(copyAsyncCuda)(THCState *state, THTensor *self, struct THCTensor *src)
{
- THArgCheck(THFloatTensor_nElement(self) == THCudaTensor_nElement(state, src), 2, "sizes do not match");
- THArgCheck(THFloatTensor_isContiguous(self), 2, "Target tensor must be contiguous");
- THArgCheck(THCudaTensor_isContiguous(state, src), 3, "Source tensor must be contiguous");
+ THArgCheck(THTensor_(nElement)(self) == THCTensor_(nElement)(state, src), 2, "sizes do not match");
+ THArgCheck(THTensor_(isContiguous)(self), 2, "Target tensor must be contiguous");
+ THArgCheck(THCTensor_(isContiguous)(state, src), 3, "Source tensor must be contiguous");
- if (THFloatTensor_nElement(self) == 0) return;
+ if (THTensor_(nElement)(self) == 0) return;
// Perform the copy wrt the current stream on the CudaTensor's device.
- int tensorDevice = THCudaTensor_getDevice(state, src);
+ int tensorDevice = THCTensor_(getDevice)(state, src);
int currentDevice;
THCudaCheck(cudaGetDevice(&currentDevice));
@@ -142,9 +146,9 @@ void THFloatTensor_copyAsyncCuda(THCState *state, THFloatTensor *self, struct TH
THCudaCheck(cudaSetDevice(tensorDevice));
}
- THCudaCheck(cudaMemcpyAsync(THFloatTensor_data(self),
- THCudaTensor_data(state, src),
- THCudaTensor_nElement(state, src) * sizeof(float),
+ THCudaCheck(cudaMemcpyAsync(THTensor_(data)(self),
+ THCTensor_(data)(state, src),
+ THCTensor_(nElement)(state, src) * sizeof(real),
cudaMemcpyDeviceToHost,
THCState_getDeviceStream(state, tensorDevice,
THCState_getCurrentStreamIndex(state))));
@@ -153,3 +157,5 @@ void THFloatTensor_copyAsyncCuda(THCState *state, THFloatTensor *self, struct TH
THCudaCheck(cudaSetDevice(currentDevice));
}
}
+
+#endif
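
Both host<->device paths above follow the same shape: make each side contiguous, issue a single cudaMemcpy of nElement * sizeof(real) bytes, and route mismatched element types through a CPU temporary of the GPU tensor's real type. A sketch of how this surfaces in Lua (variable names are illustrative):

local cpuFloat  = torch.FloatTensor(5):copy(torch.range(1, 5))
local gpuFloat  = torch.CudaTensor(5):copy(cpuFloat)    -- copyCPU: one cudaMemcpy
local cpuDouble = torch.DoubleTensor(5):copy(gpuFloat)  -- copyCuda into a float temp, then widened on the CPU
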
diff --git a/lib/THC/generic/THCTensorCopy.cu b/lib/THC/generic/THCTensorCopy.cu
index 5aa7ee5..304e52c 100644
--- a/lib/THC/generic/THCTensorCopy.cu
+++ b/lib/THC/generic/THCTensorCopy.cu
@@ -1,19 +1,15 @@
-#include "THCApply.cuh"
-
-static inline int curGPU() {
- int curDev;
- THCudaCheck(cudaGetDevice(&curDev));
- return curDev;
-}
+#ifndef THC_GENERIC_FILE
+#define THC_GENERIC_FILE "generic/THCTensorCopy.cu"
+#else
THC_API void
-THCudaTensor_copy(THCState* state, THCudaTensor* dst, THCudaTensor* src) {
- long totalElements = THCudaTensor_nElement(state, dst);
+THCTensor_(copy)(THCState* state, THCTensor* dst, THCTensor* src) {
+ long totalElements = THCTensor_(nElement)(state, dst);
- THArgCheck(totalElements == THCudaTensor_nElement(state, src), 2,
+ THArgCheck(totalElements == THCTensor_(nElement)(state, src), 2,
"sizes do not match");
- if (THCudaTensor_nDimension(state, dst) == 0) {
+ if (THCTensor_(nDimension)(state, dst) == 0) {
// Zero-dim tensor; copy nothing
return;
}
@@ -24,12 +20,12 @@ THCudaTensor_copy(THCState* state, THCudaTensor* dst, THCudaTensor* src) {
// -FIXME: if both tensors have matching size and stride arrays, and no
// holes within (in other words, there is some permutation that can be applied
// to the size/strides such that the resulting tensor is contiguous).
- bool srcContig = THCudaTensor_isContiguous(state, src);
- bool dstContig = THCudaTensor_isContiguous(state, dst);
+ bool srcContig = THCTensor_(isContiguous)(state, src);
+ bool dstContig = THCTensor_(isContiguous)(state, dst);
bool memcpyEligible = (srcContig && dstContig) || (totalElements == 1);
- int srcDev = THCudaTensor_getDevice(state, src);
- int dstDev = THCudaTensor_getDevice(state, dst);
+ int srcDev = THCTensor_(getDevice)(state, src);
+ int dstDev = THCTensor_(getDevice)(state, dst);
int oldDev = curGPU();
// We always perform the copy on the source device, using the
@@ -71,12 +67,13 @@ THCudaTensor_copy(THCState* state, THCudaTensor* dst, THCudaTensor* src) {
// We are now on srcDev
if (memcpyEligible) {
// Perform the copy
- THCudaCheck(cudaMemcpyAsync(THCudaTensor_data(state, dst),
- THCudaTensor_data(state, src),
- totalElements * sizeof(float),
+ THCudaCheck(cudaMemcpyAsync(THCTensor_(data)(state, dst),
+ THCTensor_(data)(state, src),
+ totalElements * sizeof(real),
cudaMemcpyDeviceToDevice,
copyStream));
} else {
+#ifdef THC_REAL_IS_FLOAT
// Non-contiguous copy
// We avoid creating temporary memory copies if possible.
@@ -139,6 +136,11 @@ THCudaTensor_copy(THCState* state, THCudaTensor* dst, THCudaTensor* src) {
THCudaTensor_freeCopyTo(state, dstContig, dst);
}
}
+#else
+// two-level macro so that Real is expanded before being stringified;
+// a single #x would print the literal token "Real"
+#define STRINGIFY_IMPL(x) #x
+#define STRINGIFY(x) STRINGIFY_IMPL(x)
+ THError("Non-contiguous copy not implemented for Cuda%sTensor", STRINGIFY(Real));
+#undef STRINGIFY
+#undef STRINGIFY_IMPL
+#endif
}
if (srcDev != dstDev && copyStreamIndex == 0) {
@@ -170,3 +172,34 @@ THCudaTensor_copy(THCState* state, THCudaTensor* dst, THCudaTensor* src) {
THError(cudaGetErrorString(errcode));
}
}
+
+// conversions are mediated by the CPU
+// yes, this is slow; feel free to write CUDA kernels for this
+#define THC_CUDA_TENSOR_IMPLEMENT_COPY(TYPEC,TYPECUDA) \
+ void THCTensor_(copyCuda##TYPEC)(THCState *state, THCTensor *self, struct THCuda##TYPECUDA##Tensor *src) \
+ { \
+ if(THCTypeIdx_(Real) == THCTypeIdx_(TYPEC)) { \
+ THCTensor_(copy)(state, self, (THCTensor*) src); /* cast just removes compiler warning */ \
+ } else { \
+ THArgCheck(THCTensor_(nElement)(state, self) == THCuda##TYPECUDA##Tensor_nElement(state, src), 2, "sizes do not match"); \
+ THLongStorage *size = THCuda##TYPECUDA##Tensor_newSizeOf(state, src); \
+ TH##TYPEC##Tensor *buffer1 = TH##TYPEC##Tensor_newWithSize(size, NULL); \
+ THTensor *buffer2 = THTensor_(newWithSize)(size, NULL); \
+ TH##TYPEC##Tensor_copyCuda(state, buffer1, src); \
+ THTensor_(copy##TYPEC)(buffer2, buffer1); \
+ THCTensor_(copyCPU)(state, self, buffer2); \
+ THLongStorage_free(size); \
+ TH##TYPEC##Tensor_free(buffer1); \
+ THTensor_(free)(buffer2); \
+ } \
+ }
+
+THC_CUDA_TENSOR_IMPLEMENT_COPY(Byte,Byte)
+THC_CUDA_TENSOR_IMPLEMENT_COPY(Char,Char)
+THC_CUDA_TENSOR_IMPLEMENT_COPY(Short,Short)
+THC_CUDA_TENSOR_IMPLEMENT_COPY(Int,Int)
+THC_CUDA_TENSOR_IMPLEMENT_COPY(Long,Long)
+THC_CUDA_TENSOR_IMPLEMENT_COPY(Float,) // empty TYPECUDA: THCudaTensor is the float tensor type
+THC_CUDA_TENSOR_IMPLEMENT_COPY(Double,Double)
+
+#endif
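
As the comment above concedes, a cross-type copy between two CUDA tensors currently bounces through host memory: GPU source -> CPU buffer of the source type -> CPU buffer of the destination type -> GPU destination. A hedged Lua sketch of a call that takes this slow path (mirroring the test suite's usage):

local a = torch.CudaTensor(4):copy(torch.range(1, 4))  -- float on the GPU
local c = torch.CudaByteTensor(4):copy(a)               -- cross-type: host round-trips until device kernels exist
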
diff --git a/lib/THC/generic/THCTensorCopy.h b/lib/THC/generic/THCTensorCopy.h
index d843213..b71fe0f 100644
--- a/lib/THC/generic/THCTensorCopy.h
+++ b/lib/THC/generic/THCTensorCopy.h
@@ -1,28 +1,36 @@
-#ifndef TH_CUDA_TENSOR_COPY_INC
-#define TH_CUDA_TENSOR_COPY_INC
+#ifndef THC_GENERIC_FILE
+#define THC_GENERIC_FILE "generic/THCTensorCopy.h"
+#else
-#include "THCTensor.h"
-#include "THCGeneral.h"
+THC_API void THCTensor_(copy)(THCState *state, THCTensor *self, THCTensor *src);
+THC_API void THCTensor_(copyByte)(THCState *state, THCTensor *self, THByteTensor *src);
+THC_API void THCTensor_(copyChar)(THCState *state, THCTensor *self, THCharTensor *src);
+THC_API void THCTensor_(copyShort)(THCState *state, THCTensor *self, THShortTensor *src);
+THC_API void THCTensor_(copyInt)(THCState *state, THCTensor *self, THIntTensor *src);
+THC_API void THCTensor_(copyLong)(THCState *state, THCTensor *self, THLongTensor *src);
+THC_API void THCTensor_(copyFloat)(THCState *state, THCTensor *self, THFloatTensor *src);
+THC_API void THCTensor_(copyDouble)(THCState *state, THCTensor *self, THDoubleTensor *src);
-THC_API void THCudaTensor_copy(THCState *state, THCudaTensor *self, THCudaTensor *src);
-THC_API void THCudaTensor_copyByte(THCState *state, THCudaTensor *self, THByteTensor *src);
-THC_API void THCudaTensor_copyChar(THCState *state, THCudaTensor *self, THCharTensor *src);
-THC_API void THCudaTensor_copyShort(THCState *state, THCudaTensor *self, THShortTensor *src);
-THC_API void THCudaTensor_copyInt(THCState *state, THCudaTensor *self, THIntTensor *src);
-THC_API void THCudaTensor_copyLong(THCState *state, THCudaTensor *self, THLongTensor *src);
-THC_API void THCudaTensor_copyFloat(THCState *state, THCudaTensor *self, THFloatTensor *src);
-THC_API void THCudaTensor_copyDouble(THCState *state, THCudaTensor *self, THDoubleTensor *src);
+THC_API void THCTensor_(copyCudaByte)(THCState *state, THCTensor *storage, struct THCudaByteTensor *src);
+THC_API void THCTensor_(copyCudaChar)(THCState *state, THCTensor *storage, struct THCudaCharTensor *src);
+THC_API void THCTensor_(copyCudaShort)(THCState *state, THCTensor *storage, struct THCudaShortTensor *src);
+THC_API void THCTensor_(copyCudaInt)(THCState *state, THCTensor *storage, struct THCudaIntTensor *src);
+THC_API void THCTensor_(copyCudaLong)(THCState *state, THCTensor *storage, struct THCudaLongTensor *src);
+THC_API void THCTensor_(copyCudaFloat)(THCState *state, THCTensor *storage, struct THCudaTensor *src);
+THC_API void THCTensor_(copyCudaDouble)(THCState *state, THCTensor *storage, struct THCudaDoubleTensor *src);
-THC_API void THByteTensor_copyCuda(THCState *state, THByteTensor *self, THCudaTensor *src);
-THC_API void THCharTensor_copyCuda(THCState *state, THCharTensor *self, THCudaTensor *src);
-THC_API void THShortTensor_copyCuda(THCState *state, THShortTensor *self, THCudaTensor *src);
-THC_API void THIntTensor_copyCuda(THCState *state, THIntTensor *self, THCudaTensor *src);
-THC_API void THLongTensor_copyCuda(THCState *state, THLongTensor *self, THCudaTensor *src);
-THC_API void THFloatTensor_copyCuda(THCState *state, THFloatTensor *self, THCudaTensor *src);
-THC_API void THDoubleTensor_copyCuda(THCState *state, THDoubleTensor *self, THCudaTensor *src);
-THC_API void THCudaTensor_copyCuda(THCState *state, THCudaTensor *self, THCudaTensor *src);
+THC_API void TH_CONCAT_2(THByteTensor_copyCuda , Real) (THCState *state, THByteTensor *self, THCTensor *src);
+THC_API void TH_CONCAT_2(THCharTensor_copyCuda , Real) (THCState *state, THCharTensor *self, THCTensor *src);
+THC_API void TH_CONCAT_2(THShortTensor_copyCuda , Real) (THCState *state, THShortTensor *self, THCTensor *src);
+THC_API void TH_CONCAT_2(THIntTensor_copyCuda , Real) (THCState *state, THIntTensor *self, THCTensor *src);
+THC_API void TH_CONCAT_2(THLongTensor_copyCuda , Real) (THCState *state, THLongTensor *self, THCTensor *src);
+THC_API void TH_CONCAT_2(THFloatTensor_copyCuda , Real) (THCState *state, THFloatTensor *self, THCTensor *src);
+THC_API void TH_CONCAT_2(THDoubleTensor_copyCuda, Real) (THCState *state, THDoubleTensor *self, THCTensor *src);
+THC_API void THCTensor_(copyCuda) (THCState *state, THCTensor *self, THCTensor *src);
+THC_API void THTensor_(copyCuda) (THCState *state, THTensor *self, THCTensor *src);
+THC_API void THCTensor_(copyCPU) (THCState *state, THCTensor *self, THTensor *src);
-THC_API void THCudaTensor_copyAsyncFloat(THCState *state, THCudaTensor *self, THFloatTensor *src);
-THC_API void THFloatTensor_copyAsyncCuda(THCState *state, THFloatTensor *self, THCudaTensor *src);
+THC_API void THCTensor_(copyAsyncCPU)(THCState *state, THCTensor *self, THTensor *src);
+THC_API void THTensor_(copyAsyncCuda)(THCState *state, THTensor *self, THCTensor *src);
#endif
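
The header now enumerates three families per type: CPU-to-GPU (copyByte ... copyDouble), GPU-to-GPU cross-type (copyCudaByte ... copyCudaDouble), and GPU-to-CPU, whose names are assembled with TH_CONCAT_2 so that, for example, the Byte instantiation of this file declares THByteTensor_copyCudaByte. A rough, illustrative map to the Lua surface:

-- THCTensor_(copyByte)     -> cudaT:copy(byteT)      -- CPU -> GPU
-- THCTensor_(copyCudaByte) -> cudaT:copy(cudaByteT)  -- GPU -> GPU, cross-type
-- THTensor_(copyCuda)      -> cpuT:copy(cudaT)       -- GPU -> CPU
local x = torch.CudaTensor(2, 2):copy(torch.ByteTensor(2, 2):fill(3))
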
diff --git a/test/test.lua b/test/test.lua
index 845ad95..27d1a15 100644
--- a/test/test.lua
+++ b/test/test.lua
@@ -2035,6 +2035,106 @@ function test.multi_gpu_copy_noncontig()
end
end
+function test.cudaTypeCopy()
+
+ local types = {
+ {'float', 'FloatTensor'},
+ {'byte', 'ByteTensor'},
+ {'char', 'CharTensor'},
+ {'short', 'ShortTensor'},
+ {'int', 'IntTensor'},
+ {'long', 'LongTensor'},
+ {'double','DoubleTensor'},
+
+ {'cuda', 'CudaTensor'},
+ {'cudaByte', 'CudaByteTensor'},
+ {'cudaChar', 'CudaCharTensor'},
+ {'cudaShort', 'CudaShortTensor'},
+ {'cudaInt', 'CudaIntTensor'},
+ {'cudaLong', 'CudaLongTensor'},
+ {'cudaDouble','CudaDoubleTensor'},
+ }
+
+ local N = 100
+ local t0 = torch.range(1,12):reshape(3,4)
+
+ -- t carries over from one iteration to the next
+ local t = t0:clone()
+ for i = 1, N do
+ -- convert to a random (CPU or GPU) type
+ local conversionFunc, tensorSubtype = unpack(types[torch.random(#types)])
+ local tensorType = 'torch.' .. tensorSubtype
+
+ if torch.random(0,1) ~= 0 then
+ -- this is equivalent to e.g. t = t:float()
+ t = t[conversionFunc](t)
+ else
+ -- this is equivalent to t = torch.XTensor(3,4):copy(t)
+ t = torch[tensorSubtype](3,4):copy(t)
+ end
+
+ -- check the type
+ tester:assert(t:type() == tensorType, t:type() .. ' ~= ' .. tensorType)
+
+ -- check metadata
+ tester:assert(t:isContiguous())
+ tester:assert(t:size(1) == 3 and t:size(2) == 4)
+ tester:assert(t:nDimension() == 2)
+
+ -- check data
+ tester:assertTensorEq(t:double(), t0, 0)
+
+
+ -- check indexing
+ -- FIXME: doesn't work yet
+ -- tester:assert(t[{1,1}] == 1)
+ end
+
+ -- check narrowing conversions
+ tester:assert(torch.Tensor(1):fill(500):cudaByte():float()[1] == 244)
+ tester:assert(torch.Tensor(1):fill(500):cudaChar():float()[1] == -12)
+end
+
+
+function test.cudaStorageTypeCopy()
+
+ local types = {
+ {'float', 'FloatStorage'},
+ {'byte', 'ByteStorage'},
+ {'char', 'CharStorage'},
+ {'short', 'ShortStorage'},
+ {'int', 'IntStorage'},
+ {'long', 'LongStorage'},
+ {'double','DoubleStorage'},
+
+ {'cuda', 'CudaStorage'},
+ {'cudaByte', 'CudaByteStorage'},
+ {'cudaChar', 'CudaCharStorage'},
+ {'cudaShort', 'CudaShortStorage'},
+ {'cudaInt', 'CudaIntStorage'},
+ {'cudaLong', 'CudaLongStorage'},
+ {'cudaDouble','CudaDoubleStorage'},
+ }
+
+ local N = 100
+ local t0 = torch.range(1,12):reshape(3,4):storage()
+
+ -- t carries over from one iteration to the next
+ local t = torch.DoubleStorage(t0:size()):copy(t0)
+ for i = 1, N do
+ -- convert to a random (CPU or GPU) type
+ local conversionFunc, storageSubtype = unpack(types[torch.random(#types)])
+ local storageType = 'torch.' .. storageSubtype
+
+ -- this is equivalent to t = torch.XStorage(12):copy(t)
+ t = torch[storageSubtype](12):copy(t)
+
+ -- check the type
+ tester:assert(torch.type(t) == storageType, torch.type(t) .. ' ~= ' .. storageType)
+
+ local d = torch.DoubleStorage(12):copy(t)
+ for i = 1, t:size() do
+ tester:assert(d[i] == t0[i], storageSubtype .. ': ' .. i .. ': ' .. d[i] .. ' ~= ' .. t0[i])
+ end
+ end
+end
+
function test.maskedSelect()
local n_row = math.random(minsize,maxsize)
local n_col = math.random(minsize,maxsize)
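
The two narrowing assertions at the end of test.cudaTypeCopy above are just 8-bit wraparound arithmetic: 500 does not fit in a byte, so the unsigned conversion keeps 500 mod 256 = 244, and reinterpreting that bit pattern as a signed char gives 244 - 256 = -12. The same check, spelled out:

local v = torch.Tensor(1):fill(500)
assert(v:cudaByte():float()[1] == 244)  -- unsigned char: 500 mod 256
assert(v:cudaChar():float()[1] == -12)  -- signed char: 244 - 256
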
diff --git a/torch/generic/Storage.c b/torch/generic/Storage.c
index 6f0188d..063704a 100644
--- a/torch/generic/Storage.c
+++ b/torch/generic/Storage.c
@@ -5,34 +5,34 @@
static int torch_Storage_(new)(lua_State *L)
{
THCState *state = cutorch_getstate(L);
- THStorage *storage;
+ THCStorage *storage;
if(lua_type(L, 1) == LUA_TSTRING)
{
const char *fileName = luaL_checkstring(L, 1);
int isShared = luaT_optboolean(L, 2, 0);
long size = luaL_optlong(L, 3, 0);
- storage = THStorage_(newWithMapping)(state, fileName, size, isShared);
+ storage = THCStorage_(newWithMapping)(state, fileName, size, isShared);
}
else if(lua_type(L, 1) == LUA_TTABLE)
{
long size = lua_objlen(L, 1);
long i;
- storage = THStorage_(newWithSize)(state, size);
+ storage = THCStorage_(newWithSize)(state, size);
for(i = 1; i <= size; i++)
{
lua_rawgeti(L, 1, i);
if(!lua_isnumber(L, -1))
{
- THStorage_(free)(state, storage);
+ THCStorage_(free)(state, storage);
luaL_error(L, "element at index %d is not a number", i);
}
- THStorage_(set)(state, storage, i-1, (real)lua_tonumber(L, -1));
+ THCStorage_(set)(state, storage, i-1, (real)lua_tonumber(L, -1));
lua_pop(L, 1);
}
}
else if(lua_type(L, 1) == LUA_TUSERDATA)
{
- THStorage *src = luaT_checkudata(L, 1, torch_Storage);
+ THCStorage *src = luaT_checkudata(L, 1, torch_Storage);
real *ptr = src->data;
long offset = luaL_optlong(L, 2, 1) - 1;
if (offset < 0 || offset >= src->size) {
@@ -42,22 +42,22 @@ static int torch_Storage_(new)(lua_State *L)
if (size < 1 || size > (src->size - offset)) {
luaL_error(L, "size out of bounds");
}
- storage = THStorage_(newWithData)(state, ptr + offset, size);
+ storage = THCStorage_(newWithData)(state, ptr + offset, size);
storage->flag = TH_STORAGE_REFCOUNTED | TH_STORAGE_VIEW;
storage->view = src;
- THStorage_(retain)(state, storage->view);
+ THCStorage_(retain)(state, storage->view);
}
else if(lua_type(L, 2) == LUA_TNUMBER)
{
long size = luaL_optlong(L, 1, 0);
real *ptr = (real *)luaL_optlong(L, 2, 0);
- storage = THStorage_(newWithData)(state, ptr, size);
+ storage = THCStorage_(newWithData)(state, ptr, size);
storage->flag = TH_STORAGE_REFCOUNTED;
}
else
{
long size = luaL_optlong(L, 1, 0);
- storage = THStorage_(newWithSize)(state, size);
+ storage = THCStorage_(newWithSize)(state, size);
}
luaT_pushudata(L, storage, torch_Storage);
return 1;
@@ -65,24 +65,24 @@ static int torch_Storage_(new)(lua_State *L)
static int torch_Storage_(retain)(lua_State *L)
{
- THStorage *storage = luaT_checkudata(L, 1, torch_Storage);
- THStorage_(retain)(cutorch_getstate(L), storage);
+ THCStorage *storage = luaT_checkudata(L, 1, torch_Storage);
+ THCStorage_(retain)(cutorch_getstate(L), storage);
return 0;
}
static int torch_Storage_(free)(lua_State *L)
{
- THStorage *storage = luaT_checkudata(L, 1, torch_Storage);
- THStorage_(free)(cutorch_getstate(L), storage);
+ THCStorage *storage = luaT_checkudata(L, 1, torch_Storage);
+ THCStorage_(free)(cutorch_getstate(L), storage);
return 0;
}
static int torch_Storage_(resize)(lua_State *L)
{
- THStorage *storage = luaT_checkudata(L, 1, torch_Storage);
+ THCStorage *storage = luaT_checkudata(L, 1, torch_Storage);
long size = luaL_checklong(L, 2);
/* int keepContent = luaT_optboolean(L, 3, 0); */
- THStorage_(resize)(cutorch_getstate(L), storage, size);/*, keepContent); */
+ THCStorage_(resize)(cutorch_getstate(L), storage, size);/*, keepContent); */
lua_settop(L, 1);
return 1;
}
@@ -90,24 +90,24 @@ static int torch_Storage_(resize)(lua_State *L)
static int torch_Storage_(copy)(lua_State *L)
{
THCState *state = cutorch_getstate(L);
- THStorage *storage = luaT_checkudata(L, 1, torch_Storage);
+ THCStorage *storage = luaT_checkudata(L, 1, torch_Storage);
void *src;
if( (src = luaT_toudata(L, 2, torch_Storage)) )
- THStorage_(copy)(state, storage, src);
+ THCStorage_(copy)(state, storage, src);
else if( (src = luaT_toudata(L, 2, "torch.ByteStorage")) )
- THStorage_(copyByte)(state, storage, src);
+ THCStorage_(copyByte)(state, storage, src);
else if( (src = luaT_toudata(L, 2, "torch.CharStorage")) )
- THStorage_(copyChar)(state, storage, src);
+ THCStorage_(copyChar)(state, storage, src);
else if( (src = luaT_toudata(L, 2, "torch.ShortStorage")) )
- THStorage_(copyShort)(state, storage, src);
+ THCStorage_(copyShort)(state, storage, src);
else if( (src = luaT_toudata(L, 2, "torch.IntStorage")) )
- THStorage_(copyInt)(state, storage, src);
+ THCStorage_(copyInt)(state, storage, src);
else if( (src = luaT_toudata(L, 2, "torch.LongStorage")) )
- THStorage_(copyLong)(state, storage, src);
+ THCStorage_(copyLong)(state, storage, src);
else if( (src = luaT_toudata(L, 2, "torch.FloatStorage")) )
- THStorage_(copyFloat)(state, storage, src);
+ THCStorage_(copyFloat)(state, storage, src);
else if( (src = luaT_toudata(L, 2, "torch.DoubleStorage")) )
- THStorage_(copyDouble)(state, storage, src);
+ THCStorage_(copyDouble)(state, storage, src);
else
luaL_typerror(L, 2, "torch.*Storage");
lua_settop(L, 1);
@@ -116,9 +116,9 @@ static int torch_Storage_(copy)(lua_State *L)
static int torch_Storage_(fill)(lua_State *L)
{
- THStorage *storage = luaT_checkudata(L, 1, torch_Storage);
+ THCStorage *storage = luaT_checkudata(L, 1, torch_Storage);
double value = luaL_checknumber(L, 2);
- THStorage_(fill)(cutorch_getstate(L), storage, (real)value);
+ THCStorage_(fill)(cutorch_getstate(L), storage, (real)value);
lua_settop(L, 1);
return 1;
}
@@ -131,7 +131,7 @@ static int torch_Storage_(elementSize)(lua_State *L)
static int torch_Storage_(__len__)(lua_State *L)
{
- THStorage *storage = luaT_checkudata(L, 1, torch_Storage);
+ THCStorage *storage = luaT_checkudata(L, 1, torch_Storage);
lua_pushnumber(L, storage->size);
return 1;
}
@@ -140,10 +140,10 @@ static int torch_Storage_(__newindex__)(lua_State *L)
{
if(lua_isnumber(L, 2))
{
- THStorage *storage = luaT_checkudata(L, 1, torch_Storage);
+ THCStorage *storage = luaT_checkudata(L, 1, torch_Storage);
long index = luaL_checklong(L, 2) - 1;
double number = luaL_checknumber(L, 3);
- THStorage_(set)(cutorch_getstate(L), storage, index, (real)number);
+ THCStorage_(set)(cutorch_getstate(L), storage, index, (real)number);
lua_pushboolean(L, 1);
}
else
@@ -156,9 +156,9 @@ static int torch_Storage_(__index__)(lua_State *L)
{
if(lua_isnumber(L, 2))
{
- THStorage *storage = luaT_checkudata(L, 1, torch_Storage);
+ THCStorage *storage = luaT_checkudata(L, 1, torch_Storage);
long index = luaL_checklong(L, 2) - 1;
- lua_pushnumber(L, THStorage_(get)(cutorch_getstate(L), storage, index));
+ lua_pushnumber(L, THCStorage_(get)(cutorch_getstate(L), storage, index));
lua_pushboolean(L, 1);
return 2;
}
@@ -172,12 +172,12 @@ static int torch_Storage_(__index__)(lua_State *L)
#if defined(TH_REAL_IS_CHAR) || defined(TH_REAL_IS_BYTE)
static int torch_Storage_(string)(lua_State *L)
{
- THStorage *storage = luaT_checkudata(L, 1, torch_Storage);
+ THCStorage *storage = luaT_checkudata(L, 1, torch_Storage);
if(lua_isstring(L, -1))
{
size_t len = 0;
const char *str = lua_tolstring(L, -1, &len);
- THStorage_(resize)(cutorch_getstate(L), storage, len);
+ THCStorage_(resize)(cutorch_getstate(L), storage, len);
memmove(storage->data, str, len);
lua_settop(L, 1);
}
@@ -190,7 +190,7 @@ static int torch_Storage_(string)(lua_State *L)
static int torch_Storage_(totable)(lua_State *L)
{
- THStorage *storage = luaT_checkudata(L, 1, torch_Storage);
+ THCStorage *storage = luaT_checkudata(L, 1, torch_Storage);
long i;
lua_newtable(L);
@@ -204,14 +204,14 @@ static int torch_Storage_(totable)(lua_State *L)
static int torch_Storage_(factory)(lua_State *L)
{
- THStorage *storage = THStorage_(new)(cutorch_getstate(L));
+ THCStorage *storage = THCStorage_(new)(cutorch_getstate(L));
luaT_pushudata(L, storage, torch_Storage);
return 1;
}
static int torch_Storage_(write)(lua_State *L)
{
- THStorage *storage = luaT_checkudata(L, 1, torch_Storage);
+ THCStorage *storage = luaT_checkudata(L, 1, torch_Storage);
THFile *file = luaT_checkudata(L, 2, "torch.File");
THFile_writeLongScalar(file, storage->size);
@@ -222,11 +222,11 @@ static int torch_Storage_(write)(lua_State *L)
static int torch_Storage_(read)(lua_State *L)
{
- THStorage *storage = luaT_checkudata(L, 1, torch_Storage);
+ THCStorage *storage = luaT_checkudata(L, 1, torch_Storage);
THFile *file = luaT_checkudata(L, 2, "torch.File");
long size = THFile_readLongScalar(file);
- THStorage_(resize)(cutorch_getstate(L), storage, size);
+ THCStorage_(resize)(cutorch_getstate(L), storage, size);
THFile_readRealRaw(file, storage->data, storage->size);
return 0;
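
The same mechanical substitution (THStorage_* to THCStorage_*) runs through all of the storage bindings above, so each CUDA storage type gets the full Lua metatable: constructors, copy, fill, __len__, and element access. A small usage sketch (assuming these bindings are registered for every CUDA storage type, as the tests above exercise):

local s  = torch.FloatStorage({1, 2, 3})
local cs = torch.CudaByteStorage(3):copy(s)  -- typed CUDA storage, filled from the CPU
print(#cs, cs[2])                            -- __len__ and __index__ go through THCStorage_(get)
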
diff --git a/torch/generic/Tensor.c b/torch/generic/Tensor.c
index c837050..bbed718 100644
--- a/torch/generic/Tensor.c
+++ b/torch/generic/Tensor.c
@@ -3,13 +3,13 @@
#else
static void torch_Tensor_(c_readTensorStorageSizeStride)(lua_State *L, int index, int allowNone, int allowTensor, int allowStorage, int allowStride,
- THStorage **storage_, long *storageOffset_, THLongStorage **size_, THLongStorage **stride_);
+ THCStorage **storage_, long *storageOffset_, THLongStorage **size_, THLongStorage **stride_);
static void torch_Tensor_(c_readSizeStride)(lua_State *L, int index, int allowStride, THLongStorage **size_, THLongStorage **stride_);
static int torch_Tensor_(size)(lua_State *L)
{
- THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor);
+ THCTensor *tensor = luaT_checkudata(L, 1, torch_Tensor);
if(lua_isnumber(L,2))
{
int dim = luaL_checkint(L, 2)-1;
@@ -33,7 +33,7 @@ static int torch_Tensor_(elementSize)(lua_State *L)
static int torch_Tensor_(stride)(lua_State *L)
{
- THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor);
+ THCTensor *tensor = luaT_checkudata(L, 1, torch_Tensor);
if(lua_isnumber(L,2))
{
int dim = luaL_checkint(L, 2)-1;
@@ -51,17 +51,17 @@ static int torch_Tensor_(stride)(lua_State *L)
static int torch_Tensor_(nDimension)(lua_State *L)
{
- THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor);
+ THCTensor *tensor = luaT_checkudata(L, 1, torch_Tensor);
lua_pushnumber(L, tensor->nDimension);
return 1;
}
static int torch_Tensor_(storage)(lua_State *L)
{
- THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor);
+ THCTensor *tensor = luaT_checkudata(L, 1, torch_Tensor);
if(tensor->storage)
{
- THStorage_(retain)(cutorch_getstate(L), tensor->storage);
+ THCStorage_(retain)(cutorch_getstate(L), tensor->storage);
luaT_pushudata(L, tensor->storage, torch_Storage);
}
else
@@ -72,7 +72,7 @@ static int torch_Tensor_(storage)(lua_State *L)
static int torch_Tensor_(storageOffset)(lua_State *L)
{
- THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor);
+ THCTensor *tensor = luaT_checkudata(L, 1, torch_Tensor);
lua_pushnumber(L, tensor->storageOffset+1);
return 1;
}
@@ -80,7 +80,7 @@ static int torch_Tensor_(storageOffset)(lua_State *L)
static int torch_Tensor_(new)(lua_State *L)
{
THCState *state = cutorch_getstate(L);
- THTensor *tensor;
+ THCTensor *tensor;
long storageOffset;
THLongStorage *size, *stride;
@@ -107,7 +107,7 @@ static int torch_Tensor_(new)(lua_State *L)
counter = THLongStorage_newWithSize(size->size);
THLongStorage_fill(counter, 0);
- tensor = THTensor_(newWithSize)(state, size, NULL);
+ tensor = THCTensor_(newWithSize)(state, size, NULL);
if(size->size == 0)
is_finished = 1;
@@ -118,7 +118,7 @@ static int torch_Tensor_(new)(lua_State *L)
{
THLongStorage_free(size);
THLongStorage_free(counter);
- THTensor_(free)(state, tensor);
+ THCTensor_(free)(state, tensor);
luaL_error(L, "invalid tensor definition");
}
@@ -126,7 +126,7 @@ static int torch_Tensor_(new)(lua_State *L)
{
THLongStorage_free(size);
THLongStorage_free(counter);
- THTensor_(free)(state, tensor);
+ THCTensor_(free)(state, tensor);
luaL_error(L, "invalid tensor sizes");
}
@@ -137,10 +137,10 @@ static int torch_Tensor_(new)(lua_State *L)
{
THLongStorage_free(size);
THLongStorage_free(counter);
- THTensor_(free)(state, tensor);
+ THCTensor_(free)(state, tensor);
luaL_error(L, "invalid element (not a number)");
}
- THStorage_(set)(state, THTensor_(storage)(state, tensor), si++, (real)lua_tonumber(L, -1));
+ THCStorage_(set)(state, THCTensor_(storage)(state, tensor), si++, (real)lua_tonumber(L, -1));
lua_pop(L, 1);
}
@@ -171,14 +171,14 @@ static int torch_Tensor_(new)(lua_State *L)
{
THLongStorage_free(size);
THLongStorage_free(counter);
- THTensor_(free)(state, tensor);
+ THCTensor_(free)(state, tensor);
luaL_error(L, "invalid tensor definition");
}
if(lua_objlen(L, -1) != size->data[j])
{
THLongStorage_free(size);
THLongStorage_free(counter);
- THTensor_(free)(state, tensor);
+ THCTensor_(free)(state, tensor);
luaL_error(L, "invalid tensor sizes");
}
lua_rawgeti(L, -1, counter->data[j]+1);
@@ -193,12 +193,12 @@ static int torch_Tensor_(new)(lua_State *L)
}
else
{
- THStorage *storage;
+ THCStorage *storage;
torch_Tensor_(c_readTensorStorageSizeStride)(L, 1, 1, 1, 1, 1,
&storage, &storageOffset, &size, &stride);
- tensor = THTensor_(newWithStorage)(state, storage, storageOffset, size, stride);
+ tensor = THCTensor_(newWithStorage)(state, storage, storageOffset, size, stride);
THLongStorage_free(size);
THLongStorage_free(stride);
@@ -210,15 +210,15 @@ static int torch_Tensor_(new)(lua_State *L)
static int torch_Tensor_(set)(lua_State *L)
{
- THTensor *self = luaT_checkudata(L, 1, torch_Tensor);
- THStorage *storage;
+ THCTensor *self = luaT_checkudata(L, 1, torch_Tensor);
+ THCStorage *storage;
long storageOffset;
THLongStorage *size, *stride;
torch_Tensor_(c_readTensorStorageSizeStride)(L, 2, 1, 1, 1, 1,
&storage, &storageOffset, &size, &stride);
- THTensor_(setStorage)(cutorch_getstate(L), self, storage, storageOffset, size, stride);
+ THCTensor_(setStorage)(cutorch_getstate(L), self, storage, storageOffset, size, stride);
THLongStorage_free(size);
THLongStorage_free(stride);
@@ -229,16 +229,16 @@ static int torch_Tensor_(set)(lua_State *L)
static int torch_Tensor_(clone)(lua_State *L)
{
- THTensor *self = luaT_checkudata(L, 1, torch_Tensor);
- self = THTensor_(newClone)(cutorch_getstate(L), self);
+ THCTensor *self = luaT_checkudata(L, 1, torch_Tensor);
+ self = THCTensor_(newClone)(cutorch_getstate(L), self);
luaT_pushudata(L, self, torch_Tensor);
return 1;
}
static int torch_Tensor_(contiguous)(lua_State *L)
{
- THTensor *self = luaT_checkudata(L, 1, torch_Tensor);
- self = THTensor_(newContiguous)(cutorch_getstate(L), self);
+ THCTensor *self = luaT_checkudata(L, 1, torch_Tensor);
+ self = THCTensor_(newContiguous)(cutorch_getstate(L), self);
luaT_pushudata(L, self, torch_Tensor);
return 1;
}
@@ -246,21 +246,21 @@ static int torch_Tensor_(contiguous)(lua_State *L)
/* Resize */
static int torch_Tensor_(resizeAs)(lua_State *L)
{
- THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor);
- THTensor *src = luaT_checkudata(L, 2, torch_Tensor);
- THTensor_(resizeAs)(cutorch_getstate(L), tensor, src);
+ THCTensor *tensor = luaT_checkudata(L, 1, torch_Tensor);
+ THCTensor *src = luaT_checkudata(L, 2, torch_Tensor);
+ THCTensor_(resizeAs)(cutorch_getstate(L), tensor, src);
lua_settop(L, 1);
return 1;
}
static int torch_Tensor_(resize)(lua_State *L)
{
- THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor);
+ THCTensor *tensor = luaT_checkudata(L, 1, torch_Tensor);
THLongStorage *size, *stride;
torch_Tensor_(c_readSizeStride)(L, 2, 0, &size, &stride);
- THTensor_(resize)(cutorch_getstate(L), tensor, size, stride);
+ THCTensor_(resize)(cutorch_getstate(L), tensor, size, stride);
THLongStorage_free(size);
THLongStorage_free(stride);
@@ -272,7 +272,7 @@ static int torch_Tensor_(resize)(lua_State *L)
static int torch_Tensor_(narrow)(lua_State *L)
{
THCState *state = cutorch_getstate(L);
- THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor);
+ THCTensor *tensor = luaT_checkudata(L, 1, torch_Tensor);
int dimension = luaL_checkint(L, 2)-1;
long firstIndex = luaL_checklong(L, 3)-1;
long size = luaL_checklong(L, 4);
@@ -281,8 +281,8 @@ static int torch_Tensor_(narrow)(lua_State *L)
THArgCheck( (firstIndex >= 0) && (firstIndex < tensor->size[dimension]), 3, "out of range");
THArgCheck( (size > 0) && (firstIndex+size <= tensor->size[dimension]), 4, "out of range");
*/
- tensor = THTensor_(newWithTensor)(state, tensor);
- THTensor_(narrow)(state, tensor, NULL, dimension, firstIndex, size);
+ tensor = THCTensor_(newWithTensor)(state, tensor);
+ THCTensor_(narrow)(state, tensor, NULL, dimension, firstIndex, size);
luaT_pushudata(L, tensor, torch_Tensor);
return 1;
}
@@ -290,7 +290,7 @@ static int torch_Tensor_(narrow)(lua_State *L)
static int torch_Tensor_(sub)(lua_State *L)
{
THCState *state = cutorch_getstate(L);
- THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor);
+ THCTensor *tensor = luaT_checkudata(L, 1, torch_Tensor);
long d0s = -1, d0e = -1, d1s = -1, d1e = -1, d2s = -1, d2e = -1, d3s = -1, d3e = -1;
d0s = luaL_checklong(L, 2)-1;
@@ -346,14 +346,14 @@ static int torch_Tensor_(sub)(lua_State *L)
}
}
- tensor = THTensor_(newWithTensor)(state, tensor);
- THTensor_(narrow)(state, tensor, NULL, 0, d0s, d0e-d0s+1);
+ tensor = THCTensor_(newWithTensor)(state, tensor);
+ THCTensor_(narrow)(state, tensor, NULL, 0, d0s, d0e-d0s+1);
if(d1s >= 0)
- THTensor_(narrow)(state, tensor, NULL, 1, d1s, d1e-d1s+1);
+ THCTensor_(narrow)(state, tensor, NULL, 1, d1s, d1e-d1s+1);
if(d2s >= 0)
- THTensor_(narrow)(state, tensor, NULL, 2, d2s, d2e-d2s+1);
+ THCTensor_(narrow)(state, tensor, NULL, 2, d2s, d2e-d2s+1);
if(d3s >= 0)
- THTensor_(narrow)(state, tensor, NULL, 3, d3s, d3e-d3s+1);
+ THCTensor_(narrow)(state, tensor, NULL, 3, d3s, d3e-d3s+1);
luaT_pushudata(L, tensor, torch_Tensor);
return 1;
}
@@ -361,7 +361,7 @@ static int torch_Tensor_(sub)(lua_State *L)
static int torch_Tensor_(select)(lua_State *L)
{
THCState *state = cutorch_getstate(L);
- THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor);
+ THCTensor *tensor = luaT_checkudata(L, 1, torch_Tensor);
int dimension = luaL_checkint(L, 2)-1;
long sliceIndex = luaL_checklong(L, 3)-1;
@@ -372,29 +372,30 @@ static int torch_Tensor_(select)(lua_State *L)
if(tensor->nDimension > 1)
{
- tensor = THTensor_(newWithTensor)(state, tensor);
- THTensor_(select)(state, tensor, NULL, dimension, sliceIndex);
+ tensor = THCTensor_(newWithTensor)(state, tensor);
+ THCTensor_(select)(state, tensor, NULL, dimension, sliceIndex);
luaT_pushudata(L, tensor, torch_Tensor);
}
else
{
THArgCheck(tensor->nDimension == 1, 1, "empty Tensor");
- lua_pushnumber(L, THTensor_(get1d)(state, tensor, sliceIndex));
+ lua_pushnumber(L, THCTensor_(get1d)(state, tensor, sliceIndex));
}
return 1;
}
+#ifdef THC_REAL_IS_FLOAT
static int torch_Tensor_(indexSelect)(lua_State *L)
{
THCState *state = cutorch_getstate(L);
int narg = lua_gettop(L);
- THTensor *tensor, *src, *index;
+ THCTensor *tensor, *src, *index;
THLongTensor *longIndex;
int dim;
if (narg == 3)
{
- tensor = THTensor_(new)(state);
+ tensor = THCTensor_(new)(state);
src = luaT_checkudata(L, 1, torch_Tensor);
dim = luaL_checkint(L, 2) - 1;
index = luaT_toudata(L, 3, torch_Tensor);
@@ -418,9 +419,9 @@ static int torch_Tensor_(indexSelect)(lua_State *L)
}
if (index)
- THTensor_(indexSelect)(state, tensor,src,dim,index);
+ THCTensor_(indexSelect)(state, tensor,src,dim,index);
else
- THTensor_(indexSelect_long)(state, tensor,src,dim,longIndex);
+ THCTensor_(indexSelect_long)(state, tensor,src,dim,longIndex);
return 1;
}
@@ -428,7 +429,7 @@ static int torch_Tensor_(indexSelect)(lua_State *L)
static int torch_Tensor_(indexCopy)(lua_State *L)
{
int narg = lua_gettop(L);
- THTensor *tensor, *src, *index;
+ THCTensor *tensor, *src, *index;
THLongTensor *longIndex;
int dim;
if(narg == 4)
@@ -447,9 +448,9 @@ static int torch_Tensor_(indexCopy)(lua_State *L)
}
if (index)
- THTensor_(indexCopy)(cutorch_getstate(L), tensor,dim,index,src);
+ THCTensor_(indexCopy)(cutorch_getstate(L), tensor,dim,index,src);
else
- THTensor_(indexCopy_long)(cutorch_getstate(L), tensor,dim,longIndex,src);
+ THCTensor_(indexCopy_long)(cutorch_getstate(L), tensor,dim,longIndex,src);
return 1;
}
@@ -457,7 +458,7 @@ static int torch_Tensor_(indexCopy)(lua_State *L)
static int torch_Tensor_(indexAdd)(lua_State *L)
{
int narg = lua_gettop(L);
- THTensor *tensor, *src, *index;
+ THCTensor *tensor, *src, *index;
THLongTensor *longIndex;
int dim;
if(narg == 4)
@@ -476,9 +477,9 @@ static int torch_Tensor_(indexAdd)(lua_State *L)
}
if (index)
- THTensor_(indexAdd)(cutorch_getstate(L), tensor,dim,index,src);
+ THCTensor_(indexAdd)(cutorch_getstate(L), tensor,dim,index,src);
else
- THTensor_(indexAdd_long)(cutorch_getstate(L), tensor,dim,longIndex,src);
+ THCTensor_(indexAdd_long)(cutorch_getstate(L), tensor,dim,longIndex,src);
return 1;
}
@@ -486,7 +487,7 @@ static int torch_Tensor_(indexAdd)(lua_State *L)
static int torch_Tensor_(indexFill)(lua_State *L)
{
int narg = lua_gettop(L);
- THTensor *tensor, *index;
+ THCTensor *tensor, *index;
THLongTensor *longIndex;
real val;
int dim;
@@ -506,17 +507,19 @@ static int torch_Tensor_(indexFill)(lua_State *L)
}
if (index)
- THTensor_(indexFill)(cutorch_getstate(L), tensor,dim,index,val);
+ THCTensor_(indexFill)(cutorch_getstate(L), tensor,dim,index,val);
else
- THTensor_(indexFill_long)(cutorch_getstate(L), tensor,dim,longIndex,val);
+ THCTensor_(indexFill_long)(cutorch_getstate(L), tensor,dim,longIndex,val);
return 1;
}
+#endif
+
static int torch_Tensor_(transpose)(lua_State *L)
{
THCState *state = cutorch_getstate(L);
- THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor);
+ THCTensor *tensor = luaT_checkudata(L, 1, torch_Tensor);
int dimension1 = luaL_checkint(L, 2)-1;
int dimension2 = luaL_checkint(L, 3)-1;
@@ -525,8 +528,8 @@ static int torch_Tensor_(transpose)(lua_State *L)
THArgCheck( (dimension2 >= 0) && (dimension2 < src->nDimension), 3, "out of range");
*/
- tensor = THTensor_(newWithTensor)(state, tensor);
- THTensor_(transpose)(state, tensor, NULL, dimension1, dimension2);
+ tensor = THCTensor_(newWithTensor)(state, tensor);
+ THCTensor_(transpose)(state, tensor, NULL, dimension1, dimension2);
luaT_pushudata(L, tensor, torch_Tensor);
return 1;
}
@@ -534,12 +537,12 @@ static int torch_Tensor_(transpose)(lua_State *L)
static int torch_Tensor_(t)(lua_State *L)
{
THCState *state = cutorch_getstate(L);
- THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor);
+ THCTensor *tensor = luaT_checkudata(L, 1, torch_Tensor);
luaL_argcheck(L, tensor->nDimension == 2, 1, "Tensor must have 2 dimensions");
- tensor = THTensor_(newWithTensor)(state, tensor);
- THTensor_(transpose)(state, tensor, NULL, 0, 1);
+ tensor = THCTensor_(newWithTensor)(state, tensor);
+ THCTensor_(transpose)(state, tensor, NULL, 0, 1);
luaT_pushudata(L, tensor, torch_Tensor);
return 1;
}
@@ -547,7 +550,7 @@ static int torch_Tensor_(t)(lua_State *L)
static int torch_Tensor_(unfold)(lua_State *L)
{
THCState *state = cutorch_getstate(L);
- THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor);
+ THCTensor *tensor = luaT_checkudata(L, 1, torch_Tensor);
int dimension = luaL_checkint(L, 2)-1;
long size = luaL_checklong(L, 3);
long step = luaL_checklong(L, 4);
@@ -558,8 +561,8 @@ static int torch_Tensor_(unfold)(lua_State *L)
THArgCheck(size <= src->size[dimension], 3, "out of range");
*/
- tensor = THTensor_(newWithTensor)(state, tensor);
- THTensor_(unfold)(state, tensor, NULL, dimension, size, step);
+ tensor = THCTensor_(newWithTensor)(state, tensor);
+ THCTensor_(unfold)(state, tensor, NULL, dimension, size, step);
luaT_pushudata(L, tensor, torch_Tensor);
return 1;
}
@@ -567,73 +570,76 @@ static int torch_Tensor_(unfold)(lua_State *L)
/* is contiguous? [a bit like in TnXIterator] */
static int torch_Tensor_(isContiguous)(lua_State *L)
{
- THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor);
- lua_pushboolean(L, THTensor_(isContiguous)(cutorch_getstate(L), tensor));
+ THCTensor *tensor = luaT_checkudata(L, 1, torch_Tensor);
+ lua_pushboolean(L, THCTensor_(isContiguous)(cutorch_getstate(L), tensor));
return 1;
}
static int torch_Tensor_(isSize)(lua_State *L)
{
- THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor);
+ THCTensor *tensor = luaT_checkudata(L, 1, torch_Tensor);
THLongStorage *size = luaT_checkudata(L, 2, "torch.LongStorage");
- lua_pushboolean(L, THTensor_(isSize)(cutorch_getstate(L), tensor, size));
+ lua_pushboolean(L, THCTensor_(isSize)(cutorch_getstate(L), tensor, size));
return 1;
}
static int torch_Tensor_(isSetTo)(lua_State *L)
{
- THTensor *self = luaT_checkudata(L, 1, torch_Tensor);
- THTensor *src = luaT_checkudata(L, 2, torch_Tensor);
- lua_pushboolean(L, THTensor_(isSetTo)(cutorch_getstate(L), self, src));
+ THCTensor *self = luaT_checkudata(L, 1, torch_Tensor);
+ THCTensor *src = luaT_checkudata(L, 2, torch_Tensor);
+ lua_pushboolean(L, THCTensor_(isSetTo)(cutorch_getstate(L), self, src));
return 1;
}
static int torch_Tensor_(isSameSizeAs)(lua_State *L)
{
- THTensor *self = luaT_checkudata(L, 1, torch_Tensor);
- THTensor *src = luaT_checkudata(L, 2, torch_Tensor);
- lua_pushboolean(L, THTensor_(isSameSizeAs)(cutorch_getstate(L), self, src));
+ THCTensor *self = luaT_checkudata(L, 1, torch_Tensor);
+ THCTensor *src = luaT_checkudata(L, 2, torch_Tensor);
+ lua_pushboolean(L, THCTensor_(isSameSizeAs)(cutorch_getstate(L), self, src));
return 1;
}
static int torch_Tensor_(nElement)(lua_State *L)
{
- THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor);
- lua_pushnumber(L, THTensor_(nElement)(cutorch_getstate(L), tensor));
+ THCTensor *tensor = luaT_checkudata(L, 1, torch_Tensor);
+ lua_pushnumber(L, THCTensor_(nElement)(cutorch_getstate(L), tensor));
return 1;
}
static int torch_Tensor_(copy)(lua_State *L)
{
THCState *state = cutorch_getstate(L);
- THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor);
+ THCTensor *tensor = luaT_checkudata(L, 1, torch_Tensor);
void *src;
if( (src = luaT_toudata(L, 2, torch_Tensor)) )
- THTensor_(copy)(state, tensor, src);
+ THCTensor_(copy)(state, tensor, src);
else if( (src = luaT_toudata(L, 2, "torch.ByteTensor")) )
- THTensor_(copyByte)(state, tensor, src);
+ THCTensor_(copyByte)(state, tensor, src);
else if( (src = luaT_toudata(L, 2, "torch.CharTensor")) )
- THTensor_(copyChar)(state, tensor, src);
+ THCTensor_(copyChar)(state, tensor, src);
else if( (src = luaT_toudata(L, 2, "torch.ShortTensor")) )
- THTensor_(copyShort)(state, tensor, src);
+ THCTensor_(copyShort)(state, tensor, src);
else if( (src = luaT_toudata(L, 2, "torch.IntTensor")) )
- THTensor_(copyInt)(state, tensor, src);
+ THCTensor_(copyInt)(state, tensor, src);
else if( (src = luaT_toudata(L, 2, "torch.LongTensor")) )
- THTensor_(copyLong)(state, tensor, src);
+ THCTensor_(copyLong)(state, tensor, src);
else if( (src = luaT_toudata(L, 2, "torch.FloatTensor")) )
- THTensor_(copyFloat)(state, tensor, src);
+ THCTensor_(copyFloat)(state, tensor, src);
else if( (src = luaT_toudata(L, 2, "torch.DoubleTensor")) )
- THTensor_(copyDouble)(state, tensor, src);
+ THCTensor_(copyDouble)(state, tensor, src);
else
luaL_typerror(L, 2, "torch.*Tensor");
lua_settop(L, 1);
return 1;
}
+
+
+#ifdef THC_REAL_IS_FLOAT
static int torch_Tensor_(__newindex__)(lua_State *L)
{
THCState *state = cutorch_getstate(L);
- THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor);
+ THCTensor *tensor = luaT_checkudata(L, 1, torch_Tensor);
THLongStorage *idx = NULL;
THByteTensor *mask;
THCudaTensor *maskCuda;
@@ -649,53 +655,53 @@ static int torch_Tensor_(__newindex__)(lua_State *L)
real value = (real)luaL_checknumber(L,3);
if (tensor->nDimension == 1) {
luaL_argcheck(L, index >= 0 && index < tensor->size[0], 2, "out of range");
- THStorage_(set)(state, tensor->storage, tensor->storageOffset+index*tensor->stride[0], value);
+ THCStorage_(set)(state, tensor->storage, tensor->storageOffset+index*tensor->stride[0], value);
} else {
- tensor = THTensor_(newWithTensor)(state, tensor);
- THTensor_(narrow)(state, tensor, NULL, 0, index, 1);
- THTensor_(fill)(state, tensor, value);
- THTensor_(free)(state, tensor);
+ tensor = THCTensor_(newWithTensor)(state, tensor);
+ THCTensor_(narrow)(state, tensor, NULL, 0, index, 1);
+ THCTensor_(fill)(state, tensor, value);
+ THCTensor_(free)(state, tensor);
}
} else if( (src = luaT_toudata(L, 3, torch_Tensor)) ) {
- tensor = THTensor_(newWithTensor)(state, tensor);
- THTensor_(narrow)(state, tensor, NULL, 0, index, 1);
- THTensor_(copy)(state, tensor, src);
- THTensor_(free)(state, tensor);
+ tensor = THCTensor_(newWithTensor)(state, tensor);
+ THCTensor_(narrow)(state, tensor, NULL, 0, index, 1);
+ THCTensor_(copy)(state, tensor, src);
+ THCTensor_(free)(state, tensor);
} else if( (src = luaT_toudata(L, 3, "torch.ByteTensor")) ) {
- tensor = THTensor_(newWithTensor)(state, tensor);
- THTensor_(narrow)(state, tensor, NULL, 0, index, 1);
- THTensor_(copyByte)(state, tensor, src);
- THTensor_(free)(state, tensor);
+ tensor = THCTensor_(newWithTensor)(state, tensor);
+ THCTensor_(narrow)(state, tensor, NULL, 0, index, 1);
+ THCTensor_(copyByte)(state, tensor, src);
+ THCTensor_(free)(state, tensor);
} else if( (src = luaT_toudata(L, 3, "torch.CharTensor")) ) {
- tensor = THTensor_(newWithTensor)(state, tensor);
- THTensor_(narrow)(state, tensor, NULL, 0, index, 1);
- THTensor_(copyChar)(state, tensor, src);
- THTensor_(free)(state, tensor);
+ tensor = THCTensor_(newWithTensor)(state, tensor);
+ THCTensor_(narrow)(state, tensor, NULL, 0, index, 1);
+ THCTensor_(copyChar)(state, tensor, src);
+ THCTensor_(free)(state, tensor);
} else if( (src = luaT_toudata(L, 3, "torch.ShortTensor")) ) {
- tensor = THTensor_(newWithTensor)(state, tensor);
- THTensor_(narrow)(state, tensor, NULL, 0, index, 1);
- THTensor_(copyShort)(state, tensor, src);
- THTensor_(free)(state, tensor);
+ tensor = THCTensor_(newWithTensor)(state, tensor);
+ THCTensor_(narrow)(state, tensor, NULL, 0, index, 1);
+ THCTensor_(copyShort)(state, tensor, src);
+ THCTensor_(free)(state, tensor);
} else if( (src = luaT_toudata(L, 3, "torch.IntTensor")) ) {
- tensor = THTensor_(newWithTensor)(state, tensor);
- THTensor_(narrow)(state, tensor, NULL, 0, index, 1);
- THTensor_(copyInt)(state, tensor, src);
- THTensor_(free)(state, tensor);
+ tensor = THCTensor_(newWithTensor)(state, tensor);
+ THCTensor_(narrow)(state, tensor, NULL, 0, index, 1);
+ THCTensor_(copyInt)(state, tensor, src);
+ THCTensor_(free)(state, tensor);
} else if( (src = luaT_toudata(L, 3, "torch.LongTensor")) ) {
- tensor = THTensor_(newWithTensor)(state, tensor);
- THTensor_(narrow)(state, tensor, NULL, 0, index, 1);
- THTensor_(copyLong)(state, tensor, src);
- THTensor_(free)(state, tensor);
+ tensor = THCTensor_(newWithTensor)(state, tensor);
+ THCTensor_(narrow)(state, tensor, NULL, 0, index, 1);
+ THCTensor_(copyLong)(state, tensor, src);
+ THCTensor_(free)(state, tensor);
} else if( (src = luaT_toudata(L, 3, "torch.FloatTensor")) ) {
- tensor = THTensor_(newWithTensor)(state, tensor);
- THTensor_(narrow)(state, tensor, NULL, 0, index, 1);
- THTensor_(copyFloat)(state, tensor, src);
- THTensor_(free)(state, tensor);
+ tensor = THCTensor_(newWithTensor)(state, tensor);
+ THCTensor_(narrow)(state, tensor, NULL, 0, index, 1);
+ THCTensor_(copyFloat)(state, tensor, src);
+ THCTensor_(free)(state, tensor);
} else if( (src = luaT_toudata(L, 3, "torch.DoubleTensor")) ) {
- tensor = THTensor_(newWithTensor)(state, tensor);
- THTensor_(narrow)(state, tensor, NULL, 0, index, 1);
- THTensor_(copyDouble)(state, tensor, src);
- THTensor_(free)(state, tensor);
+ tensor = THCTensor_(newWithTensor)(state, tensor);
+ THCTensor_(narrow)(state, tensor, NULL, 0, index, 1);
+ THCTensor_(copyDouble)(state, tensor, src);
+ THCTensor_(free)(state, tensor);
} else {
luaL_typerror(L, 3, "torch.*Tensor");
}
@@ -703,7 +709,7 @@ static int torch_Tensor_(__newindex__)(lua_State *L)
}
else if((idx = luaT_toudata(L, 2, "torch.LongStorage")))
{
- long index = THTensor_(storageOffset)(state, tensor);
+ long index = THCTensor_(storageOffset)(state, tensor);
real value = (real)luaL_checknumber(L,3);
int dim;
@@ -717,7 +723,7 @@ static int torch_Tensor_(__newindex__)(lua_State *L)
index += z*tensor->stride[dim];
}
- THStorage_(set)(state, tensor->storage, index, value);
+ THCStorage_(set)(state, tensor->storage, index, value);
lua_pushboolean(L, 1);
}
else if(lua_istable(L, 2))
@@ -728,7 +734,7 @@ static int torch_Tensor_(__newindex__)(lua_State *L)
int done = 0;
ndims = tensor->nDimension;
luaL_argcheck(L, lua_objlen(L, 2) <= ndims, 2, "too many indices provided");
- tensor = THTensor_(newWithTensor)(state, tensor);
+ tensor = THCTensor_(newWithTensor)(state, tensor);
for(dim = 0; dim < ndims; dim++)
{
lua_rawgeti(L, 2, dim+1);
@@ -741,9 +747,9 @@ static int torch_Tensor_(__newindex__)(lua_State *L)
if(tensor->nDimension == 1) {
real value = (real)luaL_checknumber(L,3);
done = 1;
- THStorage_(set)(state, tensor->storage, tensor->storageOffset+z*tensor->stride[0], value);
+ THCStorage_(set)(state, tensor->storage, tensor->storageOffset+z*tensor->stride[0], value);
} else {
- THTensor_(select)(state, tensor, NULL, cdim, z);
+ THCTensor_(select)(state, tensor, NULL, cdim, z);
}
}
else if (lua_istable(L, -1))
@@ -769,7 +775,7 @@ static int torch_Tensor_(__newindex__)(lua_State *L)
luaL_argcheck(L, (end >= start), 2, "end index must be greater or equal to start index");
- THTensor_(narrow)(state, tensor, NULL, cdim++, start, end-start+1);
+ THCTensor_(narrow)(state, tensor, NULL, cdim++, start, end-start+1);
}
else
{
@@ -780,41 +786,41 @@ static int torch_Tensor_(__newindex__)(lua_State *L)
/* doing a copy */
void *src;
if (lua_isnumber(L,3)) {
- THTensor_(fill)(state, tensor, lua_tonumber(L,3));
+ THCTensor_(fill)(state, tensor, lua_tonumber(L,3));
} else if( (src = luaT_toudata(L, 3, torch_Tensor)) ) {
- THTensor_(copy)(state, tensor, src);
+ THCTensor_(copy)(state, tensor, src);
} else if( (src = luaT_toudata(L, 3, "torch.ByteTensor")) ) {
- THTensor_(copyByte)(state, tensor, src);
+ THCTensor_(copyByte)(state, tensor, src);
} else if( (src = luaT_toudata(L, 3, "torch.CharTensor")) ) {
- THTensor_(copyChar)(state, tensor, src);
+ THCTensor_(copyChar)(state, tensor, src);
} else if( (src = luaT_toudata(L, 3, "torch.ShortTensor")) ) {
- THTensor_(copyShort)(state, tensor, src);
+ THCTensor_(copyShort)(state, tensor, src);
} else if( (src = luaT_toudata(L, 3, "torch.IntTensor")) ) {
- THTensor_(copyInt)(state, tensor, src);
+ THCTensor_(copyInt)(state, tensor, src);
} else if( (src = luaT_toudata(L, 3, "torch.LongTensor")) ) {
- THTensor_(copyLong)(state, tensor, src);
+ THCTensor_(copyLong)(state, tensor, src);
} else if( (src = luaT_toudata(L, 3, "torch.FloatTensor")) ) {
- THTensor_(copyFloat)(state, tensor, src);
+ THCTensor_(copyFloat)(state, tensor, src);
} else if( (src = luaT_toudata(L, 3, "torch.DoubleTensor")) ) {
- THTensor_(copyDouble)(state, tensor, src);
+ THCTensor_(copyDouble)(state, tensor, src);
} else {
luaL_typerror(L, 3, "torch.*Tensor");
}
}
- THTensor_(free)(state, tensor);
+ THCTensor_(free)(state, tensor);
lua_pushboolean(L, 1);
}
else if((mask = luaT_toudata(L, 2, "torch.ByteTensor")))
{
- THTensor *vals;
+ THCTensor *vals;
if (lua_isnumber(L, 3))
{
- THTensor_(maskedFillByte)(state, tensor, mask,
+ THCTensor_(maskedFillByte)(state, tensor, mask,
(real)(luaL_checknumber(L,3)));
}
else if((vals = luaT_toudata(L, 3, torch_Tensor)))
{
- THTensor_(maskedCopyByte)(state, tensor, mask, vals);
+ THCTensor_(maskedCopyByte)(state, tensor, mask, vals);
}
else
{
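
luaT_toudata returns NULL unless the userdata at the given stack slot is of the named type, so the chain above is a runtime type switch: a Lua number fills the (possibly narrowed) view, each recognized host tensor type gets its own typed copy entry point, and anything else raises a type error. A condensed sketch of the idiom, again with float-type names and only two branches shown:

/* Sketch of the luaT_toudata dispatch idiom. */
static void assign_value(lua_State *L, THCState *state, THCudaTensor *t)
{
  void *src;
  if (lua_isnumber(L, 3))
    THCudaTensor_fill(state, t, (float)lua_tonumber(L, 3));
  else if ((src = luaT_toudata(L, 3, "torch.FloatTensor")))
    THCudaTensor_copyFloat(state, t, (THFloatTensor *)src);
  else
    luaL_typerror(L, 3, "torch.*Tensor");
}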
@@ -823,15 +829,15 @@ static int torch_Tensor_(__newindex__)(lua_State *L)
}
else if((maskCuda = luaT_toudata(L, 2, "torch.CudaTensor")))
{
- THTensor *vals;
+ THCTensor *vals;
if (lua_isnumber(L, 3))
{
- THTensor_(maskedFill)(state, tensor, maskCuda,
+ THCTensor_(maskedFill)(state, tensor, maskCuda,
(real)(luaL_checknumber(L,3)));
}
else if((vals = luaT_toudata(L, 3, torch_Tensor)))
{
- THTensor_(maskedCopy)(state, tensor, maskCuda, vals);
+ THCTensor_(maskedCopy)(state, tensor, maskCuda, vals);
}
else
{
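
The two masked branches above turn assignment through a mask into maskedFill (scalar right-hand side) or maskedCopy (tensor right-hand side); the Byte variants take a host ByteTensor mask, which presumably gets uploaded before the kernel runs, while the CudaTensor variants expect the mask already on the device. Usage sketch, assuming t and vals are THCudaTensor* and mask a THByteTensor* of matching shape:

/* Sketch: the two masked __newindex__ forms written as direct C calls. */
static void masked_assign_examples(THCState *state, THCudaTensor *t,
                                   THByteTensor *mask, THCudaTensor *vals)
{
  THCudaTensor_maskedFillByte(state, t, mask, 0.0f);  /* t[mask] = 0    */
  THCudaTensor_maskedCopyByte(state, t, mask, vals);  /* t[mask] = vals */
}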
@@ -847,7 +853,7 @@ static int torch_Tensor_(__newindex__)(lua_State *L)
static int torch_Tensor_(__index__)(lua_State *L)
{
THCState *state = cutorch_getstate(L);
- THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor);
+ THCTensor *tensor = luaT_checkudata(L, 1, torch_Tensor);
THLongStorage *idx = NULL;
THByteTensor *mask;
THCudaTensor *maskCuda;
@@ -862,12 +868,12 @@ static int torch_Tensor_(__index__)(lua_State *L)
if(tensor->nDimension == 1)
{
- lua_pushnumber(L, THStorage_(get)(state, tensor->storage, tensor->storageOffset+index*tensor->stride[0]));
+ lua_pushnumber(L, THCStorage_(get)(state, tensor->storage, tensor->storageOffset+index*tensor->stride[0]));
}
else
{
- tensor = THTensor_(newWithTensor)(state, tensor);
- THTensor_(select)(state, tensor, NULL, 0, index);
+ tensor = THCTensor_(newWithTensor)(state, tensor);
+ THCTensor_(select)(state, tensor, NULL, 0, index);
luaT_pushudata(L, tensor, torch_Tensor);
}
lua_pushboolean(L, 1);
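
A scalar read like this bottoms out in THCStorage_(get), which has to fetch one element from device memory; every such access is a synchronous device-to-host transfer, so scalar indexing of CUDA tensors in a tight Lua loop is drastically slower than the same code on CPU tensors. The accessor is presumably a one-element copy along these lines (an assumed sketch, not the verbatim THC source; requires cuda_runtime.h):

/* Assumed shape of a single-element device read. */
static float storage_get_sketch(const THCudaStorage *s, long i)
{
  float v;
  cudaMemcpy(&v, s->data + i, sizeof(float), cudaMemcpyDeviceToHost);
  return v;
}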
@@ -875,7 +881,7 @@ static int torch_Tensor_(__index__)(lua_State *L)
}
else if((idx = luaT_toudata(L, 2, "torch.LongStorage")))
{
- long index = THTensor_(storageOffset)(state, tensor);
+ long index = THCTensor_(storageOffset)(state, tensor);
int dim;
luaL_argcheck(L, idx->size == tensor->nDimension, 2, "invalid size");
@@ -887,7 +893,7 @@ static int torch_Tensor_(__index__)(lua_State *L)
luaL_argcheck(L, (z >= 0) && (z < tensor->size[dim]), 2, "index out of bounds");
index += z*tensor->stride[dim];
}
- lua_pushnumber(L, (double)THStorage_(get)(state, THTensor_(storage)(state, tensor), index));
+ lua_pushnumber(L, (double)THCStorage_(get)(state, THCTensor_(storage)(state, tensor), index));
lua_pushboolean(L, 1);
return 2;
}
@@ -900,7 +906,7 @@ static int torch_Tensor_(__index__)(lua_State *L)
ndims = tensor->nDimension;
luaL_argcheck(L, lua_objlen(L, 2) <= ndims, 2, "too many indices provided");
- tensor = THTensor_(newWithTensor)(state, tensor);
+ tensor = THCTensor_(newWithTensor)(state, tensor);
for(dim = 0; dim < ndims; dim++)
{
@@ -913,9 +919,9 @@ static int torch_Tensor_(__index__)(lua_State *L)
luaL_argcheck(L, (z >= 0) && (z < tensor->size[cdim]), 2, "index out of bounds");
if(tensor->nDimension == 1) {
done = 1;
- lua_pushnumber(L, THStorage_(get)(state, tensor->storage, tensor->storageOffset+z*tensor->stride[0]));
+ lua_pushnumber(L, THCStorage_(get)(state, tensor->storage, tensor->storageOffset+z*tensor->stride[0]));
} else {
- THTensor_(select)(state, tensor, NULL, cdim, z);
+ THCTensor_(select)(state, tensor, NULL, cdim, z);
}
}
else if (lua_istable(L, -1))
@@ -941,7 +947,7 @@ static int torch_Tensor_(__index__)(lua_State *L)
luaL_argcheck(L, (end >= start), 2, "end index must be greater than or equal to start index");
- THTensor_(narrow)(state, tensor, NULL, cdim++, start, end-start+1);
+ THCTensor_(narrow)(state, tensor, NULL, cdim++, start, end-start+1);
}
else
{
@@ -951,23 +957,23 @@ static int torch_Tensor_(__index__)(lua_State *L)
if(!done) {
luaT_pushudata(L, tensor, torch_Tensor);
} else {
- THTensor_(free)(state, tensor);
+ THCTensor_(free)(state, tensor);
}
lua_pushboolean(L, 1);
return 2;
}
else if((mask = luaT_toudata(L, 2, "torch.ByteTensor")))
{
- THTensor *vals = THTensor_(new)(state);
- THTensor_(maskedSelectByte)(state, vals, tensor, mask);
+ THCTensor *vals = THCTensor_(new)(state);
+ THCTensor_(maskedSelectByte)(state, vals, tensor, mask);
luaT_pushudata(L, vals, torch_Tensor);
lua_pushboolean(L, 1);
return 2;
}
else if((maskCuda = luaT_toudata(L, 2, "torch.CudaTensor")))
{
- THTensor *vals = THTensor_(new)(state);
- THTensor_(maskedSelect)(state, vals, tensor, maskCuda);
+ THCTensor *vals = THCTensor_(new)(state);
+ THCTensor_(maskedSelect)(state, vals, tensor, maskCuda);
luaT_pushudata(L, vals, torch_Tensor);
lua_pushboolean(L, 1);
return 2;
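
In the table-index walk above, a plain number either yields a scalar (once a single dimension remains) or select()s its dimension away, while a {start,end} pair narrow()s the dimension and keeps it; the masked branches instead allocate a fresh result tensor with THCTensor_(new) and gather the selected elements into it. select versus narrow in a sketch (dimension and index arguments are 0-based in the C API):

/* select drops a dimension; narrow keeps it with a reduced size. */
static void view_examples(THCState *state, THCudaTensor *t /* 2-D */)
{
  THCudaTensor *row  = THCudaTensor_newWithTensor(state, t);
  THCudaTensor *band = THCudaTensor_newWithTensor(state, t);
  THCudaTensor_select(state, row,  NULL, 0, 2);     /* t[3] in Lua: now 1-D    */
  THCudaTensor_narrow(state, band, NULL, 0, 1, 3);  /* t[{{2,4}}]: 2-D, 3 rows */
  THCudaTensor_free(state, row);
  THCudaTensor_free(state, band);
}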
@@ -978,18 +984,19 @@ static int torch_Tensor_(__index__)(lua_State *L)
return 1;
}
}
+#endif
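
This #endif closes a THC_REAL_IS_FLOAT guard opened earlier in the file: __index__, __newindex__, and the masked/indexing paths only have CUDA kernels for the float type at this stage, so the other instantiations compile them out. The THCTensor_()/THCStorage_() names themselves come from the TH "generic file" technique, where this file is compiled once per element type with token-pasting macros roughly like the following (illustrative; the exact definitions live in THCTensor.h and THCGenerateAllTypes.h):

/* Rough shape of the generic naming macros. */
#define TH_CONCAT_4_EXPAND(a,b,c,d) a ## b ## c ## d
#define TH_CONCAT_4(a,b,c,d)        TH_CONCAT_4_EXPAND(a,b,c,d)
#define THCTensor_(NAME) TH_CONCAT_4(TH, CReal, Tensor_, NAME)
/* With CReal defined as CudaByte, THCTensor_(free) expands to
   THCudaByteTensor_free; for float, CReal is plain Cuda, preserving the
   historical THCudaTensor_free names. */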
static int torch_Tensor_(retain)(lua_State *L)
{
- THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor);
- THTensor_(retain)(cutorch_getstate(L), tensor);
+ THCTensor *tensor = luaT_checkudata(L, 1, torch_Tensor);
+ THCTensor_(retain)(cutorch_getstate(L), tensor);
return 0;
}
static int torch_Tensor_(free)(lua_State *L)
{
- THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor);
- THTensor_(free)(cutorch_getstate(L), tensor);
+ THCTensor *tensor = luaT_checkudata(L, 1, torch_Tensor);
+ THCTensor_(free)(cutorch_getstate(L), tensor);
return 0;
}
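
retain and free expose the tensor's reference count to Lua. The tensor header and its storage are counted independently, which is why the alias-narrow-copy-free idiom above can free its alias without touching the data. The contract, as a sketch (the real implementation also deals with flags and custom allocators):

/* Sketch of the refcount contract, not the THC source. */
static void tensor_retain_sketch(THCudaTensor *t) { if (t) t->refcount++; }

static void tensor_free_sketch(THCState *state, THCudaTensor *t)
{
  if (t && --t->refcount == 0) {
    THCudaStorage_free(state, t->storage);  /* drops one ref on the storage */
    THFree(t->size);
    THFree(t->stride);
    THFree(t);
  }
}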
@@ -1050,11 +1057,11 @@ static void torch_Tensor_(c_readSizeStride)(lua_State *L, int index, int allowSt
}
static void torch_Tensor_(c_readTensorStorageSizeStride)(lua_State *L, int index, int allowNone, int allowTensor, int allowStorage, int allowStride,
- THStorage **storage_, long *storageOffset_, THLongStorage **size_, THLongStorage **stride_)
+ THCStorage **storage_, long *storageOffset_, THLongStorage **size_, THLongStorage **stride_)
{
THCState *state = cutorch_getstate(L);
- THTensor *src = NULL;
- THStorage *storage = NULL;
+ THCTensor *src = NULL;
+ THCStorage *storage = NULL;
int arg1Type = lua_type(L, index);
@@ -1070,8 +1077,8 @@ static void torch_Tensor_(c_readTensorStorageSizeStride)(lua_State *L, int index
{
*storage_ = src->storage;
*storageOffset_ = src->storageOffset;
- *size_ = THTensor_(newSizeOf)(state, src);
- *stride_ = THTensor_(newStrideOf)(state, src);
+ *size_ = THCTensor_(newSizeOf)(state, src);
+ *stride_ = THCTensor_(newStrideOf)(state, src);
return;
}
else if( allowStorage && (arg1Type == LUA_TUSERDATA) && (storage = luaT_toudata(L, index, torch_Storage)) )
@@ -1114,14 +1121,14 @@ static void torch_Tensor_(c_readTensorStorageSizeStride)(lua_State *L, int index
static int torch_Tensor_(factory)(lua_State *L)
{
- THTensor *tensor = THTensor_(new)(cutorch_getstate(L));
+ THCTensor *tensor = THCTensor_(new)(cutorch_getstate(L));
luaT_pushudata(L, tensor, torch_Tensor);
return 1;
}
static int torch_Tensor_(write)(lua_State *L)
{
- THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor);
+ THCTensor *tensor = luaT_checkudata(L, 1, torch_Tensor);
THFile *file = luaT_checkudata(L, 2, "torch.File");
THFile_writeIntScalar(file, tensor->nDimension);
@@ -1134,7 +1141,7 @@ static int torch_Tensor_(write)(lua_State *L)
/* the storage */
if(tensor->storage)
{
- THStorage_(retain)(cutorch_getstate(L), tensor->storage);
+ THCStorage_(retain)(cutorch_getstate(L), tensor->storage);
luaT_pushudata(L, tensor->storage, torch_Storage);
}
else
@@ -1147,7 +1154,7 @@ static int torch_Tensor_(write)(lua_State *L)
static int torch_Tensor_(read)(lua_State *L)
{
- THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor);
+ THCTensor *tensor = luaT_checkudata(L, 1, torch_Tensor);
THFile *file = luaT_checkudata(L, 2, "torch.File");
tensor->nDimension = THFile_readIntScalar(file);
@@ -1164,7 +1171,7 @@ static int torch_Tensor_(read)(lua_State *L)
tensor->storage = luaT_toudata(L, -1, torch_Storage);
if(tensor->storage)
- THStorage_(retain)(cutorch_getstate(L), tensor->storage);
+ THCStorage_(retain)(cutorch_getstate(L), tensor->storage);
return 0;
}
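
write and read serialize the tensor header scalar-by-scalar and hand the storage to the Lua side as userdata, so the layout is: nDimension, size[0..n), stride[0..n), storageOffset, then the storage object written by torch.File itself. A hypothetical writer mirroring the hunks above (the 1-based offset adjustment is assumed from torch's usual convention; it is not visible in this diff):

/* Hypothetical writer matching torch_Tensor_(write)'s field order. */
static void write_tensor_sketch(THFile *file, THCudaTensor *t)
{
  THFile_writeIntScalar(file, t->nDimension);
  THFile_writeLongRaw(file, t->size,   t->nDimension);
  THFile_writeLongRaw(file, t->stride, t->nDimension);
  THFile_writeLongScalar(file, t->storageOffset + 1);  /* assumed 1-based on disk */
  /* the storage is then retained and pushed to Lua, which serializes it */
}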
@@ -1189,10 +1196,12 @@ static const struct luaL_Reg torch_Tensor_(_) [] = {
{"narrow", torch_Tensor_(narrow)},
{"sub", torch_Tensor_(sub)},
{"select", torch_Tensor_(select)},
+#ifdef THC_REAL_IS_FLOAT
{"index", torch_Tensor_(indexSelect)},
{"indexCopy", torch_Tensor_(indexCopy)},
{"indexAdd", torch_Tensor_(indexAdd)},
{"indexFill", torch_Tensor_(indexFill)},
+#endif
{"transpose", torch_Tensor_(transpose)},
{"t", torch_Tensor_(t)},
{"unfold", torch_Tensor_(unfold)},
@@ -1204,8 +1213,10 @@ static const struct luaL_Reg torch_Tensor_(_) [] = {
{"copy", torch_Tensor_(copy)},
{"read", torch_Tensor_(read)},
{"write", torch_Tensor_(write)},
+#ifdef THC_REAL_IS_FLOAT
{"__index__", torch_Tensor_(__index__)},
{"__newindex__", torch_Tensor_(__newindex__)},
+#endif
{NULL, NULL}
};
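
Net effect of the two new guards in the method table: every type instantiation registers the shape, copy, and serialization methods, but index/indexCopy/indexAdd/indexFill and the __index__/__newindex__ metamethods are registered only for the float instantiation (torch.CudaTensor) until their kernels are ported to the remaining types. The same split could be expressed at runtime instead of with the preprocessor; a hypothetical sketch with stub methods (names invented for illustration):

/* Hypothetical runtime equivalent of the #ifdef THC_REAL_IS_FLOAT guards. */
static int method_stub(lua_State *L) { (void)L; return 0; }  /* placeholder */

static const struct luaL_Reg float_only_methods[] = {
  {"index",     method_stub},  /* torch_Tensor_(indexSelect) in the real table */
  {"indexCopy", method_stub},  /* torch_Tensor_(indexCopy)   in the real table */
  {NULL, NULL}
};

static void register_float_extras(lua_State *L, int is_float_type)
{
  if (is_float_type)
    luaL_register(L, NULL, float_only_methods);  /* methods table on stack top */
}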