Auto device mode, plus allocation helper functions.

This diff introduces an alternative way of writing multi-GPU cutorch code. In this mode, the location of each tensor is specified, and the appropriate GPU for each kernel is determined automatically based on the location of its argument tensors. It's backwards-compatible and interoperable with the old-style multi-GPU API.
author: Adam Lerer <alerer@fb.com> 2015-04-18 04:20:51 +0300
committer: Adam Lerer <alerer@fb.com> 2015-04-29 17:47:13 +0300
commit: 47a2f6de252c2254234edfc1c6115229b5383bac (patch)
tree: dfd8992e4c6b8d8353e573ebfc4a7ec8b72a354a /FFI.lua
parent: 0903d5763025d163c575b11d5a51ce3d760c3ad1 (diff)
1 file changed, 7 insertions, 0 deletions
diff --git a/FFI.lua b/FFI.lua
index a6e15a2..2c12b80 100644
--- a/FFI.lua
+++ b/FFI.lua
@@ -5,6 +5,11 @@ if jit then
    local cdefs = [[
 typedef struct CUstream_st *cudaStream_t;
 
+typedef enum THCStateDeviceMode {
+  THCStateDeviceModeManual,
+  THCStateDeviceModeAuto
+} THCStateDeviceMode;
+
 typedef struct THCState
 {
   struct THCRNGState* rngState;
@@ -15,6 +20,7 @@ typedef struct THCState
   int numDevices;
   int numUserStreams;
   int currentPerDeviceStream;
+  THCStateDeviceMode deviceMode;
 } THCState;
 
 cudaStream_t THCState_getCurrentStream(THCState *state);
@@ -24,6 +30,7 @@ typedef struct THCudaStorage
     float *data;
     long size;
     int refcount;
+    int device;
     char flag;
     THAllocator *allocator;
     void *allocatorContext;
author	Adam Lerer <alerer@fb.com>	2015-04-18 04:20:51 +0300
committer	Adam Lerer <alerer@fb.com>	2015-04-29 17:47:13 +0300
commit	47a2f6de252c2254234edfc1c6115229b5383bac (patch)
tree	dfd8992e4c6b8d8353e573ebfc4a7ec8b72a354a /FFI.lua
parent	0903d5763025d163c575b11d5a51ce3d760c3ad1 (diff)