pushing THCState back to the header (thcstateheader)

author: soumith <soumith@fb.com> 2016-10-26 04:23:53 +0300
committer: soumith <soumith@fb.com> 2016-10-26 04:23:53 +0300
commit: 64f974178c03c93666cfe3796b7e2d7b549476a2 (patch)
tree: a075bc31f5eccbd7655299b5c2671359e2f1962d
parent: 17300d9cc0c462dfde81eb81f89ba0a15e095844 (diff)
4 files changed, 60 insertions, 58 deletions
diff --git a/lib/THC/CMakeLists.txt b/lib/THC/CMakeLists.txt
index 244568f..b081345 100644
--- a/lib/THC/CMakeLists.txt
+++ b/lib/THC/CMakeLists.txt
@@ -113,6 +113,7 @@ ELSE()
   SET(THC_INSTALL_CMAKE_SUBDIR ${Torch_INSTALL_CMAKE_SUBDIR})
 ENDIF()
 
+INCLUDE_DIRECTORIES("${CMAKE_CURRENT_SOURCE_DIR}")
 INCLUDE_DIRECTORIES("${CMAKE_CURRENT_BINARY_DIR}")
 CONFIGURE_FILE(THCGeneral.h.in "${CMAKE_CURRENT_BINARY_DIR}/THCGeneral.h")
 
@@ -200,6 +201,7 @@ INSTALL(FILES
           THCStorage.h
           THCStorageCopy.h
           THCStream.h
+          THCThreadLocal.h
           THCTensor.h
           THCTensorCopy.h
           THCTensorRandom.h
diff --git a/lib/THC/THCGeneral.c b/lib/THC/THCGeneral.c
index 0b75399..0a1d340 100644
--- a/lib/THC/THCGeneral.c
+++ b/lib/THC/THCGeneral.c
@@ -12,60 +12,6 @@
 #define GLOBAL_SCRATCH_SPACE_PER_SM_STREAM 4 * sizeof(float)
 
 
-typedef struct _THCCudaResourcesPerDevice {
-  THCStream** streams;
-  cublasHandle_t* blasHandles;
-  /* Size of scratch space per each stream on this device available */
-  size_t scratchSpacePerStream;
-  /* Device-resident scratch space per stream, used for global memory
-     reduction kernels. */
-  void** devScratchSpacePerStream;
-} THCCudaResourcesPerDevice;
-
-struct THCState {
-  struct THCRNGState* rngState;
-  struct cudaDeviceProp* deviceProperties;
-  /* Set of all allocated resources. resourcePerDevice[dev]->streams[0] is NULL,
-     which specifies the per-device default stream. blasHandles do not have a
-     default and must be explicitly initialized. We always initialize 1
-     blasHandle but we can use more.
-  */
-  THCCudaResourcesPerDevice* resourcesPerDevice;
-  /* Captured number of devices upon startup; convenience for bounds checking */
-  int numDevices;
-  /* Number of Torch defined resources available, indices 1 ... numStreams */
-  int numUserStreams;
-  int numUserBlasHandles;
-
-  /* Allocator using cudaMallocHost. */
-  THAllocator* cudaHostAllocator;
-  THCDeviceAllocator* cudaDeviceAllocator;
-
-  /* Index of the current selected BLAS handle. The actual BLAS handle used
-     depends on the current device. */
-  THCThreadLocal/*<int>*/ currentPerDeviceBlasHandle;
-  /* Array of thread locals containing the current stream for each device */
-  THCThreadLocal* currentStreams;
-
-  /* Table of enabled peer-to-peer access between directed pairs of GPUs.
-     If i accessing allocs on j is enabled, p2pAccess[i][j] is 1; 0 otherwise. */
-  int** p2pAccessEnabled;
-
-  /* Is direct cross-kernel p2p access allowed? Normally, only cross-GPU
-     copies are allowed via p2p if p2p access is enabled at all for
-     the pair of GPUs in question, but if this flag is true, then
-     all cross-GPU access checks are disabled, allowing kernels to
-     directly access memory on another GPUs.
-     Note that p2p access must exist and be enabled for the pair of
-     GPUs in question. */
-  int p2pKernelAccessEnabled;
-
-  void (*cutorchGCFunction)(void *data);
-  void *cutorchGCData;
-  ptrdiff_t heapSoftmax;
-  ptrdiff_t heapDelta;
-};
-
 THCCudaResourcesPerDevice* THCState_getDeviceResourcePtr(
   THCState *state, int device);
 
diff --git a/lib/THC/THCGeneral.h.in b/lib/THC/THCGeneral.h.in
index 8b3ac74..22aab03 100644
--- a/lib/THC/THCGeneral.h.in
+++ b/lib/THC/THCGeneral.h.in
@@ -3,6 +3,7 @@
 
 #include "THGeneral.h"
 #include "THAllocator.h"
+#include "THCThreadLocal.h"
 #undef log1p
 
 #include "cuda.h"
@@ -40,7 +41,8 @@
 #endif
 
 struct THCRNGState;  /* Random number generator state. */
-struct THCStream;
+typedef struct THCStream THCStream;
+typedef struct THCState THCState;
 
 typedef struct _THCDeviceAllocator {
    cudaError_t (*malloc)( void*, void**, size_t,         cudaStream_t);
@@ -50,9 +52,61 @@ typedef struct _THCDeviceAllocator {
    void* state;
 } THCDeviceAllocator;
 
+typedef struct _THCCudaResourcesPerDevice {
+  THCStream** streams;
+  cublasHandle_t* blasHandles;
+  /* Size of scratch space per each stream on this device available */
+  size_t scratchSpacePerStream;
+  /* Device-resident scratch space per stream, used for global memory
+     reduction kernels. */
+  void** devScratchSpacePerStream;
+} THCCudaResourcesPerDevice;
+
 
 /* Global state to be held in the cutorch table. */
-typedef struct THCState THCState;
+struct THCState {
+  struct THCRNGState* rngState;
+  struct cudaDeviceProp* deviceProperties;
+  /* Set of all allocated resources. resourcePerDevice[dev]->streams[0] is NULL,
+     which specifies the per-device default stream. blasHandles do not have a
+     default and must be explicitly initialized. We always initialize 1
+     blasHandle but we can use more.
+  */
+  THCCudaResourcesPerDevice* resourcesPerDevice;
+  /* Captured number of devices upon startup; convenience for bounds checking */
+  int numDevices;
+  /* Number of Torch defined resources available, indices 1 ... numStreams */
+  int numUserStreams;
+  int numUserBlasHandles;
+
+  /* Allocator using cudaMallocHost. */
+  THAllocator* cudaHostAllocator;
+  THCDeviceAllocator* cudaDeviceAllocator;
+
+  /* Index of the current selected BLAS handle. The actual BLAS handle used
+     depends on the current device. */
+  THCThreadLocal/*<int>*/ currentPerDeviceBlasHandle;
+  /* Array of thread locals containing the current stream for each device */
+  THCThreadLocal* currentStreams;
+
+  /* Table of enabled peer-to-peer access between directed pairs of GPUs.
+     If i accessing allocs on j is enabled, p2pAccess[i][j] is 1; 0 otherwise. */
+  int** p2pAccessEnabled;
+
+  /* Is direct cross-kernel p2p access allowed? Normally, only cross-GPU
+     copies are allowed via p2p if p2p access is enabled at all for
+     the pair of GPUs in question, but if this flag is true, then
+     all cross-GPU access checks are disabled, allowing kernels to
+     directly access memory on another GPUs.
+     Note that p2p access must exist and be enabled for the pair of
+     GPUs in question. */
+  int p2pKernelAccessEnabled;
+
+  void (*cutorchGCFunction)(void *data);
+  void *cutorchGCData;
+  ptrdiff_t heapSoftmax;
+  ptrdiff_t heapDelta;
+};
 
 THC_API THCState* THCState_alloc();
 THC_API void THCState_free(THCState* state);
diff --git a/lib/THC/THCStream.h b/lib/THC/THCStream.h
index 7e4bb49..de3f64e 100644
--- a/lib/THC/THCStream.h
+++ b/lib/THC/THCStream.h
@@ -4,12 +4,12 @@
 #include <cuda_runtime_api.h>
 #include "THCGeneral.h"
 
-typedef struct THCStream
+struct THCStream
 {
     cudaStream_t stream;
     int device;
     int refcount;
-} THCStream;
+};
 
 
 THC_API THCStream* THCStream_new(int flags);
author	soumith <soumith@fb.com>	2016-10-26 04:23:53 +0300
committer	soumith <soumith@fb.com>	2016-10-26 04:23:53 +0300
commit	64f974178c03c93666cfe3796b7e2d7b549476a2 (patch)
tree	a075bc31f5eccbd7655299b5c2671359e2f1962d
parent	17300d9cc0c462dfde81eb81f89ba0a15e095844 (diff)