diff options
author | soumith <soumith@fb.com> | 2016-10-26 04:23:53 +0300 |
---|---|---|
committer | soumith <soumith@fb.com> | 2016-10-26 04:23:53 +0300 |
commit | 64f974178c03c93666cfe3796b7e2d7b549476a2 (patch) | |
tree | a075bc31f5eccbd7655299b5c2671359e2f1962d | |
parent | 17300d9cc0c462dfde81eb81f89ba0a15e095844 (diff) |
pushing THCState back to the header (thcstateheader)
-rw-r--r-- | lib/THC/CMakeLists.txt | 2 | ||||
-rw-r--r-- | lib/THC/THCGeneral.c | 54 | ||||
-rw-r--r-- | lib/THC/THCGeneral.h.in | 58 | ||||
-rw-r--r-- | lib/THC/THCStream.h | 4 |
4 files changed, 60 insertions, 58 deletions
diff --git a/lib/THC/CMakeLists.txt b/lib/THC/CMakeLists.txt index 244568f..b081345 100644 --- a/lib/THC/CMakeLists.txt +++ b/lib/THC/CMakeLists.txt @@ -113,6 +113,7 @@ ELSE() SET(THC_INSTALL_CMAKE_SUBDIR ${Torch_INSTALL_CMAKE_SUBDIR}) ENDIF() +INCLUDE_DIRECTORIES("${CMAKE_CURRENT_SOURCE_DIR}") INCLUDE_DIRECTORIES("${CMAKE_CURRENT_BINARY_DIR}") CONFIGURE_FILE(THCGeneral.h.in "${CMAKE_CURRENT_BINARY_DIR}/THCGeneral.h") @@ -200,6 +201,7 @@ INSTALL(FILES THCStorage.h THCStorageCopy.h THCStream.h + THCThreadLocal.h THCTensor.h THCTensorCopy.h THCTensorRandom.h diff --git a/lib/THC/THCGeneral.c b/lib/THC/THCGeneral.c index 0b75399..0a1d340 100644 --- a/lib/THC/THCGeneral.c +++ b/lib/THC/THCGeneral.c @@ -12,60 +12,6 @@ #define GLOBAL_SCRATCH_SPACE_PER_SM_STREAM 4 * sizeof(float) -typedef struct _THCCudaResourcesPerDevice { - THCStream** streams; - cublasHandle_t* blasHandles; - /* Size of scratch space per each stream on this device available */ - size_t scratchSpacePerStream; - /* Device-resident scratch space per stream, used for global memory - reduction kernels. */ - void** devScratchSpacePerStream; -} THCCudaResourcesPerDevice; - -struct THCState { - struct THCRNGState* rngState; - struct cudaDeviceProp* deviceProperties; - /* Set of all allocated resources. resourcePerDevice[dev]->streams[0] is NULL, - which specifies the per-device default stream. blasHandles do not have a - default and must be explicitly initialized. We always initialize 1 - blasHandle but we can use more. - */ - THCCudaResourcesPerDevice* resourcesPerDevice; - /* Captured number of devices upon startup; convenience for bounds checking */ - int numDevices; - /* Number of Torch defined resources available, indices 1 ... numStreams */ - int numUserStreams; - int numUserBlasHandles; - - /* Allocator using cudaMallocHost. */ - THAllocator* cudaHostAllocator; - THCDeviceAllocator* cudaDeviceAllocator; - - /* Index of the current selected BLAS handle. 
The actual BLAS handle used - depends on the current device. */ - THCThreadLocal/*<int>*/ currentPerDeviceBlasHandle; - /* Array of thread locals containing the current stream for each device */ - THCThreadLocal* currentStreams; - - /* Table of enabled peer-to-peer access between directed pairs of GPUs. - If i accessing allocs on j is enabled, p2pAccess[i][j] is 1; 0 otherwise. */ - int** p2pAccessEnabled; - - /* Is direct cross-kernel p2p access allowed? Normally, only cross-GPU - copies are allowed via p2p if p2p access is enabled at all for - the pair of GPUs in question, but if this flag is true, then - all cross-GPU access checks are disabled, allowing kernels to - directly access memory on another GPUs. - Note that p2p access must exist and be enabled for the pair of - GPUs in question. */ - int p2pKernelAccessEnabled; - - void (*cutorchGCFunction)(void *data); - void *cutorchGCData; - ptrdiff_t heapSoftmax; - ptrdiff_t heapDelta; -}; - THCCudaResourcesPerDevice* THCState_getDeviceResourcePtr( THCState *state, int device); diff --git a/lib/THC/THCGeneral.h.in b/lib/THC/THCGeneral.h.in index 8b3ac74..22aab03 100644 --- a/lib/THC/THCGeneral.h.in +++ b/lib/THC/THCGeneral.h.in @@ -3,6 +3,7 @@ #include "THGeneral.h" #include "THAllocator.h" +#include "THCThreadLocal.h" #undef log1p #include "cuda.h" @@ -40,7 +41,8 @@ #endif struct THCRNGState; /* Random number generator state. */ -struct THCStream; +typedef struct THCStream THCStream; +typedef struct THCState THCState; typedef struct _THCDeviceAllocator { cudaError_t (*malloc)( void*, void**, size_t, cudaStream_t); @@ -50,9 +52,61 @@ typedef struct _THCDeviceAllocator { void* state; } THCDeviceAllocator; +typedef struct _THCCudaResourcesPerDevice { + THCStream** streams; + cublasHandle_t* blasHandles; + /* Size of scratch space per each stream on this device available */ + size_t scratchSpacePerStream; + /* Device-resident scratch space per stream, used for global memory + reduction kernels. 
*/ + void** devScratchSpacePerStream; +} THCCudaResourcesPerDevice; + /* Global state to be held in the cutorch table. */ -typedef struct THCState THCState; +struct THCState { + struct THCRNGState* rngState; + struct cudaDeviceProp* deviceProperties; + /* Set of all allocated resources. resourcePerDevice[dev]->streams[0] is NULL, + which specifies the per-device default stream. blasHandles do not have a + default and must be explicitly initialized. We always initialize 1 + blasHandle but we can use more. + */ + THCCudaResourcesPerDevice* resourcesPerDevice; + /* Captured number of devices upon startup; convenience for bounds checking */ + int numDevices; + /* Number of Torch defined resources available, indices 1 ... numStreams */ + int numUserStreams; + int numUserBlasHandles; + + /* Allocator using cudaMallocHost. */ + THAllocator* cudaHostAllocator; + THCDeviceAllocator* cudaDeviceAllocator; + + /* Index of the current selected BLAS handle. The actual BLAS handle used + depends on the current device. */ + THCThreadLocal/*<int>*/ currentPerDeviceBlasHandle; + /* Array of thread locals containing the current stream for each device */ + THCThreadLocal* currentStreams; + + /* Table of enabled peer-to-peer access between directed pairs of GPUs. + If i accessing allocs on j is enabled, p2pAccess[i][j] is 1; 0 otherwise. */ + int** p2pAccessEnabled; + + /* Is direct cross-kernel p2p access allowed? Normally, only cross-GPU + copies are allowed via p2p if p2p access is enabled at all for + the pair of GPUs in question, but if this flag is true, then + all cross-GPU access checks are disabled, allowing kernels to + directly access memory on another GPUs. + Note that p2p access must exist and be enabled for the pair of + GPUs in question. 
*/ + int p2pKernelAccessEnabled; + + void (*cutorchGCFunction)(void *data); + void *cutorchGCData; + ptrdiff_t heapSoftmax; + ptrdiff_t heapDelta; +}; THC_API THCState* THCState_alloc(); THC_API void THCState_free(THCState* state); diff --git a/lib/THC/THCStream.h b/lib/THC/THCStream.h index 7e4bb49..de3f64e 100644 --- a/lib/THC/THCStream.h +++ b/lib/THC/THCStream.h @@ -4,12 +4,12 @@ #include <cuda_runtime_api.h> #include "THCGeneral.h" -typedef struct THCStream +struct THCStream { cudaStream_t stream; int device; int refcount; -} THCStream; +}; THC_API THCStream* THCStream_new(int flags); |