Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/torch/cutorch.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: soumith <soumith@fb.com> 2016-10-26 04:23:53 +0300
committer: soumith <soumith@fb.com> 2016-10-26 04:23:53 +0300
commit: 64f974178c03c93666cfe3796b7e2d7b549476a2 (patch)
tree: a075bc31f5eccbd7655299b5c2671359e2f1962d
parent: 17300d9cc0c462dfde81eb81f89ba0a15e095844 (diff)
pushing THCState back to the header (ref: thcstateheader)
-rw-r--r-- lib/THC/CMakeLists.txt 2
-rw-r--r-- lib/THC/THCGeneral.c 54
-rw-r--r-- lib/THC/THCGeneral.h.in 58
-rw-r--r-- lib/THC/THCStream.h 4
4 files changed, 60 insertions, 58 deletions
diff --git a/lib/THC/CMakeLists.txt b/lib/THC/CMakeLists.txt
index 244568f..b081345 100644
--- a/lib/THC/CMakeLists.txt
+++ b/lib/THC/CMakeLists.txt
@@ -113,6 +113,7 @@ ELSE()
SET(THC_INSTALL_CMAKE_SUBDIR ${Torch_INSTALL_CMAKE_SUBDIR})
ENDIF()
+INCLUDE_DIRECTORIES("${CMAKE_CURRENT_SOURCE_DIR}")
INCLUDE_DIRECTORIES("${CMAKE_CURRENT_BINARY_DIR}")
CONFIGURE_FILE(THCGeneral.h.in "${CMAKE_CURRENT_BINARY_DIR}/THCGeneral.h")
@@ -200,6 +201,7 @@ INSTALL(FILES
THCStorage.h
THCStorageCopy.h
THCStream.h
+ THCThreadLocal.h
THCTensor.h
THCTensorCopy.h
THCTensorRandom.h
diff --git a/lib/THC/THCGeneral.c b/lib/THC/THCGeneral.c
index 0b75399..0a1d340 100644
--- a/lib/THC/THCGeneral.c
+++ b/lib/THC/THCGeneral.c
@@ -12,60 +12,6 @@
#define GLOBAL_SCRATCH_SPACE_PER_SM_STREAM 4 * sizeof(float)
-typedef struct _THCCudaResourcesPerDevice {
- THCStream** streams;
- cublasHandle_t* blasHandles;
- /* Size of scratch space per each stream on this device available */
- size_t scratchSpacePerStream;
- /* Device-resident scratch space per stream, used for global memory
- reduction kernels. */
- void** devScratchSpacePerStream;
-} THCCudaResourcesPerDevice;
-
-struct THCState {
- struct THCRNGState* rngState;
- struct cudaDeviceProp* deviceProperties;
- /* Set of all allocated resources. resourcePerDevice[dev]->streams[0] is NULL,
- which specifies the per-device default stream. blasHandles do not have a
- default and must be explicitly initialized. We always initialize 1
- blasHandle but we can use more.
- */
- THCCudaResourcesPerDevice* resourcesPerDevice;
- /* Captured number of devices upon startup; convenience for bounds checking */
- int numDevices;
- /* Number of Torch defined resources available, indices 1 ... numStreams */
- int numUserStreams;
- int numUserBlasHandles;
-
- /* Allocator using cudaMallocHost. */
- THAllocator* cudaHostAllocator;
- THCDeviceAllocator* cudaDeviceAllocator;
-
- /* Index of the current selected BLAS handle. The actual BLAS handle used
- depends on the current device. */
- THCThreadLocal/*<int>*/ currentPerDeviceBlasHandle;
- /* Array of thread locals containing the current stream for each device */
- THCThreadLocal* currentStreams;
-
- /* Table of enabled peer-to-peer access between directed pairs of GPUs.
- If i accessing allocs on j is enabled, p2pAccess[i][j] is 1; 0 otherwise. */
- int** p2pAccessEnabled;
-
- /* Is direct cross-kernel p2p access allowed? Normally, only cross-GPU
- copies are allowed via p2p if p2p access is enabled at all for
- the pair of GPUs in question, but if this flag is true, then
- all cross-GPU access checks are disabled, allowing kernels to
- directly access memory on another GPUs.
- Note that p2p access must exist and be enabled for the pair of
- GPUs in question. */
- int p2pKernelAccessEnabled;
-
- void (*cutorchGCFunction)(void *data);
- void *cutorchGCData;
- ptrdiff_t heapSoftmax;
- ptrdiff_t heapDelta;
-};
-
THCCudaResourcesPerDevice* THCState_getDeviceResourcePtr(
THCState *state, int device);
diff --git a/lib/THC/THCGeneral.h.in b/lib/THC/THCGeneral.h.in
index 8b3ac74..22aab03 100644
--- a/lib/THC/THCGeneral.h.in
+++ b/lib/THC/THCGeneral.h.in
@@ -3,6 +3,7 @@
#include "THGeneral.h"
#include "THAllocator.h"
+#include "THCThreadLocal.h"
#undef log1p
#include "cuda.h"
@@ -40,7 +41,8 @@
#endif
struct THCRNGState; /* Random number generator state. */
-struct THCStream;
+typedef struct THCStream THCStream;
+typedef struct THCState THCState;
typedef struct _THCDeviceAllocator {
cudaError_t (*malloc)( void*, void**, size_t, cudaStream_t);
@@ -50,9 +52,61 @@ typedef struct _THCDeviceAllocator {
void* state;
} THCDeviceAllocator;
+typedef struct _THCCudaResourcesPerDevice {
+ THCStream** streams;
+ cublasHandle_t* blasHandles;
+ /* Size of scratch space per each stream on this device available */
+ size_t scratchSpacePerStream;
+ /* Device-resident scratch space per stream, used for global memory
+ reduction kernels. */
+ void** devScratchSpacePerStream;
+} THCCudaResourcesPerDevice;
+
/* Global state to be held in the cutorch table. */
-typedef struct THCState THCState;
+struct THCState {
+ struct THCRNGState* rngState;
+ struct cudaDeviceProp* deviceProperties;
+ /* Set of all allocated resources. resourcePerDevice[dev]->streams[0] is NULL,
+ which specifies the per-device default stream. blasHandles do not have a
+ default and must be explicitly initialized. We always initialize 1
+ blasHandle but we can use more.
+ */
+ THCCudaResourcesPerDevice* resourcesPerDevice;
+ /* Captured number of devices upon startup; convenience for bounds checking */
+ int numDevices;
+ /* Number of Torch defined resources available, indices 1 ... numStreams */
+ int numUserStreams;
+ int numUserBlasHandles;
+
+ /* Allocator using cudaMallocHost. */
+ THAllocator* cudaHostAllocator;
+ THCDeviceAllocator* cudaDeviceAllocator;
+
+ /* Index of the current selected BLAS handle. The actual BLAS handle used
+ depends on the current device. */
+ THCThreadLocal/*<int>*/ currentPerDeviceBlasHandle;
+ /* Array of thread locals containing the current stream for each device */
+ THCThreadLocal* currentStreams;
+
+ /* Table of enabled peer-to-peer access between directed pairs of GPUs.
+ If i accessing allocs on j is enabled, p2pAccess[i][j] is 1; 0 otherwise. */
+ int** p2pAccessEnabled;
+
+ /* Is direct cross-kernel p2p access allowed? Normally, only cross-GPU
+ copies are allowed via p2p if p2p access is enabled at all for
+ the pair of GPUs in question, but if this flag is true, then
+ all cross-GPU access checks are disabled, allowing kernels to
+ directly access memory on another GPUs.
+ Note that p2p access must exist and be enabled for the pair of
+ GPUs in question. */
+ int p2pKernelAccessEnabled;
+
+ void (*cutorchGCFunction)(void *data);
+ void *cutorchGCData;
+ ptrdiff_t heapSoftmax;
+ ptrdiff_t heapDelta;
+};
THC_API THCState* THCState_alloc();
THC_API void THCState_free(THCState* state);
diff --git a/lib/THC/THCStream.h b/lib/THC/THCStream.h
index 7e4bb49..de3f64e 100644
--- a/lib/THC/THCStream.h
+++ b/lib/THC/THCStream.h
@@ -4,12 +4,12 @@
#include <cuda_runtime_api.h>
#include "THCGeneral.h"
-typedef struct THCStream
+struct THCStream
{
cudaStream_t stream;
int device;
int refcount;
-} THCStream;
+};
THC_API THCStream* THCStream_new(int flags);