diff options
-rw-r--r-- | src/CMakeLists.txt | 18 | ||||
-rw-r--r-- | src/backend/cpu/dropout.cpp | 14 | ||||
-rw-r--r-- | src/backend/dispatch.h | 39 | ||||
-rw-r--r-- | src/backend/gpu/dropout.cu | 50 | ||||
-rw-r--r-- | src/graph/backend.h | 20 | ||||
-rw-r--r-- | src/graph/backend_gpu.h | 65 | ||||
-rw-r--r-- | src/graph/expression_graph.cpp | 7 | ||||
-rw-r--r-- | src/graph/expression_graph.h | 2 | ||||
-rw-r--r-- | src/graph/node.cu | 51 | ||||
-rw-r--r-- | src/graph/node.h | 5 | ||||
-rw-r--r-- | src/graph/node_operators_binary.h | 58 | ||||
-rw-r--r-- | src/graph/node_operators_unary.h | 5 | ||||
-rw-r--r-- | src/python/CMakeLists.txt | 4 | ||||
-rw-r--r-- | src/tests/dropout_test.cu | 2 |
14 files changed, 51 insertions, 289 deletions
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index c1bfdaf6..977d88f7 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -8,19 +8,21 @@ cuda_add_library(marian 3rd_party/cnpy/cnpy.cpp 3rd_party/exception.cpp 3rd_party/svd/svd.cpp - graph/expression_graph.cpp - graph/expression_operators.cu - graph/node.cu - graph/node_operators.cu - graph/node_initializers.cu tensors/tensor.cu tensors/device.cu tensors/device.cpp + tensors/backend.cpp + tensors/gpu/dropout.cu + tensors/cpu/dropout.cpp kernels/tensor_operators.cu kernels/cudnn_wrappers.cu - backend/gpu/dropout.cu - backend/cpu/dropout.cpp + graph/expression_graph.cpp + graph/expression_operators.cu + graph/node.cpp + graph/node_operators.cu + graph/node_initializers.cu layers/convolution.cu + rnn/cells.cu optimizers/clippers.cu optimizers/optimizers.cu @@ -91,7 +93,7 @@ endforeach(exec) #set_target_properties(align2steps PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}") if(PYTHONLIBS_FOUND) - add_subdirectory(python) +# add_subdirectory(python) endif(PYTHONLIBS_FOUND) if(COMPILE_TESTS) diff --git a/src/backend/cpu/dropout.cpp b/src/backend/cpu/dropout.cpp deleted file mode 100644 index 247dd460..00000000 --- a/src/backend/cpu/dropout.cpp +++ /dev/null @@ -1,14 +0,0 @@ -#include <algorithm>
-
-#include "backend/dispatch.h"
-
-namespace marian {
- namespace cpu {
-
- void Dropout(Ptr<Backend> backend, Tensor tensor, float p) {
- ABORT("Not implemented");
- std::fill(tensor->data(), tensor->data() + tensor->size(), p);
- }
-
- }
-}
diff --git a/src/backend/dispatch.h b/src/backend/dispatch.h deleted file mode 100644 index 4d9490e1..00000000 --- a/src/backend/dispatch.h +++ /dev/null @@ -1,39 +0,0 @@ -#pragma once
-
-#include "common/definitions.h"
-#include "graph/backend.h"
-#include "tensors/tensor.h"
-
-#define DISPATCH1(Function, Arg1) \
- namespace gpu { \
- void Function(Ptr<Backend>, Arg1); \
- } \
- namespace cpu { \
- void Function(Ptr<Backend>, Arg1); \
- } \
- void Function(Ptr<Backend> backend, Arg1 arg1) { \
- if(backend->getDevice().type == DeviceType::gpu) \
- gpu::Function(backend, arg1); \
- else \
- cpu::Function(backend, arg1); \
- }
-
-#define DISPATCH2(Function, Arg1, Arg2) \
- namespace gpu { \
- void Function(Ptr<Backend>, Arg1, Arg2); \
- } \
- namespace cpu { \
- void Function(Ptr<Backend>, Arg1, Arg2); \
- } \
- static inline void Function(Ptr<Backend> backend, Arg1 arg1, Arg2 arg2) { \
- if(backend->getDevice().type == DeviceType::gpu) \
- gpu::Function(backend, arg1, arg2); \
- else \
- cpu::Function(backend, arg1, arg2); \
- }
-
-namespace marian {
-
- DISPATCH2(Dropout, Tensor, float)
-
-}
\ No newline at end of file diff --git a/src/backend/gpu/dropout.cu b/src/backend/gpu/dropout.cu deleted file mode 100644 index f2c29810..00000000 --- a/src/backend/gpu/dropout.cu +++ /dev/null @@ -1,50 +0,0 @@ -#include <cuda.h>
-#include <curand.h>
-#include <stdio.h>
-#include <stdlib.h>
-
-#include "backend/dispatch.h"
-#include "graph/backend_gpu.h"
-
-#define CUDA_CALL(x) \
- do { \
- if((x) != cudaSuccess) { \
- printf("Error at %s:%d\n", __FILE__, __LINE__); \
- exit(1); \
- } \
- } while(0)
-
-#define CURAND_CALL(x) \
- do { \
- if((x) != CURAND_STATUS_SUCCESS) { \
- printf("Error at %s:%d\n", __FILE__, __LINE__); \
- exit(1); \
- } \
- } while(0)
-
-
-namespace marian {
- namespace gpu {
-
- __global__ void gScale(float* data, int n, float p) {
- int index = threadIdx.x + blockIdx.x * blockDim.x;
-
- while(index < n) {
- data[index] = (data[index] < p) / p;
- index += gridDim.x * blockDim.x;
- }
- }
-
- void Dropout(Ptr<Backend> backend, Tensor tensor, float p) {
- curandGenerator_t gen = std::static_pointer_cast<BackendGPU>(backend)->getCurandGenerator();
- int n = tensor->size();
- CURAND_CALL(curandGenerateUniform(gen, tensor->data(), n));
-
- int numThreads = std::min(n, 512);
- int numBlocks = n / numThreads + (n % numThreads != 0);
-
- gScale<<<numBlocks, numThreads>>>(tensor->data(), n, 1.f - p);
- }
-
- }
-}
diff --git a/src/graph/backend.h b/src/graph/backend.h deleted file mode 100644 index 323f3f95..00000000 --- a/src/graph/backend.h +++ /dev/null @@ -1,20 +0,0 @@ -#pragma once - -#include "common/definitions.h" - -namespace marian { - -class Backend { -protected: - DeviceId deviceId_; - size_t seed_; - -public: - Backend(DeviceId deviceId, size_t seed) - : deviceId_(deviceId), seed_(seed) {} - - virtual DeviceId getDevice() { return deviceId_; }; - virtual void setDevice() = 0; -}; - -} diff --git a/src/graph/backend_gpu.h b/src/graph/backend_gpu.h deleted file mode 100644 index faa5cc77..00000000 --- a/src/graph/backend_gpu.h +++ /dev/null @@ -1,65 +0,0 @@ -#pragma once - -#include <cublas_v2.h> -#include <cuda.h> -#include <curand.h> - -#include "common/config.h" -#include "graph/backend.h" - -#define CURAND_CALL(x) \ - do { \ - if((x) != CURAND_STATUS_SUCCESS) { \ - printf("Error at %s:%d\n", __FILE__, __LINE__); \ - exit(1); \ - } \ - } while(0) - -namespace marian { - -class BackendGPU : public Backend { -public: - BackendGPU(DeviceId deviceId, size_t seed) : Backend(deviceId, seed) { - setDevice(); - setHandles(); - } - - void setDevice() { - cudaSetDevice(deviceId_.no); - } - - cublasHandle_t getCublasHandle() { return cublasHandle_; } - - curandGenerator_t getCurandGenerator() { return curandGenerator_; } - -private: - cublasHandle_t cublasHandle_; - curandGenerator_t curandGenerator_; - - - void setHandles() { - cublasHandle_ = create_handle(); - curandGenerator_ = createCurandGenerator(); - } - - - curandGenerator_t createCurandGenerator() { - cudaSetDevice(deviceId_.no); - curandGenerator_t generator; - CURAND_CALL(curandCreateGenerator(&generator, CURAND_RNG_PSEUDO_DEFAULT)); - CURAND_CALL(curandSetPseudoRandomGeneratorSeed(generator, seed_)); - - // cudaStream_t stream = 0; - // CURAND_CALL(curandSetStream(generator, stream)); - // CURAND_CALL(curandDestroyGenerator(generator)); - return generator; - } - - cublasHandle_t create_handle() { - cudaSetDevice(deviceId_.no); - cublasHandle_t cublasHandle; - cublasCreate(&cublasHandle); - return cublasHandle; - } -}; -} diff --git a/src/graph/expression_graph.cpp b/src/graph/expression_graph.cpp index 9014abb5..183b5787 100644 --- a/src/graph/expression_graph.cpp +++ b/src/graph/expression_graph.cpp @@ -1,8 +1,7 @@ #include <sstream> -#include "graph/backend_gpu.h" #include "graph/expression_graph.h" -#include "backend/dispatch.h" +#include "tensors/dispatch.h" namespace marian { @@ -11,11 +10,9 @@ ExpressionGraph::ExpressionGraph(bool inference) void ExpressionGraph::setDevice(DeviceId deviceId) { if(!backend_) { - backend_ = New<BackendGPU>(deviceId, Config::seed); - + backend_ = BackendByDevice(deviceId, Config::seed); params_ = New<Parameters>(); params_->init(backend_->getDevice()); - tensors_ = New<TensorAllocator>(backend_->getDevice()); } } diff --git a/src/graph/expression_graph.h b/src/graph/expression_graph.h index 7d50e9ba..defdb475 100644 --- a/src/graph/expression_graph.h +++ b/src/graph/expression_graph.h @@ -8,8 +8,8 @@ #include "common/definitions.h" #include "tensors/tensor_allocator.h" +#include "tensors/backend.h" -#include "graph/backend.h" #include "graph/parameters.h" #include "graph/chainable.h" #include "graph/node_operators.h" diff --git a/src/graph/node.cu b/src/graph/node.cu deleted file mode 100644 index a289f60c..00000000 --- a/src/graph/node.cu +++ /dev/null @@ -1,51 +0,0 @@ -#include "graph/backend_gpu.h" -#include "graph/expression_graph.h" -#include "graph/node.h" - -namespace marian { - -size_t Node::allocate() { - size_t elements = 0; - if(!val_) { - graph()->tensor(val_, shape_); - elements = val_->shape().elements(); - } - return elements; -} - -void Node::free() { - if(graph()) { - if(val_) - graph()->free(val_); - if(adj_) - graph()->free(adj_); - } -} - -void Node::init_dependent() { - if(!adj_) { - graph()->tensor(adj_, shape_); - adj_->set(1); - } -} - -void Node::set_zero_adjoint() { - if(!adj_) { - graph()->tensor(adj_, shape_); - adj_->set(0); - } -} - -float Node::scalar() { - return val_->scalar(); -} - -Ptr<Backend> Node::getBackend() { - return graph()->getBackend(); -} - -void NaryNodeOp::remove_children_from_top_nodes() { - for(auto child : children_) - graph()->remove_top_node(child); -} -} diff --git a/src/graph/node.h b/src/graph/node.h index 126d7d49..aa450000 100644 --- a/src/graph/node.h +++ b/src/graph/node.h @@ -5,10 +5,11 @@ #include <thread> #include "common/keywords.h" -#include "graph/backend.h" -#include "graph/chainable.h" +#include "tensors/backend.h" #include "tensors/tensor.h" +#include "graph/chainable.h" + namespace marian { class Node : public Chainable<Tensor>, diff --git a/src/graph/node_operators_binary.h b/src/graph/node_operators_binary.h index 50ead8e6..7d6836a3 100644 --- a/src/graph/node_operators_binary.h +++ b/src/graph/node_operators_binary.h @@ -2,10 +2,10 @@ #include <thread> -#include "graph/backend_gpu.h" +#include "tensors/gpu/backend.h" #include "graph/node.h" -#include "kernels/tensor_operators.h" #include "functional/functional.h" +#include "kernels/tensor_operators.h" #include "kernels/cudnn_wrappers.h" namespace marian { @@ -54,7 +54,7 @@ public: NodeOps forwardOps() { // C = alpha * dot(op(A), op(B)) return {NodeOp(Prod( - std::static_pointer_cast<BackendGPU>(getBackend())->getCublasHandle(), + std::static_pointer_cast<gpu::Backend>(getBackend())->getCublasHandle(), val_, child(0)->val(), child(1)->val(), @@ -72,7 +72,7 @@ public: // to sum gradients from different graph parts if(!transA_ && transB_) - return {NodeOp(Prod(std::static_pointer_cast<BackendGPU>(getBackend()) + return {NodeOp(Prod(std::static_pointer_cast<gpu::Backend>(getBackend()) ->getCublasHandle(), child(0)->grad(), adj_, @@ -81,7 +81,7 @@ public: false, 1.0, scalar_)), - NodeOp(Prod(std::static_pointer_cast<BackendGPU>(getBackend()) + NodeOp(Prod(std::static_pointer_cast<gpu::Backend>(getBackend()) ->getCublasHandle(), child(1)->grad(), adj_, @@ -92,7 +92,7 @@ public: scalar_))}; if(transA_ && !transB_) - return {NodeOp(Prod(std::static_pointer_cast<BackendGPU>(getBackend()) + return {NodeOp(Prod(std::static_pointer_cast<gpu::Backend>(getBackend()) ->getCublasHandle(), child(0)->grad(), child(1)->val(), @@ -101,7 +101,7 @@ public: true, 1.0, scalar_)), - NodeOp(Prod(std::static_pointer_cast<BackendGPU>(getBackend()) + NodeOp(Prod(std::static_pointer_cast<gpu::Backend>(getBackend()) ->getCublasHandle(), child(1)->grad(), child(0)->val(), @@ -112,7 +112,7 @@ public: scalar_))}; if(transA_ && transB_) - return {NodeOp(Prod(std::static_pointer_cast<BackendGPU>(getBackend()) + return {NodeOp(Prod(std::static_pointer_cast<gpu::Backend>(getBackend()) ->getCublasHandle(), child(0)->grad(), child(1)->val(), @@ -121,7 +121,7 @@ public: true, 1.0, scalar_)), - NodeOp(Prod(std::static_pointer_cast<BackendGPU>(getBackend()) + NodeOp(Prod(std::static_pointer_cast<gpu::Backend>(getBackend()) ->getCublasHandle(), child(1)->grad(), adj_, @@ -131,7 +131,7 @@ public: 1.0, scalar_))}; - return {NodeOp(Prod(std::static_pointer_cast<BackendGPU>(getBackend()) + return {NodeOp(Prod(std::static_pointer_cast<gpu::Backend>(getBackend()) ->getCublasHandle(), child(0)->grad(), adj_, @@ -140,7 +140,7 @@ public: true, 1.0, scalar_)), - NodeOp(Prod(std::static_pointer_cast<BackendGPU>(getBackend()) + NodeOp(Prod(std::static_pointer_cast<gpu::Backend>(getBackend()) ->getCublasHandle(), child(1)->grad(), child(0)->val(), @@ -198,7 +198,7 @@ public: using namespace functional; return { NodeOp(Prod( - std::static_pointer_cast<BackendGPU>(getBackend())->getCublasHandle(), + std::static_pointer_cast<gpu::Backend>(getBackend())->getCublasHandle(), val_, child(0)->val(), child(1)->val(), @@ -219,7 +219,7 @@ public: using namespace functional; if(!transA_ && transB_) - return {NodeOp(Prod(std::static_pointer_cast<BackendGPU>(getBackend()) + return {NodeOp(Prod(std::static_pointer_cast<gpu::Backend>(getBackend()) ->getCublasHandle(), child(0)->grad(), adj_, @@ -228,7 +228,7 @@ public: false, 1.0, scalar_)), - NodeOp(Prod(std::static_pointer_cast<BackendGPU>(getBackend()) + NodeOp(Prod(std::static_pointer_cast<gpu::Backend>(getBackend()) ->getCublasHandle(), child(1)->grad(), adj_, @@ -240,7 +240,7 @@ public: NodeOp(Add(_1, child(2)->grad(), adj_))}; if(transA_ && !transB_) - return {NodeOp(Prod(std::static_pointer_cast<BackendGPU>(getBackend()) + return {NodeOp(Prod(std::static_pointer_cast<gpu::Backend>(getBackend()) ->getCublasHandle(), child(0)->grad(), child(1)->val(), @@ -249,7 +249,7 @@ public: true, 1.0, scalar_)), - NodeOp(Prod(std::static_pointer_cast<BackendGPU>(getBackend()) + NodeOp(Prod(std::static_pointer_cast<gpu::Backend>(getBackend()) ->getCublasHandle(), child(1)->grad(), child(0)->val(), @@ -261,7 +261,7 @@ public: NodeOp(Add(_1, child(2)->grad(), adj_))}; if(transA_ && transB_) - return {NodeOp(Prod(std::static_pointer_cast<BackendGPU>(getBackend()) + return {NodeOp(Prod(std::static_pointer_cast<gpu::Backend>(getBackend()) ->getCublasHandle(), child(0)->grad(), child(1)->val(), @@ -270,7 +270,7 @@ public: true, 1.0, scalar_)), - NodeOp(Prod(std::static_pointer_cast<BackendGPU>(getBackend()) + NodeOp(Prod(std::static_pointer_cast<gpu::Backend>(getBackend()) ->getCublasHandle(), child(1)->grad(), adj_, @@ -281,7 +281,7 @@ public: scalar_)), NodeOp(Add(_1, child(2)->grad(), adj_))}; - return {NodeOp(Prod(std::static_pointer_cast<BackendGPU>(getBackend()) + return {NodeOp(Prod(std::static_pointer_cast<gpu::Backend>(getBackend()) ->getCublasHandle(), child(0)->grad(), adj_, @@ -290,7 +290,7 @@ public: true, 1.0, scalar_)), - NodeOp(Prod(std::static_pointer_cast<BackendGPU>(getBackend()) + NodeOp(Prod(std::static_pointer_cast<gpu::Backend>(getBackend()) ->getCublasHandle(), child(1)->grad(), child(0)->val(), @@ -350,7 +350,7 @@ public: NodeOps forwardOps() { // C = alpha * dot(op(A), op(B)) return {NodeOp(ProdBatched( - std::static_pointer_cast<BackendGPU>(getBackend())->getCublasHandle(), + std::static_pointer_cast<gpu::Backend>(getBackend())->getCublasHandle(), val_, child(0)->val(), child(1)->val(), @@ -369,7 +369,7 @@ public: if(!transA_ && transB_) return { - NodeOp(ProdBatched(std::static_pointer_cast<BackendGPU>(getBackend()) + NodeOp(ProdBatched(std::static_pointer_cast<gpu::Backend>(getBackend()) ->getCublasHandle(), child(0)->grad(), adj_, @@ -378,7 +378,7 @@ public: false, 1.0, scalar_)), - NodeOp(ProdBatched(std::static_pointer_cast<BackendGPU>(getBackend()) + NodeOp(ProdBatched(std::static_pointer_cast<gpu::Backend>(getBackend()) ->getCublasHandle(), child(1)->grad(), adj_, @@ -390,7 +390,7 @@ public: if(transA_ && !transB_) return { - NodeOp(ProdBatched(std::static_pointer_cast<BackendGPU>(getBackend()) + NodeOp(ProdBatched(std::static_pointer_cast<gpu::Backend>(getBackend()) ->getCublasHandle(), child(0)->grad(), child(1)->val(), @@ -399,7 +399,7 @@ public: true, 1.0, scalar_)), - NodeOp(ProdBatched(std::static_pointer_cast<BackendGPU>(getBackend()) + NodeOp(ProdBatched(std::static_pointer_cast<gpu::Backend>(getBackend()) ->getCublasHandle(), child(1)->grad(), child(0)->val(), @@ -411,7 +411,7 @@ public: if(transA_ && transB_) return { - NodeOp(ProdBatched(std::static_pointer_cast<BackendGPU>(getBackend()) + NodeOp(ProdBatched(std::static_pointer_cast<gpu::Backend>(getBackend()) ->getCublasHandle(), child(0)->grad(), child(1)->val(), @@ -420,7 +420,7 @@ public: true, 1.0, scalar_)), - NodeOp(ProdBatched(std::static_pointer_cast<BackendGPU>(getBackend()) + NodeOp(ProdBatched(std::static_pointer_cast<gpu::Backend>(getBackend()) ->getCublasHandle(), child(1)->grad(), adj_, @@ -431,7 +431,7 @@ public: scalar_))}; return { - NodeOp(ProdBatched(std::static_pointer_cast<BackendGPU>(getBackend()) + NodeOp(ProdBatched(std::static_pointer_cast<gpu::Backend>(getBackend()) ->getCublasHandle(), child(0)->grad(), adj_, @@ -440,7 +440,7 @@ public: true, 1.0, scalar_)), - NodeOp(ProdBatched(std::static_pointer_cast<BackendGPU>(getBackend()) + NodeOp(ProdBatched(std::static_pointer_cast<gpu::Backend>(getBackend()) ->getCublasHandle(), child(1)->grad(), child(0)->val(), diff --git a/src/graph/node_operators_unary.h b/src/graph/node_operators_unary.h index 8390a0c2..a3f27fd2 100644 --- a/src/graph/node_operators_unary.h +++ b/src/graph/node_operators_unary.h @@ -1,10 +1,11 @@ #pragma once -#include "graph/backend_gpu.h" +#include "tensors/tensor.h" +#include "tensors/gpu/backend.h" + #include "graph/node.h" #include "kernels/sparse.h" #include "kernels/tensor_operators.h" -#include "tensors/tensor.h" #include "functional/functional.h" #include "kernels/cudnn_wrappers.h" diff --git a/src/python/CMakeLists.txt b/src/python/CMakeLists.txt index d547660a..9d54c01a 100644 --- a/src/python/CMakeLists.txt +++ b/src/python/CMakeLists.txt @@ -10,8 +10,8 @@ cuda_add_library(pymarian SHARED ../tensors/tensor.cu ../tensors/device.cpp ../kernels/tensor_operators.cu - ../backend/gpu/dropout.cu - ../backend/cpu/dropout.cpp + ../tensors/gpu/dropout.cu + ../tensors/cpu/dropout.cpp ../kernels/sparse.cu #../layers/param_initializers.cu ../rnn/attention.cu diff --git a/src/tests/dropout_test.cu b/src/tests/dropout_test.cu index 37738deb..f4a42e92 100644 --- a/src/tests/dropout_test.cu +++ b/src/tests/dropout_test.cu @@ -13,7 +13,7 @@ int main(int argc, char** argv) { auto c = New<Config>(argc, argv); auto g = New<ExpressionGraph>(); - g->setDevice({0, DeviceType::cpu}); + g->setDevice({0, DeviceType::gpu}); g->reserveWorkspaceMB(512); for(int i = 0; i < 10; ++i) { |