Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/marian.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/CMakeLists.txt18
-rw-r--r--src/backend/cpu/dropout.cpp14
-rw-r--r--src/backend/dispatch.h39
-rw-r--r--src/backend/gpu/dropout.cu50
-rw-r--r--src/graph/backend.h20
-rw-r--r--src/graph/backend_gpu.h65
-rw-r--r--src/graph/expression_graph.cpp7
-rw-r--r--src/graph/expression_graph.h2
-rw-r--r--src/graph/node.cu51
-rw-r--r--src/graph/node.h5
-rw-r--r--src/graph/node_operators_binary.h58
-rw-r--r--src/graph/node_operators_unary.h5
-rw-r--r--src/python/CMakeLists.txt4
-rw-r--r--src/tests/dropout_test.cu2
14 files changed, 51 insertions, 289 deletions
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index c1bfdaf6..977d88f7 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -8,19 +8,21 @@ cuda_add_library(marian
3rd_party/cnpy/cnpy.cpp
3rd_party/exception.cpp
3rd_party/svd/svd.cpp
- graph/expression_graph.cpp
- graph/expression_operators.cu
- graph/node.cu
- graph/node_operators.cu
- graph/node_initializers.cu
tensors/tensor.cu
tensors/device.cu
tensors/device.cpp
+ tensors/backend.cpp
+ tensors/gpu/dropout.cu
+ tensors/cpu/dropout.cpp
kernels/tensor_operators.cu
kernels/cudnn_wrappers.cu
- backend/gpu/dropout.cu
- backend/cpu/dropout.cpp
+ graph/expression_graph.cpp
+ graph/expression_operators.cu
+ graph/node.cpp
+ graph/node_operators.cu
+ graph/node_initializers.cu
layers/convolution.cu
+
rnn/cells.cu
optimizers/clippers.cu
optimizers/optimizers.cu
@@ -91,7 +93,7 @@ endforeach(exec)
#set_target_properties(align2steps PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}")
if(PYTHONLIBS_FOUND)
- add_subdirectory(python)
+# add_subdirectory(python)
endif(PYTHONLIBS_FOUND)
if(COMPILE_TESTS)
diff --git a/src/backend/cpu/dropout.cpp b/src/backend/cpu/dropout.cpp
deleted file mode 100644
index 247dd460..00000000
--- a/src/backend/cpu/dropout.cpp
+++ /dev/null
@@ -1,14 +0,0 @@
-#include <algorithm>
-
-#include "backend/dispatch.h"
-
-namespace marian {
- namespace cpu {
-
- void Dropout(Ptr<Backend> backend, Tensor tensor, float p) {
- ABORT("Not implemented");
- std::fill(tensor->data(), tensor->data() + tensor->size(), p);
- }
-
- }
-}
diff --git a/src/backend/dispatch.h b/src/backend/dispatch.h
deleted file mode 100644
index 4d9490e1..00000000
--- a/src/backend/dispatch.h
+++ /dev/null
@@ -1,39 +0,0 @@
-#pragma once
-
-#include "common/definitions.h"
-#include "graph/backend.h"
-#include "tensors/tensor.h"
-
-#define DISPATCH1(Function, Arg1) \
- namespace gpu { \
- void Function(Ptr<Backend>, Arg1); \
- } \
- namespace cpu { \
- void Function(Ptr<Backend>, Arg1); \
- } \
- void Function(Ptr<Backend> backend, Arg1 arg1) { \
- if(backend->getDevice().type == DeviceType::gpu) \
- gpu::Function(backend, arg1); \
- else \
- cpu::Function(backend, arg1); \
- }
-
-#define DISPATCH2(Function, Arg1, Arg2) \
- namespace gpu { \
- void Function(Ptr<Backend>, Arg1, Arg2); \
- } \
- namespace cpu { \
- void Function(Ptr<Backend>, Arg1, Arg2); \
- } \
- static inline void Function(Ptr<Backend> backend, Arg1 arg1, Arg2 arg2) { \
- if(backend->getDevice().type == DeviceType::gpu) \
- gpu::Function(backend, arg1, arg2); \
- else \
- cpu::Function(backend, arg1, arg2); \
- }
-
-namespace marian {
-
- DISPATCH2(Dropout, Tensor, float)
-
-} \ No newline at end of file
diff --git a/src/backend/gpu/dropout.cu b/src/backend/gpu/dropout.cu
deleted file mode 100644
index f2c29810..00000000
--- a/src/backend/gpu/dropout.cu
+++ /dev/null
@@ -1,50 +0,0 @@
-#include <cuda.h>
-#include <curand.h>
-#include <stdio.h>
-#include <stdlib.h>
-
-#include "backend/dispatch.h"
-#include "graph/backend_gpu.h"
-
-#define CUDA_CALL(x) \
- do { \
- if((x) != cudaSuccess) { \
- printf("Error at %s:%d\n", __FILE__, __LINE__); \
- exit(1); \
- } \
- } while(0)
-
-#define CURAND_CALL(x) \
- do { \
- if((x) != CURAND_STATUS_SUCCESS) { \
- printf("Error at %s:%d\n", __FILE__, __LINE__); \
- exit(1); \
- } \
- } while(0)
-
-
-namespace marian {
- namespace gpu {
-
- __global__ void gScale(float* data, int n, float p) {
- int index = threadIdx.x + blockIdx.x * blockDim.x;
-
- while(index < n) {
- data[index] = (data[index] < p) / p;
- index += gridDim.x * blockDim.x;
- }
- }
-
- void Dropout(Ptr<Backend> backend, Tensor tensor, float p) {
- curandGenerator_t gen = std::static_pointer_cast<BackendGPU>(backend)->getCurandGenerator();
- int n = tensor->size();
- CURAND_CALL(curandGenerateUniform(gen, tensor->data(), n));
-
- int numThreads = std::min(n, 512);
- int numBlocks = n / numThreads + (n % numThreads != 0);
-
- gScale<<<numBlocks, numThreads>>>(tensor->data(), n, 1.f - p);
- }
-
- }
-}
diff --git a/src/graph/backend.h b/src/graph/backend.h
deleted file mode 100644
index 323f3f95..00000000
--- a/src/graph/backend.h
+++ /dev/null
@@ -1,20 +0,0 @@
-#pragma once
-
-#include "common/definitions.h"
-
-namespace marian {
-
-class Backend {
-protected:
- DeviceId deviceId_;
- size_t seed_;
-
-public:
- Backend(DeviceId deviceId, size_t seed)
- : deviceId_(deviceId), seed_(seed) {}
-
- virtual DeviceId getDevice() { return deviceId_; };
- virtual void setDevice() = 0;
-};
-
-}
diff --git a/src/graph/backend_gpu.h b/src/graph/backend_gpu.h
deleted file mode 100644
index faa5cc77..00000000
--- a/src/graph/backend_gpu.h
+++ /dev/null
@@ -1,65 +0,0 @@
-#pragma once
-
-#include <cublas_v2.h>
-#include <cuda.h>
-#include <curand.h>
-
-#include "common/config.h"
-#include "graph/backend.h"
-
-#define CURAND_CALL(x) \
- do { \
- if((x) != CURAND_STATUS_SUCCESS) { \
- printf("Error at %s:%d\n", __FILE__, __LINE__); \
- exit(1); \
- } \
- } while(0)
-
-namespace marian {
-
-class BackendGPU : public Backend {
-public:
- BackendGPU(DeviceId deviceId, size_t seed) : Backend(deviceId, seed) {
- setDevice();
- setHandles();
- }
-
- void setDevice() {
- cudaSetDevice(deviceId_.no);
- }
-
- cublasHandle_t getCublasHandle() { return cublasHandle_; }
-
- curandGenerator_t getCurandGenerator() { return curandGenerator_; }
-
-private:
- cublasHandle_t cublasHandle_;
- curandGenerator_t curandGenerator_;
-
-
- void setHandles() {
- cublasHandle_ = create_handle();
- curandGenerator_ = createCurandGenerator();
- }
-
-
- curandGenerator_t createCurandGenerator() {
- cudaSetDevice(deviceId_.no);
- curandGenerator_t generator;
- CURAND_CALL(curandCreateGenerator(&generator, CURAND_RNG_PSEUDO_DEFAULT));
- CURAND_CALL(curandSetPseudoRandomGeneratorSeed(generator, seed_));
-
- // cudaStream_t stream = 0;
- // CURAND_CALL(curandSetStream(generator, stream));
- // CURAND_CALL(curandDestroyGenerator(generator));
- return generator;
- }
-
- cublasHandle_t create_handle() {
- cudaSetDevice(deviceId_.no);
- cublasHandle_t cublasHandle;
- cublasCreate(&cublasHandle);
- return cublasHandle;
- }
-};
-}
diff --git a/src/graph/expression_graph.cpp b/src/graph/expression_graph.cpp
index 9014abb5..183b5787 100644
--- a/src/graph/expression_graph.cpp
+++ b/src/graph/expression_graph.cpp
@@ -1,8 +1,7 @@
#include <sstream>
-#include "graph/backend_gpu.h"
#include "graph/expression_graph.h"
-#include "backend/dispatch.h"
+#include "tensors/dispatch.h"
namespace marian {
@@ -11,11 +10,9 @@ ExpressionGraph::ExpressionGraph(bool inference)
void ExpressionGraph::setDevice(DeviceId deviceId) {
if(!backend_) {
- backend_ = New<BackendGPU>(deviceId, Config::seed);
-
+ backend_ = BackendByDevice(deviceId, Config::seed);
params_ = New<Parameters>();
params_->init(backend_->getDevice());
-
tensors_ = New<TensorAllocator>(backend_->getDevice());
}
}
diff --git a/src/graph/expression_graph.h b/src/graph/expression_graph.h
index 7d50e9ba..defdb475 100644
--- a/src/graph/expression_graph.h
+++ b/src/graph/expression_graph.h
@@ -8,8 +8,8 @@
#include "common/definitions.h"
#include "tensors/tensor_allocator.h"
+#include "tensors/backend.h"
-#include "graph/backend.h"
#include "graph/parameters.h"
#include "graph/chainable.h"
#include "graph/node_operators.h"
diff --git a/src/graph/node.cu b/src/graph/node.cu
deleted file mode 100644
index a289f60c..00000000
--- a/src/graph/node.cu
+++ /dev/null
@@ -1,51 +0,0 @@
-#include "graph/backend_gpu.h"
-#include "graph/expression_graph.h"
-#include "graph/node.h"
-
-namespace marian {
-
-size_t Node::allocate() {
- size_t elements = 0;
- if(!val_) {
- graph()->tensor(val_, shape_);
- elements = val_->shape().elements();
- }
- return elements;
-}
-
-void Node::free() {
- if(graph()) {
- if(val_)
- graph()->free(val_);
- if(adj_)
- graph()->free(adj_);
- }
-}
-
-void Node::init_dependent() {
- if(!adj_) {
- graph()->tensor(adj_, shape_);
- adj_->set(1);
- }
-}
-
-void Node::set_zero_adjoint() {
- if(!adj_) {
- graph()->tensor(adj_, shape_);
- adj_->set(0);
- }
-}
-
-float Node::scalar() {
- return val_->scalar();
-}
-
-Ptr<Backend> Node::getBackend() {
- return graph()->getBackend();
-}
-
-void NaryNodeOp::remove_children_from_top_nodes() {
- for(auto child : children_)
- graph()->remove_top_node(child);
-}
-}
diff --git a/src/graph/node.h b/src/graph/node.h
index 126d7d49..aa450000 100644
--- a/src/graph/node.h
+++ b/src/graph/node.h
@@ -5,10 +5,11 @@
#include <thread>
#include "common/keywords.h"
-#include "graph/backend.h"
-#include "graph/chainable.h"
+#include "tensors/backend.h"
#include "tensors/tensor.h"
+#include "graph/chainable.h"
+
namespace marian {
class Node : public Chainable<Tensor>,
diff --git a/src/graph/node_operators_binary.h b/src/graph/node_operators_binary.h
index 50ead8e6..7d6836a3 100644
--- a/src/graph/node_operators_binary.h
+++ b/src/graph/node_operators_binary.h
@@ -2,10 +2,10 @@
#include <thread>
-#include "graph/backend_gpu.h"
+#include "tensors/gpu/backend.h"
#include "graph/node.h"
-#include "kernels/tensor_operators.h"
#include "functional/functional.h"
+#include "kernels/tensor_operators.h"
#include "kernels/cudnn_wrappers.h"
namespace marian {
@@ -54,7 +54,7 @@ public:
NodeOps forwardOps() {
// C = alpha * dot(op(A), op(B))
return {NodeOp(Prod(
- std::static_pointer_cast<BackendGPU>(getBackend())->getCublasHandle(),
+ std::static_pointer_cast<gpu::Backend>(getBackend())->getCublasHandle(),
val_,
child(0)->val(),
child(1)->val(),
@@ -72,7 +72,7 @@ public:
// to sum gradients from different graph parts
if(!transA_ && transB_)
- return {NodeOp(Prod(std::static_pointer_cast<BackendGPU>(getBackend())
+ return {NodeOp(Prod(std::static_pointer_cast<gpu::Backend>(getBackend())
->getCublasHandle(),
child(0)->grad(),
adj_,
@@ -81,7 +81,7 @@ public:
false,
1.0,
scalar_)),
- NodeOp(Prod(std::static_pointer_cast<BackendGPU>(getBackend())
+ NodeOp(Prod(std::static_pointer_cast<gpu::Backend>(getBackend())
->getCublasHandle(),
child(1)->grad(),
adj_,
@@ -92,7 +92,7 @@ public:
scalar_))};
if(transA_ && !transB_)
- return {NodeOp(Prod(std::static_pointer_cast<BackendGPU>(getBackend())
+ return {NodeOp(Prod(std::static_pointer_cast<gpu::Backend>(getBackend())
->getCublasHandle(),
child(0)->grad(),
child(1)->val(),
@@ -101,7 +101,7 @@ public:
true,
1.0,
scalar_)),
- NodeOp(Prod(std::static_pointer_cast<BackendGPU>(getBackend())
+ NodeOp(Prod(std::static_pointer_cast<gpu::Backend>(getBackend())
->getCublasHandle(),
child(1)->grad(),
child(0)->val(),
@@ -112,7 +112,7 @@ public:
scalar_))};
if(transA_ && transB_)
- return {NodeOp(Prod(std::static_pointer_cast<BackendGPU>(getBackend())
+ return {NodeOp(Prod(std::static_pointer_cast<gpu::Backend>(getBackend())
->getCublasHandle(),
child(0)->grad(),
child(1)->val(),
@@ -121,7 +121,7 @@ public:
true,
1.0,
scalar_)),
- NodeOp(Prod(std::static_pointer_cast<BackendGPU>(getBackend())
+ NodeOp(Prod(std::static_pointer_cast<gpu::Backend>(getBackend())
->getCublasHandle(),
child(1)->grad(),
adj_,
@@ -131,7 +131,7 @@ public:
1.0,
scalar_))};
- return {NodeOp(Prod(std::static_pointer_cast<BackendGPU>(getBackend())
+ return {NodeOp(Prod(std::static_pointer_cast<gpu::Backend>(getBackend())
->getCublasHandle(),
child(0)->grad(),
adj_,
@@ -140,7 +140,7 @@ public:
true,
1.0,
scalar_)),
- NodeOp(Prod(std::static_pointer_cast<BackendGPU>(getBackend())
+ NodeOp(Prod(std::static_pointer_cast<gpu::Backend>(getBackend())
->getCublasHandle(),
child(1)->grad(),
child(0)->val(),
@@ -198,7 +198,7 @@ public:
using namespace functional;
return {
NodeOp(Prod(
- std::static_pointer_cast<BackendGPU>(getBackend())->getCublasHandle(),
+ std::static_pointer_cast<gpu::Backend>(getBackend())->getCublasHandle(),
val_,
child(0)->val(),
child(1)->val(),
@@ -219,7 +219,7 @@ public:
using namespace functional;
if(!transA_ && transB_)
- return {NodeOp(Prod(std::static_pointer_cast<BackendGPU>(getBackend())
+ return {NodeOp(Prod(std::static_pointer_cast<gpu::Backend>(getBackend())
->getCublasHandle(),
child(0)->grad(),
adj_,
@@ -228,7 +228,7 @@ public:
false,
1.0,
scalar_)),
- NodeOp(Prod(std::static_pointer_cast<BackendGPU>(getBackend())
+ NodeOp(Prod(std::static_pointer_cast<gpu::Backend>(getBackend())
->getCublasHandle(),
child(1)->grad(),
adj_,
@@ -240,7 +240,7 @@ public:
NodeOp(Add(_1, child(2)->grad(), adj_))};
if(transA_ && !transB_)
- return {NodeOp(Prod(std::static_pointer_cast<BackendGPU>(getBackend())
+ return {NodeOp(Prod(std::static_pointer_cast<gpu::Backend>(getBackend())
->getCublasHandle(),
child(0)->grad(),
child(1)->val(),
@@ -249,7 +249,7 @@ public:
true,
1.0,
scalar_)),
- NodeOp(Prod(std::static_pointer_cast<BackendGPU>(getBackend())
+ NodeOp(Prod(std::static_pointer_cast<gpu::Backend>(getBackend())
->getCublasHandle(),
child(1)->grad(),
child(0)->val(),
@@ -261,7 +261,7 @@ public:
NodeOp(Add(_1, child(2)->grad(), adj_))};
if(transA_ && transB_)
- return {NodeOp(Prod(std::static_pointer_cast<BackendGPU>(getBackend())
+ return {NodeOp(Prod(std::static_pointer_cast<gpu::Backend>(getBackend())
->getCublasHandle(),
child(0)->grad(),
child(1)->val(),
@@ -270,7 +270,7 @@ public:
true,
1.0,
scalar_)),
- NodeOp(Prod(std::static_pointer_cast<BackendGPU>(getBackend())
+ NodeOp(Prod(std::static_pointer_cast<gpu::Backend>(getBackend())
->getCublasHandle(),
child(1)->grad(),
adj_,
@@ -281,7 +281,7 @@ public:
scalar_)),
NodeOp(Add(_1, child(2)->grad(), adj_))};
- return {NodeOp(Prod(std::static_pointer_cast<BackendGPU>(getBackend())
+ return {NodeOp(Prod(std::static_pointer_cast<gpu::Backend>(getBackend())
->getCublasHandle(),
child(0)->grad(),
adj_,
@@ -290,7 +290,7 @@ public:
true,
1.0,
scalar_)),
- NodeOp(Prod(std::static_pointer_cast<BackendGPU>(getBackend())
+ NodeOp(Prod(std::static_pointer_cast<gpu::Backend>(getBackend())
->getCublasHandle(),
child(1)->grad(),
child(0)->val(),
@@ -350,7 +350,7 @@ public:
NodeOps forwardOps() {
// C = alpha * dot(op(A), op(B))
return {NodeOp(ProdBatched(
- std::static_pointer_cast<BackendGPU>(getBackend())->getCublasHandle(),
+ std::static_pointer_cast<gpu::Backend>(getBackend())->getCublasHandle(),
val_,
child(0)->val(),
child(1)->val(),
@@ -369,7 +369,7 @@ public:
if(!transA_ && transB_)
return {
- NodeOp(ProdBatched(std::static_pointer_cast<BackendGPU>(getBackend())
+ NodeOp(ProdBatched(std::static_pointer_cast<gpu::Backend>(getBackend())
->getCublasHandle(),
child(0)->grad(),
adj_,
@@ -378,7 +378,7 @@ public:
false,
1.0,
scalar_)),
- NodeOp(ProdBatched(std::static_pointer_cast<BackendGPU>(getBackend())
+ NodeOp(ProdBatched(std::static_pointer_cast<gpu::Backend>(getBackend())
->getCublasHandle(),
child(1)->grad(),
adj_,
@@ -390,7 +390,7 @@ public:
if(transA_ && !transB_)
return {
- NodeOp(ProdBatched(std::static_pointer_cast<BackendGPU>(getBackend())
+ NodeOp(ProdBatched(std::static_pointer_cast<gpu::Backend>(getBackend())
->getCublasHandle(),
child(0)->grad(),
child(1)->val(),
@@ -399,7 +399,7 @@ public:
true,
1.0,
scalar_)),
- NodeOp(ProdBatched(std::static_pointer_cast<BackendGPU>(getBackend())
+ NodeOp(ProdBatched(std::static_pointer_cast<gpu::Backend>(getBackend())
->getCublasHandle(),
child(1)->grad(),
child(0)->val(),
@@ -411,7 +411,7 @@ public:
if(transA_ && transB_)
return {
- NodeOp(ProdBatched(std::static_pointer_cast<BackendGPU>(getBackend())
+ NodeOp(ProdBatched(std::static_pointer_cast<gpu::Backend>(getBackend())
->getCublasHandle(),
child(0)->grad(),
child(1)->val(),
@@ -420,7 +420,7 @@ public:
true,
1.0,
scalar_)),
- NodeOp(ProdBatched(std::static_pointer_cast<BackendGPU>(getBackend())
+ NodeOp(ProdBatched(std::static_pointer_cast<gpu::Backend>(getBackend())
->getCublasHandle(),
child(1)->grad(),
adj_,
@@ -431,7 +431,7 @@ public:
scalar_))};
return {
- NodeOp(ProdBatched(std::static_pointer_cast<BackendGPU>(getBackend())
+ NodeOp(ProdBatched(std::static_pointer_cast<gpu::Backend>(getBackend())
->getCublasHandle(),
child(0)->grad(),
adj_,
@@ -440,7 +440,7 @@ public:
true,
1.0,
scalar_)),
- NodeOp(ProdBatched(std::static_pointer_cast<BackendGPU>(getBackend())
+ NodeOp(ProdBatched(std::static_pointer_cast<gpu::Backend>(getBackend())
->getCublasHandle(),
child(1)->grad(),
child(0)->val(),
diff --git a/src/graph/node_operators_unary.h b/src/graph/node_operators_unary.h
index 8390a0c2..a3f27fd2 100644
--- a/src/graph/node_operators_unary.h
+++ b/src/graph/node_operators_unary.h
@@ -1,10 +1,11 @@
#pragma once
-#include "graph/backend_gpu.h"
+#include "tensors/tensor.h"
+#include "tensors/gpu/backend.h"
+
#include "graph/node.h"
#include "kernels/sparse.h"
#include "kernels/tensor_operators.h"
-#include "tensors/tensor.h"
#include "functional/functional.h"
#include "kernels/cudnn_wrappers.h"
diff --git a/src/python/CMakeLists.txt b/src/python/CMakeLists.txt
index d547660a..9d54c01a 100644
--- a/src/python/CMakeLists.txt
+++ b/src/python/CMakeLists.txt
@@ -10,8 +10,8 @@ cuda_add_library(pymarian SHARED
../tensors/tensor.cu
../tensors/device.cpp
../kernels/tensor_operators.cu
- ../backend/gpu/dropout.cu
- ../backend/cpu/dropout.cpp
+ ../tensors/gpu/dropout.cu
+ ../tensors/cpu/dropout.cpp
../kernels/sparse.cu
#../layers/param_initializers.cu
../rnn/attention.cu
diff --git a/src/tests/dropout_test.cu b/src/tests/dropout_test.cu
index 37738deb..f4a42e92 100644
--- a/src/tests/dropout_test.cu
+++ b/src/tests/dropout_test.cu
@@ -13,7 +13,7 @@ int main(int argc, char** argv) {
auto c = New<Config>(argc, argv);
auto g = New<ExpressionGraph>();
- g->setDevice({0, DeviceType::cpu});
+ g->setDevice({0, DeviceType::gpu});
g->reserveWorkspaceMB(512);
for(int i = 0; i < 10; ++i) {