Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/marian.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Marcin Junczys-Dowmunt <junczys@amu.edu.pl> 2018-03-03 05:06:26 +0300
committer: Marcin Junczys-Dowmunt <junczys@amu.edu.pl> 2018-03-03 05:06:26 +0300
commit: 8ec6eef9d57c886513a581ce3c6d3502f5e019b7 (patch)
tree: 07660d65c8a51a89d7645089f94ab7d2c901a086 /src
parent: 42293372caf2a052dc8d6693b4855413d8de5a14 (diff)
more coda separation
Diffstat (limited to 'src')
-rw-r--r-- src/CMakeLists.txt 40
-rw-r--r-- src/examples/CMakeLists.txt 5
-rw-r--r-- src/graph/expression_operators.cpp 5
-rw-r--r-- src/graph/node_operators_unary.h 6
-rw-r--r-- src/tensors/backend.cpp 5
-rw-r--r-- src/tensors/cpu/prod.cpp 4
-rw-r--r-- src/tensors/device.h 7
-rw-r--r-- src/tensors/dispatch.h 76
-rw-r--r-- src/tensors/tensor.h 22
-rw-r--r-- src/tensors/tensor_operators.h 60
-rw-r--r-- src/tests/CMakeLists.txt 36
-rw-r--r-- src/translator/beam_search.h 2
-rw-r--r-- src/translator/helpers.cpp 4
13 files changed, 200 insertions, 72 deletions
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index a8405cdb..2e3730ad 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -4,7 +4,7 @@ include_directories(.)
include_directories(3rd_party)
include_directories(3rd_party/SQLiteCpp/include)
-cuda_add_library(marian
+add_library(marian STATIC
common/utils.cpp
common/logging.cpp
common/config.cpp
@@ -26,14 +26,6 @@ cuda_add_library(marian
tensors/cpu/prod.cpp
tensors/cpu/tensor_operators.cpp
- tensors/gpu/device.cu
- tensors/gpu/algorithm.cu
- tensors/gpu/dropout.cu
- tensors/gpu/prod.cu
- tensors/gpu/element.cu
- tensors/gpu/add.cu
- tensors/gpu/tensor_operators.cu
- tensors/gpu/cudnn_wrappers.cu
graph/expression_graph.cpp
graph/expression_operators.cpp
@@ -53,26 +45,37 @@ cuda_add_library(marian
translator/history.cpp
translator/output_collector.cpp
- translator/nth_element.cu
translator/nth_element.cpp
- translator/helpers.cu
translator/helpers.cpp
translator/scorers.cpp
- training/dropper.cu
training/graph_group_async.cpp
training/graph_group_async_drop.cpp
training/graph_group_sync.cpp
training/graph_group_singleton.cpp
training/graph_group_multinode.cpp
- training/sparse_tensor.cu
training/validator.cpp
rescorer/score_collector.cpp
$<TARGET_OBJECTS:libyaml-cpp>
- $<TARGET_OBJECTS:SQLiteCpp>
- STATIC
-)
+ $<TARGET_OBJECTS:SQLiteCpp>)
+
+if(CUDA_FOUND)
+cuda_add_library(marian_cuda
+ tensors/gpu/device.cu
+ tensors/gpu/algorithm.cu
+ tensors/gpu/dropout.cu
+ tensors/gpu/prod.cu
+ tensors/gpu/element.cu
+ tensors/gpu/add.cu
+ tensors/gpu/tensor_operators.cu
+ tensors/gpu/cudnn_wrappers.cu
+ translator/nth_element.cu
+ translator/helpers.cu
+ training/dropper.cu
+ training/sparse_tensor.cu
+ STATIC)
+endif(CUDA_FOUND)
set_target_properties(marian PROPERTIES LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}")
set_target_properties(marian PROPERTIES ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}")
@@ -99,7 +102,10 @@ endif(COMPILE_SERVER)
foreach(exec ${EXECUTABLES})
target_link_libraries(${exec} marian ${EXT_LIBS})
- cuda_add_cublas_to_target(${exec})
+ if(CUDA_FOUND)
+ target_link_libraries(${exec} marian marian_cuda ${EXT_LIBS} ${CMAKE_THREAD_LIBS_INIT})
+ cuda_add_cublas_to_target(${exec})
+ endif(CUDA_FOUND)
set_target_properties(${exec} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}")
endforeach(exec)
diff --git a/src/examples/CMakeLists.txt b/src/examples/CMakeLists.txt
index d4aed9a2..bcce6083 100644
--- a/src/examples/CMakeLists.txt
+++ b/src/examples/CMakeLists.txt
@@ -3,6 +3,9 @@ add_executable(mnist_example mnist/mnist_ffnn.cpp)
foreach(exec iris_example mnist_example)
target_link_libraries(${exec} marian ${EXT_LIBS})
- cuda_add_cublas_to_target(${exec})
+ if(CUDA_FOUND)
+ target_link_libraries(${exec} marian marian_cuda ${EXT_LIBS})
+ cuda_add_cublas_to_target(${exec})
+ endif(CUDA_FOUND)
set_target_properties(${exec} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}")
endforeach(exec)
diff --git a/src/graph/expression_operators.cpp b/src/graph/expression_operators.cpp
index be40a0d4..a1c9faa4 100644
--- a/src/graph/expression_operators.cpp
+++ b/src/graph/expression_operators.cpp
@@ -1,5 +1,4 @@
#include "graph/expression_operators.h"
-//#include "kernels/sparse.h"
#include "layers/constructors.h"
#include "graph/node_operators.h"
@@ -333,6 +332,8 @@ Expr shift(Expr a, Shape shift) {
// return Expression<LexicalProbNodeOp>(logits, att, eps, lf);
//}
+#ifdef CUDA_FOUND
+
Expr avg_pooling(
Expr x,
int height,
@@ -410,4 +411,6 @@ Expr pooling_with_masking(Expr x, Expr mask, int width, bool isEven) {
return Expression<PoolingWithMaskingOp>(x, mask, width, isEven);
}
+#endif
+
}
diff --git a/src/graph/node_operators_unary.h b/src/graph/node_operators_unary.h
index e857e790..0ca2c2a2 100644
--- a/src/graph/node_operators_unary.h
+++ b/src/graph/node_operators_unary.h
@@ -1,15 +1,13 @@
#pragma once
#include "tensors/tensor.h"
-#include "tensors/gpu/backend.h"
+#include "tensors/backend.h"
#include "graph/node.h"
-//#include "kernels/sparse.h"
#include "tensors/tensor_operators.h"
#include "functional/functional.h"
-#include "tensors/gpu/cudnn_wrappers.h"
-
+//#include "tensors/gpu/cudnn_wrappers.h"
namespace marian {
diff --git a/src/tensors/backend.cpp b/src/tensors/backend.cpp
index a1d66e9a..05a70b18 100644
--- a/src/tensors/backend.cpp
+++ b/src/tensors/backend.cpp
@@ -1,14 +1,19 @@
#include "tensors/backend.h"
+#ifdef CUDA_FOUND
#include "tensors/gpu/backend.h"
+#endif
+
#include "tensors/cpu/backend.h"
namespace marian {
Ptr<Backend> BackendByDevice(DeviceId deviceId, size_t seed) {
+#ifdef CUDA_FOUND
if(deviceId.type == DeviceType::gpu)
return New<gpu::Backend>(deviceId, seed);
else
+#endif
return New<cpu::Backend>(deviceId, seed);
}
diff --git a/src/tensors/cpu/prod.cpp b/src/tensors/cpu/prod.cpp
index c93ba677..927d8048 100644
--- a/src/tensors/cpu/prod.cpp
+++ b/src/tensors/cpu/prod.cpp
@@ -3,8 +3,8 @@
* SPDX-License-Identifier: MIT
*/
-#include "tensors/gpu/prod.h"
-#include "tensors/gpu/backend.h"
+#include "tensors/tensor.h"
+#include "tensors/cpu/backend.h"
#if MKL_FOUND
#include <mkl.h>
diff --git a/src/tensors/device.h b/src/tensors/device.h
index 22e41bd5..74d68b3d 100644
--- a/src/tensors/device.h
+++ b/src/tensors/device.h
@@ -59,10 +59,17 @@ namespace cpu {
}
static inline Ptr<Device> DispatchDevice(DeviceId deviceId, size_t alignment = 256) {
+#ifdef CUDA_FOUND
if(deviceId.type == DeviceType::gpu)
return New<gpu::Device>(deviceId, alignment);
else
return New<cpu::Device>(deviceId, alignment);
+#else
+ if(deviceId.type == DeviceType::gpu)
+ ABORT("CUDA support not compiled into marian");
+ else
+ return New<cpu::Device>(deviceId, alignment);
+#endif
}
}
diff --git a/src/tensors/dispatch.h b/src/tensors/dispatch.h
index d8e218b3..750cfff5 100644
--- a/src/tensors/dispatch.h
+++ b/src/tensors/dispatch.h
@@ -1,5 +1,6 @@
#pragma once
+#ifdef CUDA_FOUND
#define DISPATCH1(Function, Arg1) \
namespace gpu { \
@@ -127,3 +128,78 @@
cpu::Function(arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9); \
}
+#else
+
+// CPU-only DISPATCH1: declares cpu::Function(Arg1) and defines a free-function
+// wrapper that forwards its single argument to it. The wrapper is emitted as
+// `static inline`, like DISPATCH2..DISPATCH9 below, so that this header-defined
+// function does not yield duplicate symbols when included from several
+// translation units.
+#define DISPATCH1(Function, Arg1) \
+ namespace cpu { \
+ void Function(Arg1); \
+ } \
+ static inline void Function(Arg1 arg1) { \
+ cpu::Function(arg1); \
+ }
+
+#define DISPATCH2(Function, Arg1, Arg2) \
+ namespace cpu { \
+ void Function(Arg1, Arg2); \
+ } \
+ static inline void Function(Arg1 arg1, Arg2 arg2) { \
+ cpu::Function(arg1, arg2); \
+ }
+
+#define DISPATCH3(Function, Arg1, Arg2, Arg3) \
+ namespace cpu { \
+ void Function(Arg1, Arg2, Arg3); \
+ } \
+ static inline void Function(Arg1 arg1, Arg2 arg2, Arg3 arg3) { \
+ cpu::Function(arg1, arg2, arg3); \
+ }
+
+#define DISPATCH4(Function, Arg1, Arg2, Arg3, Arg4) \
+ namespace cpu { \
+ void Function(Arg1, Arg2, Arg3, Arg4); \
+ } \
+ static inline void Function(Arg1 arg1, Arg2 arg2, Arg3 arg3, Arg4 arg4) { \
+ cpu::Function(arg1, arg2, arg3, arg4); \
+ }
+
+#define DISPATCH5(Function, Arg1, Arg2, Arg3, Arg4, Arg5) \
+ namespace cpu { \
+ void Function(Arg1, Arg2, Arg3, Arg4, Arg5); \
+ } \
+ static inline void Function(Arg1 arg1, Arg2 arg2, Arg3 arg3, Arg4 arg4, Arg5 arg5) { \
+ cpu::Function(arg1, arg2, arg3, arg4, arg5); \
+ }
+
+#define DISPATCH6(Function, Arg1, Arg2, Arg3, Arg4, Arg5, Arg6) \
+ namespace cpu { \
+ void Function(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6); \
+ } \
+ static inline void Function(Arg1 arg1, Arg2 arg2, Arg3 arg3, Arg4 arg4, Arg5 arg5, Arg6 arg6) { \
+ cpu::Function(arg1, arg2, arg3, arg4, arg5, arg6); \
+ }
+
+#define DISPATCH7(Function, Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7) \
+ namespace cpu { \
+ void Function(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7); \
+ } \
+ static inline void Function(Arg1 arg1, Arg2 arg2, Arg3 arg3, Arg4 arg4, Arg5 arg5, Arg6 arg6, Arg7 arg7) { \
+ cpu::Function(arg1, arg2, arg3, arg4, arg5, arg6, arg7); \
+ }
+
+#define DISPATCH8(Function, Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7, Arg8) \
+ namespace cpu { \
+ void Function(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7, Arg8); \
+ } \
+ static inline void Function(Arg1 arg1, Arg2 arg2, Arg3 arg3, Arg4 arg4, Arg5 arg5, Arg6 arg6, Arg7 arg7, Arg8 arg8) { \
+ cpu::Function(arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8); \
+ }
+
+#define DISPATCH9(Function, Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7, Arg8, Arg9) \
+ namespace cpu { \
+ void Function(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7, Arg8, Arg9); \
+ } \
+ static inline void Function(Arg1 arg1, Arg2 arg2, Arg3 arg3, Arg4 arg4, Arg5 arg5, Arg6 arg6, Arg7 arg7, Arg8 arg8, Arg9 arg9) { \
+ cpu::Function(arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9); \
+ }
+
+#endif
diff --git a/src/tensors/tensor.h b/src/tensors/tensor.h
index 24ca0738..8dc6a223 100644
--- a/src/tensors/tensor.h
+++ b/src/tensors/tensor.h
@@ -11,7 +11,10 @@
#include "tensors/backend.h"
#include <algorithm>
+
+#ifdef CUDA_FOUND
#include "tensors/gpu/algorithm.h"
+#endif
namespace marian {
@@ -53,32 +56,40 @@ public:
float get(size_t i) {
float temp;
+#ifdef CUDA_FOUND
if(backend_->getDevice().type == DeviceType::gpu)
gpu::copy(backend_, data() + i, data() + i + 1, &temp);
else
+#endif
std::copy(data() + i, data() + i + 1, &temp);
return temp;
}
void set(size_t i, float value) {
+#ifdef CUDA_FOUND
if(backend_->getDevice().type == DeviceType::gpu)
gpu::copy(backend_, &value, &value + 1, data() + i);
else
+#endif
std::copy(&value, &value + 1, data() + i);
}
void get(std::vector<float> &v) {
v.resize(size());
+#ifdef CUDA_FOUND
if(backend_->getDevice().type == DeviceType::gpu)
gpu::copy(backend_, data(), data() + size(), v.data());
else
+#endif
std::copy(data(), data() + size(), v.data());
}
void set(const float* begin, const float* end) {
+#ifdef CUDA_FOUND
if(backend_->getDevice().type == DeviceType::gpu)
gpu::copy(backend_, begin, end, data());
else
+#endif
std::copy(begin, end, data());
}
@@ -87,27 +98,32 @@ public:
}
void set(float value) {
+#ifdef CUDA_FOUND
if(backend_->getDevice().type == DeviceType::gpu)
gpu::fill(backend_, data(), data() + size(), value);
else
+#endif
std::fill(data(), data() + size(), value);
}
void setSparse(const std::vector<size_t> &k,
const std::vector<float> &v) {
- if(backend_->getDevice().type == DeviceType::gpu) {
+#ifdef CUDA_FOUND
+ if(backend_->getDevice().type == DeviceType::gpu)
gpu::setSparse(backend_, k, v, data());
- } else {
+ else
+#endif
for(int i = 0; i < k.size(); ++i)
data()[k[i]] = v[i];
- }
}
void copyFrom(Tensor in) {
+#ifdef CUDA_FOUND
if(in->getBackend()->getDevice().type == DeviceType::gpu ||
backend_->getDevice().type == DeviceType::gpu)
gpu::copy(backend_, in->data(), in->data() + in->size(), data());
else
+#endif
std::copy(in->data(), in->data() + in->size(), data());
}
diff --git a/src/tensors/tensor_operators.h b/src/tensors/tensor_operators.h
index 53b7eb0f..7086b97b 100644
--- a/src/tensors/tensor_operators.h
+++ b/src/tensors/tensor_operators.h
@@ -10,9 +10,11 @@
#include "functional/tmp.h"
#include "functional/tensor.h"
+#ifdef CUDA_FOUND
#include "tensors/gpu/element.h"
#include "tensors/gpu/add.h"
#include "tensors/gpu/prod.h"
+#endif
#include "tensors/cpu/element.h"
#include "tensors/cpu/add.h"
@@ -21,9 +23,11 @@ namespace marian {
template <class Functor, class ...Tensors>
void Element(Functor functor, marian::Tensor out, Tensors ...tensors) {
+#ifdef CUDA_FOUND
if(out->getBackend()->getDevice().type == DeviceType::gpu)
gpu::Element(functor, out, tensors...);
else
+#endif
cpu::Element(functor, out, tensors...);
}
@@ -32,9 +36,11 @@ namespace marian {
float scale,
marian::Tensor out,
Tensors... tensors) {
+#ifdef CUDA_FOUND
if(out->getBackend()->getDevice().type == DeviceType::gpu)
gpu::Add(functor, scale, out, tensors...);
else
+#endif
cpu::Add(functor, scale, out, tensors...);
}
@@ -81,21 +87,23 @@ namespace marian {
DISPATCH3(Concatenate, marian::Tensor, const std::vector<marian::Tensor>&, int)
+#ifdef CUDA_FOUND
namespace gpu {
void Deconcatenate(std::vector<marian::Tensor>& outputs, const marian::Tensor in, int ax);
}
+#endif
namespace cpu {
void Deconcatenate(std::vector<marian::Tensor>& outputs, const marian::Tensor in, int ax);
}
static inline void Deconcatenate(std::vector<marian::Tensor>& outputs, const marian::Tensor in, int ax) {
- if(in->getBackend()->getDevice().type == DeviceType::gpu) {
+#ifdef CUDA_FOUND
+ if(in->getBackend()->getDevice().type == DeviceType::gpu)
gpu::Deconcatenate(outputs, in, ax);
- }
- else {
+ else
+#endif
cpu::Deconcatenate(outputs, in, ax);
- }
}
DISPATCH5(LayerNormalization, marian::Tensor, marian::Tensor, marian::Tensor, marian::Tensor, float)
@@ -116,11 +124,13 @@ namespace marian {
DISPATCH2(LSTMCellForward, marian::Tensor, std::vector<marian::Tensor>)
DISPATCH2(LSTMOutputForward, marian::Tensor, std::vector<marian::Tensor>);
+#ifdef CUDA_FOUND
namespace gpu {
void LSTMCellBackward(std::vector<marian::Tensor> outputs,
std::vector<marian::Tensor> inputs,
marian::Tensor adj);
}
+#endif
namespace cpu {
void LSTMCellBackward(std::vector<marian::Tensor> outputs,
@@ -131,19 +141,21 @@ namespace marian {
static inline void LSTMCellBackward(std::vector<marian::Tensor> outputs,
std::vector<marian::Tensor> inputs,
marian::Tensor adj) {
- if(adj->getBackend()->getDevice().type == DeviceType::gpu) {
+#ifdef CUDA_FOUND
+ if(adj->getBackend()->getDevice().type == DeviceType::gpu)
gpu::LSTMCellBackward(outputs, inputs, adj);
- }
- else {
+ else
+#endif
cpu::LSTMCellBackward(outputs, inputs, adj);
- }
}
+#ifdef CUDA_FOUND
namespace gpu {
void LSTMOutputBackward(std::vector<marian::Tensor> outputs,
std::vector<marian::Tensor> inputs,
marian::Tensor adj);
}
+#endif
namespace cpu {
void LSTMOutputBackward(std::vector<marian::Tensor> outputs,
@@ -154,22 +166,24 @@ namespace marian {
static inline void LSTMOutputBackward(std::vector<marian::Tensor> outputs,
std::vector<marian::Tensor> inputs,
marian::Tensor adj) {
- if(adj->getBackend()->getDevice().type == DeviceType::gpu) {
+#ifdef CUDA_FOUND
+ if(adj->getBackend()->getDevice().type == DeviceType::gpu)
gpu::LSTMOutputBackward(outputs, inputs, adj);
- }
- else {
+ else
+#endif
cpu::LSTMOutputBackward(outputs, inputs, adj);
- }
}
DISPATCH3(GRUFastForward, marian::Tensor, std::vector<marian::Tensor>, bool)
+#ifdef CUDA_FOUND
namespace gpu {
void GRUFastBackward(std::vector<marian::Tensor> outputs,
std::vector<marian::Tensor> inputs,
marian::Tensor adj,
bool final);
}
+#endif
namespace cpu {
void GRUFastBackward(std::vector<marian::Tensor> outputs,
@@ -182,35 +196,37 @@ namespace marian {
std::vector<marian::Tensor> inputs,
marian::Tensor adj,
bool final = false) {
- if(adj->getBackend()->getDevice().type == DeviceType::gpu) {
+#ifdef CUDA_FOUND
+ if(adj->getBackend()->getDevice().type == DeviceType::gpu)
gpu::GRUFastBackward(outputs, inputs, adj, final);
- }
- else {
+ else
+#endif
cpu::GRUFastBackward(outputs, inputs, adj, final);
- }
}
DISPATCH4(Att, marian::Tensor, marian::Tensor, marian::Tensor, marian::Tensor)
DISPATCH7(AttBack, marian::Tensor, marian::Tensor, marian::Tensor, marian::Tensor, marian::Tensor, marian::Tensor, marian::Tensor)
+#ifdef CUDA_FOUND
namespace gpu {
float L2Norm(marian::Tensor in);
}
+#endif
namespace cpu {
float L2Norm(marian::Tensor in);
}
static inline float L2Norm(marian::Tensor in) {
- if(in->getBackend()->getDevice().type == DeviceType::gpu) {
+#ifdef CUDA_FOUND
+ if(in->getBackend()->getDevice().type == DeviceType::gpu)
return gpu::L2Norm(in);
- }
- else {
+ else
+#endif
return cpu::L2Norm(in);
- }
}
-
+
DISPATCH5(PoolingWithMaskingForward, marian::Tensor, marian::Tensor, marian::Tensor, int, bool)
DISPATCH6(PoolingWithMaskingBackward, marian::Tensor, marian::Tensor, marian::Tensor, marian::Tensor, int, bool)
-
+
}
diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt
index 16882b52..5a021e96 100644
--- a/src/tests/CMakeLists.txt
+++ b/src/tests/CMakeLists.txt
@@ -9,7 +9,11 @@ set(UNIT_TESTS
foreach(test ${UNIT_TESTS})
add_executable("run_${test}" run_tests.cpp "${test}.cpp")
target_link_libraries("run_${test}" marian ${EXT_LIBS} Catch)
- cuda_add_cublas_to_target("run_${test}")
+
+ if(CUDA_FOUND)
+ target_link_libraries("run_${test}" marian marian_cuda ${EXT_LIBS} Catch)
+ cuda_add_cublas_to_target("run_${test}")
+ endif(CUDA_FOUND)
add_test(NAME ${test} COMMAND "run_${test}")
endforeach(test)
@@ -18,35 +22,23 @@ endforeach(test)
# Testing apps
add_executable(logger_test logger_test.cpp)
add_executable(dropout_test dropout_test.cpp)
-#cuda_add_executable(bn_test bn_test.cu)
+
+if(CUDA_FOUND)
cuda_add_executable(pooling_test pooling_test.cu)
-#cuda_add_executable(marian_test marian_test.cu)
-cuda_add_executable(tensor_test tensor_test.cu)
+endif(CUDA_FOUND)
add_executable(sqlite_test sqlite_test.cpp)
-
foreach(exec
logger_test
dropout_test
- pooling_test
- #marian_test
- #bn_test
- tensor_test
sqlite_test
-)
+ )
target_link_libraries(${exec} marian ${EXT_LIBS})
- cuda_add_cublas_to_target(${exec})
+ if(CUDA_FOUND)
+ target_link_libraries(${exec} marian marian_cuda ${EXT_LIBS} Catch)
+ cuda_add_cublas_to_target(${exec})
+ endif(CUDA_FOUND)
+
set_target_properties(${exec} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}")
endforeach(exec)
-# if(CUDNN_FOUND)
- # cuda_add_executable(conv_test conv_test.cu)
-
- # foreach(exec
- # conv_test
- # )
- # target_link_libraries(${exec} marian ${EXT_LIBS})
- # cuda_add_cublas_to_target(${exec})
- # set_target_properties(${exec} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}")
- # endforeach(exec)
-# endif(CUDNN_FOUND)
diff --git a/src/translator/beam_search.h b/src/translator/beam_search.h
index 20b7b628..cfe94f59 100644
--- a/src/translator/beam_search.h
+++ b/src/translator/beam_search.h
@@ -105,9 +105,11 @@ public:
// @TODO: unify this
Ptr<NthElement> nth;
+#ifdef CUDA_FOUND
if(graph->getDevice().type == DeviceType::gpu)
nth = New<NthElementGPU>(localBeamSize, dimBatch, graph->getDevice());
else
+#endif
nth = New<NthElementCPU>(localBeamSize, dimBatch);
Beams beams(dimBatch);
diff --git a/src/translator/helpers.cpp b/src/translator/helpers.cpp
index 317f0cb0..f112e405 100644
--- a/src/translator/helpers.cpp
+++ b/src/translator/helpers.cpp
@@ -38,18 +38,22 @@ void suppressUnk(Expr probs) {
if(probs->val()->getBackend()->getDevice().type == DeviceType::cpu) {
cpu::suppressUnk(probs);
}
+#ifdef CUDA_FOUND
else {
gpu::suppressUnk(probs);
}
+#endif
}
void suppressWord(Expr probs, Word id) {
if(probs->val()->getBackend()->getDevice().type == DeviceType::cpu) {
cpu::suppressWord(probs, id);
}
+#ifdef CUDA_FOUND
else {
gpu::suppressWord(probs, id);
}
+#endif
}
}