diff options
author | Marcin Junczys-Dowmunt <junczys@amu.edu.pl> | 2018-03-03 05:06:26 +0300 |
---|---|---|
committer | Marcin Junczys-Dowmunt <junczys@amu.edu.pl> | 2018-03-03 05:06:26 +0300 |
commit | 8ec6eef9d57c886513a581ce3c6d3502f5e019b7 (patch) | |
tree | 07660d65c8a51a89d7645089f94ab7d2c901a086 /src | |
parent | 42293372caf2a052dc8d6693b4855413d8de5a14 (diff) |
more CUDA separation
Diffstat (limited to 'src')
-rw-r--r-- | src/CMakeLists.txt | 40 | ||||
-rw-r--r-- | src/examples/CMakeLists.txt | 5 | ||||
-rw-r--r-- | src/graph/expression_operators.cpp | 5 | ||||
-rw-r--r-- | src/graph/node_operators_unary.h | 6 | ||||
-rw-r--r-- | src/tensors/backend.cpp | 5 | ||||
-rw-r--r-- | src/tensors/cpu/prod.cpp | 4 | ||||
-rw-r--r-- | src/tensors/device.h | 7 | ||||
-rw-r--r-- | src/tensors/dispatch.h | 76 | ||||
-rw-r--r-- | src/tensors/tensor.h | 22 | ||||
-rw-r--r-- | src/tensors/tensor_operators.h | 60 | ||||
-rw-r--r-- | src/tests/CMakeLists.txt | 36 | ||||
-rw-r--r-- | src/translator/beam_search.h | 2 | ||||
-rw-r--r-- | src/translator/helpers.cpp | 4 |
13 files changed, 200 insertions, 72 deletions
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index a8405cdb..2e3730ad 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -4,7 +4,7 @@ include_directories(.) include_directories(3rd_party) include_directories(3rd_party/SQLiteCpp/include) -cuda_add_library(marian +add_library(marian STATIC common/utils.cpp common/logging.cpp common/config.cpp @@ -26,14 +26,6 @@ cuda_add_library(marian tensors/cpu/prod.cpp tensors/cpu/tensor_operators.cpp - tensors/gpu/device.cu - tensors/gpu/algorithm.cu - tensors/gpu/dropout.cu - tensors/gpu/prod.cu - tensors/gpu/element.cu - tensors/gpu/add.cu - tensors/gpu/tensor_operators.cu - tensors/gpu/cudnn_wrappers.cu graph/expression_graph.cpp graph/expression_operators.cpp @@ -53,26 +45,37 @@ cuda_add_library(marian translator/history.cpp translator/output_collector.cpp - translator/nth_element.cu translator/nth_element.cpp - translator/helpers.cu translator/helpers.cpp translator/scorers.cpp - training/dropper.cu training/graph_group_async.cpp training/graph_group_async_drop.cpp training/graph_group_sync.cpp training/graph_group_singleton.cpp training/graph_group_multinode.cpp - training/sparse_tensor.cu training/validator.cpp rescorer/score_collector.cpp $<TARGET_OBJECTS:libyaml-cpp> - $<TARGET_OBJECTS:SQLiteCpp> - STATIC -) + $<TARGET_OBJECTS:SQLiteCpp>) + +if(CUDA_FOUND) +cuda_add_library(marian_cuda + tensors/gpu/device.cu + tensors/gpu/algorithm.cu + tensors/gpu/dropout.cu + tensors/gpu/prod.cu + tensors/gpu/element.cu + tensors/gpu/add.cu + tensors/gpu/tensor_operators.cu + tensors/gpu/cudnn_wrappers.cu + translator/nth_element.cu + translator/helpers.cu + training/dropper.cu + training/sparse_tensor.cu + STATIC) +endif(CUDA_FOUND) set_target_properties(marian PROPERTIES LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}") set_target_properties(marian PROPERTIES ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}") @@ -99,7 +102,10 @@ endif(COMPILE_SERVER) foreach(exec ${EXECUTABLES}) target_link_libraries(${exec} marian 
${EXT_LIBS}) - cuda_add_cublas_to_target(${exec}) + if(CUDA_FOUND) + target_link_libraries(${exec} marian marian_cuda ${EXT_LIBS} ${CMAKE_THREAD_LIBS_INIT}) + cuda_add_cublas_to_target(${exec}) + endif(CUDA_FOUND) set_target_properties(${exec} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}") endforeach(exec) diff --git a/src/examples/CMakeLists.txt b/src/examples/CMakeLists.txt index d4aed9a2..bcce6083 100644 --- a/src/examples/CMakeLists.txt +++ b/src/examples/CMakeLists.txt @@ -3,6 +3,9 @@ add_executable(mnist_example mnist/mnist_ffnn.cpp) foreach(exec iris_example mnist_example) target_link_libraries(${exec} marian ${EXT_LIBS}) - cuda_add_cublas_to_target(${exec}) + if(CUDA_FOUND) + target_link_libraries(${exec} marian marian_cuda ${EXT_LIBS}) + cuda_add_cublas_to_target(${exec}) + endif(CUDA_FOUND) set_target_properties(${exec} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}") endforeach(exec) diff --git a/src/graph/expression_operators.cpp b/src/graph/expression_operators.cpp index be40a0d4..a1c9faa4 100644 --- a/src/graph/expression_operators.cpp +++ b/src/graph/expression_operators.cpp @@ -1,5 +1,4 @@ #include "graph/expression_operators.h" -//#include "kernels/sparse.h" #include "layers/constructors.h" #include "graph/node_operators.h" @@ -333,6 +332,8 @@ Expr shift(Expr a, Shape shift) { // return Expression<LexicalProbNodeOp>(logits, att, eps, lf); //} +#ifdef CUDA_FOUND + Expr avg_pooling( Expr x, int height, @@ -410,4 +411,6 @@ Expr pooling_with_masking(Expr x, Expr mask, int width, bool isEven) { return Expression<PoolingWithMaskingOp>(x, mask, width, isEven); } +#endif + } diff --git a/src/graph/node_operators_unary.h b/src/graph/node_operators_unary.h index e857e790..0ca2c2a2 100644 --- a/src/graph/node_operators_unary.h +++ b/src/graph/node_operators_unary.h @@ -1,15 +1,13 @@ #pragma once #include "tensors/tensor.h" -#include "tensors/gpu/backend.h" +#include "tensors/backend.h" #include "graph/node.h" -//#include 
"kernels/sparse.h" #include "tensors/tensor_operators.h" #include "functional/functional.h" -#include "tensors/gpu/cudnn_wrappers.h" - +//#include "tensors/gpu/cudnn_wrappers.h" namespace marian { diff --git a/src/tensors/backend.cpp b/src/tensors/backend.cpp index a1d66e9a..05a70b18 100644 --- a/src/tensors/backend.cpp +++ b/src/tensors/backend.cpp @@ -1,14 +1,19 @@ #include "tensors/backend.h" +#ifdef CUDA_FOUND #include "tensors/gpu/backend.h" +#endif + #include "tensors/cpu/backend.h" namespace marian { Ptr<Backend> BackendByDevice(DeviceId deviceId, size_t seed) { +#ifdef CUDA_FOUND if(deviceId.type == DeviceType::gpu) return New<gpu::Backend>(deviceId, seed); else +#endif return New<cpu::Backend>(deviceId, seed); } diff --git a/src/tensors/cpu/prod.cpp b/src/tensors/cpu/prod.cpp index c93ba677..927d8048 100644 --- a/src/tensors/cpu/prod.cpp +++ b/src/tensors/cpu/prod.cpp @@ -3,8 +3,8 @@ * SPDX-License-Identifier: MIT */ -#include "tensors/gpu/prod.h" -#include "tensors/gpu/backend.h" +#include "tensors/tensor.h" +#include "tensors/cpu/backend.h" #if MKL_FOUND #include <mkl.h> diff --git a/src/tensors/device.h b/src/tensors/device.h index 22e41bd5..74d68b3d 100644 --- a/src/tensors/device.h +++ b/src/tensors/device.h @@ -59,10 +59,17 @@ namespace cpu { } static inline Ptr<Device> DispatchDevice(DeviceId deviceId, size_t alignment = 256) { +#ifdef CUDA_FOUND if(deviceId.type == DeviceType::gpu) return New<gpu::Device>(deviceId, alignment); else return New<cpu::Device>(deviceId, alignment); +#else + if(deviceId.type == DeviceType::gpu) + ABORT("CUDA support not compiled into marian"); + else + return New<cpu::Device>(deviceId, alignment); +#endif } } diff --git a/src/tensors/dispatch.h b/src/tensors/dispatch.h index d8e218b3..750cfff5 100644 --- a/src/tensors/dispatch.h +++ b/src/tensors/dispatch.h @@ -1,5 +1,6 @@ #pragma once +#ifdef CUDA_FOUND #define DISPATCH1(Function, Arg1) \ namespace gpu { \ @@ -127,3 +128,78 @@ cpu::Function(arg1, arg2, arg3, arg4, arg5, 
arg6, arg7, arg8, arg9); \ } +#else + +#define DISPATCH1(Function, Arg1) \ + namespace cpu { \ + void Function(Arg1); \ + } \ + void Function(Arg1 arg1) { \ + cpu::Function(arg1); \ + } + +#define DISPATCH2(Function, Arg1, Arg2) \ + namespace cpu { \ + void Function(Arg1, Arg2); \ + } \ + static inline void Function(Arg1 arg1, Arg2 arg2) { \ + cpu::Function(arg1, arg2); \ + } + +#define DISPATCH3(Function, Arg1, Arg2, Arg3) \ + namespace cpu { \ + void Function(Arg1, Arg2, Arg3); \ + } \ + static inline void Function(Arg1 arg1, Arg2 arg2, Arg3 arg3) { \ + cpu::Function(arg1, arg2, arg3); \ + } + +#define DISPATCH4(Function, Arg1, Arg2, Arg3, Arg4) \ + namespace cpu { \ + void Function(Arg1, Arg2, Arg3, Arg4); \ + } \ + static inline void Function(Arg1 arg1, Arg2 arg2, Arg3 arg3, Arg4 arg4) { \ + cpu::Function(arg1, arg2, arg3, arg4); \ + } + +#define DISPATCH5(Function, Arg1, Arg2, Arg3, Arg4, Arg5) \ + namespace cpu { \ + void Function(Arg1, Arg2, Arg3, Arg4, Arg5); \ + } \ + static inline void Function(Arg1 arg1, Arg2 arg2, Arg3 arg3, Arg4 arg4, Arg5 arg5) { \ + cpu::Function(arg1, arg2, arg3, arg4, arg5); \ + } + +#define DISPATCH6(Function, Arg1, Arg2, Arg3, Arg4, Arg5, Arg6) \ + namespace cpu { \ + void Function(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6); \ + } \ + static inline void Function(Arg1 arg1, Arg2 arg2, Arg3 arg3, Arg4 arg4, Arg5 arg5, Arg6 arg6) { \ + cpu::Function(arg1, arg2, arg3, arg4, arg5, arg6); \ + } + +#define DISPATCH7(Function, Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7) \ + namespace cpu { \ + void Function(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7); \ + } \ + static inline void Function(Arg1 arg1, Arg2 arg2, Arg3 arg3, Arg4 arg4, Arg5 arg5, Arg6 arg6, Arg7 arg7) { \ + cpu::Function(arg1, arg2, arg3, arg4, arg5, arg6, arg7); \ + } + +#define DISPATCH8(Function, Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7, Arg8) \ + namespace cpu { \ + void Function(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7, Arg8); \ + } \ + static inline void Function(Arg1 arg1, Arg2 
arg2, Arg3 arg3, Arg4 arg4, Arg5 arg5, Arg6 arg6, Arg7 arg7, Arg8 arg8) { \ + cpu::Function(arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8); \ + } + +#define DISPATCH9(Function, Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7, Arg8, Arg9) \ + namespace cpu { \ + void Function(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7, Arg8, Arg9); \ + } \ + static inline void Function(Arg1 arg1, Arg2 arg2, Arg3 arg3, Arg4 arg4, Arg5 arg5, Arg6 arg6, Arg7 arg7, Arg8 arg8, Arg9 arg9) { \ + cpu::Function(arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9); \ + } + +#endif diff --git a/src/tensors/tensor.h b/src/tensors/tensor.h index 24ca0738..8dc6a223 100644 --- a/src/tensors/tensor.h +++ b/src/tensors/tensor.h @@ -11,7 +11,10 @@ #include "tensors/backend.h" #include <algorithm> + +#ifdef CUDA_FOUND #include "tensors/gpu/algorithm.h" +#endif namespace marian { @@ -53,32 +56,40 @@ public: float get(size_t i) { float temp; +#ifdef CUDA_FOUND if(backend_->getDevice().type == DeviceType::gpu) gpu::copy(backend_, data() + i, data() + i + 1, &temp); else +#endif std::copy(data() + i, data() + i + 1, &temp); return temp; } void set(size_t i, float value) { +#ifdef CUDA_FOUND if(backend_->getDevice().type == DeviceType::gpu) gpu::copy(backend_, &value, &value + 1, data() + i); else +#endif std::copy(&value, &value + 1, data() + i); } void get(std::vector<float> &v) { v.resize(size()); +#ifdef CUDA_FOUND if(backend_->getDevice().type == DeviceType::gpu) gpu::copy(backend_, data(), data() + size(), v.data()); else +#endif std::copy(data(), data() + size(), v.data()); } void set(const float* begin, const float* end) { +#ifdef CUDA_FOUND if(backend_->getDevice().type == DeviceType::gpu) gpu::copy(backend_, begin, end, data()); else +#endif std::copy(begin, end, data()); } @@ -87,27 +98,32 @@ public: } void set(float value) { +#ifdef CUDA_FOUND if(backend_->getDevice().type == DeviceType::gpu) gpu::fill(backend_, data(), data() + size(), value); else +#endif std::fill(data(), data() + size(), value); } 
void setSparse(const std::vector<size_t> &k, const std::vector<float> &v) { - if(backend_->getDevice().type == DeviceType::gpu) { +#ifdef CUDA_FOUND + if(backend_->getDevice().type == DeviceType::gpu) gpu::setSparse(backend_, k, v, data()); - } else { + else +#endif for(int i = 0; i < k.size(); ++i) data()[k[i]] = v[i]; - } } void copyFrom(Tensor in) { +#ifdef CUDA_FOUND if(in->getBackend()->getDevice().type == DeviceType::gpu || backend_->getDevice().type == DeviceType::gpu) gpu::copy(backend_, in->data(), in->data() + in->size(), data()); else +#endif std::copy(in->data(), in->data() + in->size(), data()); } diff --git a/src/tensors/tensor_operators.h b/src/tensors/tensor_operators.h index 53b7eb0f..7086b97b 100644 --- a/src/tensors/tensor_operators.h +++ b/src/tensors/tensor_operators.h @@ -10,9 +10,11 @@ #include "functional/tmp.h" #include "functional/tensor.h" +#ifdef CUDA_FOUND #include "tensors/gpu/element.h" #include "tensors/gpu/add.h" #include "tensors/gpu/prod.h" +#endif #include "tensors/cpu/element.h" #include "tensors/cpu/add.h" @@ -21,9 +23,11 @@ namespace marian { template <class Functor, class ...Tensors> void Element(Functor functor, marian::Tensor out, Tensors ...tensors) { +#ifdef CUDA_FOUND if(out->getBackend()->getDevice().type == DeviceType::gpu) gpu::Element(functor, out, tensors...); else +#endif cpu::Element(functor, out, tensors...); } @@ -32,9 +36,11 @@ namespace marian { float scale, marian::Tensor out, Tensors... 
tensors) { +#ifdef CUDA_FOUND if(out->getBackend()->getDevice().type == DeviceType::gpu) gpu::Add(functor, scale, out, tensors...); else +#endif cpu::Add(functor, scale, out, tensors...); } @@ -81,21 +87,23 @@ namespace marian { DISPATCH3(Concatenate, marian::Tensor, const std::vector<marian::Tensor>&, int) +#ifdef CUDA_FOUND namespace gpu { void Deconcatenate(std::vector<marian::Tensor>& outputs, const marian::Tensor in, int ax); } +#endif namespace cpu { void Deconcatenate(std::vector<marian::Tensor>& outputs, const marian::Tensor in, int ax); } static inline void Deconcatenate(std::vector<marian::Tensor>& outputs, const marian::Tensor in, int ax) { - if(in->getBackend()->getDevice().type == DeviceType::gpu) { +#ifdef CUDA_FOUND + if(in->getBackend()->getDevice().type == DeviceType::gpu) gpu::Deconcatenate(outputs, in, ax); - } - else { + else +#endif cpu::Deconcatenate(outputs, in, ax); - } } DISPATCH5(LayerNormalization, marian::Tensor, marian::Tensor, marian::Tensor, marian::Tensor, float) @@ -116,11 +124,13 @@ namespace marian { DISPATCH2(LSTMCellForward, marian::Tensor, std::vector<marian::Tensor>) DISPATCH2(LSTMOutputForward, marian::Tensor, std::vector<marian::Tensor>); +#ifdef CUDA_FOUND namespace gpu { void LSTMCellBackward(std::vector<marian::Tensor> outputs, std::vector<marian::Tensor> inputs, marian::Tensor adj); } +#endif namespace cpu { void LSTMCellBackward(std::vector<marian::Tensor> outputs, @@ -131,19 +141,21 @@ namespace marian { static inline void LSTMCellBackward(std::vector<marian::Tensor> outputs, std::vector<marian::Tensor> inputs, marian::Tensor adj) { - if(adj->getBackend()->getDevice().type == DeviceType::gpu) { +#ifdef CUDA_FOUND + if(adj->getBackend()->getDevice().type == DeviceType::gpu) gpu::LSTMCellBackward(outputs, inputs, adj); - } - else { + else +#endif cpu::LSTMCellBackward(outputs, inputs, adj); - } } +#ifdef CUDA_FOUND namespace gpu { void LSTMOutputBackward(std::vector<marian::Tensor> outputs, std::vector<marian::Tensor> 
inputs, marian::Tensor adj); } +#endif namespace cpu { void LSTMOutputBackward(std::vector<marian::Tensor> outputs, @@ -154,22 +166,24 @@ namespace marian { static inline void LSTMOutputBackward(std::vector<marian::Tensor> outputs, std::vector<marian::Tensor> inputs, marian::Tensor adj) { - if(adj->getBackend()->getDevice().type == DeviceType::gpu) { +#ifdef CUDA_FOUND + if(adj->getBackend()->getDevice().type == DeviceType::gpu) gpu::LSTMOutputBackward(outputs, inputs, adj); - } - else { + else +#endif cpu::LSTMOutputBackward(outputs, inputs, adj); - } } DISPATCH3(GRUFastForward, marian::Tensor, std::vector<marian::Tensor>, bool) +#ifdef CUDA_FOUND namespace gpu { void GRUFastBackward(std::vector<marian::Tensor> outputs, std::vector<marian::Tensor> inputs, marian::Tensor adj, bool final); } +#endif namespace cpu { void GRUFastBackward(std::vector<marian::Tensor> outputs, @@ -182,35 +196,37 @@ namespace marian { std::vector<marian::Tensor> inputs, marian::Tensor adj, bool final = false) { - if(adj->getBackend()->getDevice().type == DeviceType::gpu) { +#ifdef CUDA_FOUND + if(adj->getBackend()->getDevice().type == DeviceType::gpu) gpu::GRUFastBackward(outputs, inputs, adj, final); - } - else { + else +#endif cpu::GRUFastBackward(outputs, inputs, adj, final); - } } DISPATCH4(Att, marian::Tensor, marian::Tensor, marian::Tensor, marian::Tensor) DISPATCH7(AttBack, marian::Tensor, marian::Tensor, marian::Tensor, marian::Tensor, marian::Tensor, marian::Tensor, marian::Tensor) +#ifdef CUDA_FOUND namespace gpu { float L2Norm(marian::Tensor in); } +#endif namespace cpu { float L2Norm(marian::Tensor in); } static inline float L2Norm(marian::Tensor in) { - if(in->getBackend()->getDevice().type == DeviceType::gpu) { +#ifdef CUDA_FOUND + if(in->getBackend()->getDevice().type == DeviceType::gpu) return gpu::L2Norm(in); - } - else { + else +#endif return cpu::L2Norm(in); - } } - + DISPATCH5(PoolingWithMaskingForward, marian::Tensor, marian::Tensor, marian::Tensor, int, bool) 
DISPATCH6(PoolingWithMaskingBackward, marian::Tensor, marian::Tensor, marian::Tensor, marian::Tensor, int, bool) - + } diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt index 16882b52..5a021e96 100644 --- a/src/tests/CMakeLists.txt +++ b/src/tests/CMakeLists.txt @@ -9,7 +9,11 @@ set(UNIT_TESTS foreach(test ${UNIT_TESTS}) add_executable("run_${test}" run_tests.cpp "${test}.cpp") target_link_libraries("run_${test}" marian ${EXT_LIBS} Catch) - cuda_add_cublas_to_target("run_${test}") + + if(CUDA_FOUND) + target_link_libraries("run_${test}" marian marian_cuda ${EXT_LIBS} Catch) + cuda_add_cublas_to_target("run_${test}") + endif(CUDA_FOUND) add_test(NAME ${test} COMMAND "run_${test}") endforeach(test) @@ -18,35 +22,23 @@ endforeach(test) # Testing apps add_executable(logger_test logger_test.cpp) add_executable(dropout_test dropout_test.cpp) -#cuda_add_executable(bn_test bn_test.cu) + +if(CUDA_FOUND) cuda_add_executable(pooling_test pooling_test.cu) -#cuda_add_executable(marian_test marian_test.cu) -cuda_add_executable(tensor_test tensor_test.cu) +endif(CUDA_FOUND) add_executable(sqlite_test sqlite_test.cpp) - foreach(exec logger_test dropout_test - pooling_test - #marian_test - #bn_test - tensor_test sqlite_test -) + ) target_link_libraries(${exec} marian ${EXT_LIBS}) - cuda_add_cublas_to_target(${exec}) + if(CUDA_FOUND) + target_link_libraries(${exec} marian marian_cuda ${EXT_LIBS} Catch) + cuda_add_cublas_to_target(${exec}) + endif(CUDA_FOUND) + set_target_properties(${exec} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}") endforeach(exec) -# if(CUDNN_FOUND) - # cuda_add_executable(conv_test conv_test.cu) - - # foreach(exec - # conv_test - # ) - # target_link_libraries(${exec} marian ${EXT_LIBS}) - # cuda_add_cublas_to_target(${exec}) - # set_target_properties(${exec} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}") - # endforeach(exec) -# endif(CUDNN_FOUND) diff --git a/src/translator/beam_search.h b/src/translator/beam_search.h 
index 20b7b628..cfe94f59 100644 --- a/src/translator/beam_search.h +++ b/src/translator/beam_search.h @@ -105,9 +105,11 @@ public: // @TODO: unify this Ptr<NthElement> nth; +#ifdef CUDA_FOUND if(graph->getDevice().type == DeviceType::gpu) nth = New<NthElementGPU>(localBeamSize, dimBatch, graph->getDevice()); else +#endif nth = New<NthElementCPU>(localBeamSize, dimBatch); Beams beams(dimBatch); diff --git a/src/translator/helpers.cpp b/src/translator/helpers.cpp index 317f0cb0..f112e405 100644 --- a/src/translator/helpers.cpp +++ b/src/translator/helpers.cpp @@ -38,18 +38,22 @@ void suppressUnk(Expr probs) { if(probs->val()->getBackend()->getDevice().type == DeviceType::cpu) { cpu::suppressUnk(probs); } +#ifdef CUDA_FOUND else { gpu::suppressUnk(probs); } +#endif } void suppressWord(Expr probs, Word id) { if(probs->val()->getBackend()->getDevice().type == DeviceType::cpu) { cpu::suppressWord(probs, id); } +#ifdef CUDA_FOUND else { gpu::suppressWord(probs, id); } +#endif } } |