github.com/marian-nmt/marian.git
author    Marcin Junczys-Dowmunt <junczys@amu.edu.pl>  2016-10-10 16:15:54 +0300
committer Marcin Junczys-Dowmunt <junczys@amu.edu.pl>  2016-10-10 16:25:11 +0300
commit    bab8badb02734ad51f7b5b8c1922d04c8cc4a104 (patch)
tree      2d6e47aee4e44162da2916b876e851248d25bcfc
parent    0d4b58712262a05498a42656fbd6a413a7652036 (diff)

    cuda options, fixes #13

-rw-r--r--  CMakeLists.txt            |  23
-rw-r--r--  src/gpu/nth_benchmark.cu  | 102
2 files changed, 114 insertions(+), 11 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 95d13e6d..6d044db3 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -8,23 +8,24 @@ SET(CMAKE_CXX_FLAGS " -std=c++14 -m64 -flto -march=native -fPIC -g -O3 -Ofast -f
include_directories(${amunmt_SOURCE_DIR})
-set(CUDA ON CACHE BOOL "Compile without CUDA")
+option(CUDA "Select to compile CUDA support by default if available" ON)
-if(NOT CUDA)
- message("-- Forcing compilation without CUDA.")
- add_definitions(-DNO_CUDA)
-else(NOT CUDA)
-find_package(CUDA)
-if(CUDA_FOUND)
+if(CUDA)
+ find_package(CUDA)
+ if(CUDA_FOUND)
LIST(APPEND CUDA_NVCC_FLAGS --default-stream per-thread; -std=c++11; -g; -O3; -arch=sm_35; -lineinfo; --use_fast_math;)
add_definitions(-DCUDA_API_PER_THREAD_DEFAULT_STREAM)
add_definitions(-DCUDA)
SET(CUDA_PROPAGATE_HOST_FLAGS OFF)
-else(CUDA_FOUND)
+ message("-- Compiling with CUDA support")
+ else(CUDA_FOUND)
+ add_definitions(-DNO_CUDA)
+ message("-- Cannot find CUDA libraries. Compiling without them." )
+ endif(CUDA_FOUND)
+else(CUDA)
+ message("-- Forcing compilation without CUDA.")
add_definitions(-DNO_CUDA)
- message("Cannot find CUDA libraries. Compiling without them." )
-endif(CUDA_FOUND)
-endif(NOT CUDA)
+endif(CUDA)
find_package(Boost COMPONENTS system filesystem program_options timer iostreams python thread)
if(Boost_FOUND)
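
With this change CUDA compilation is governed by a regular CMake option: it defaults to ON, falls back to a NO_CUDA build when the toolkit cannot be found, and can be switched off explicitly. A minimal sketch of driving the option from the command line (the out-of-source build layout below is an assumption, not part of this commit):

    # default: look for the CUDA toolkit and compile GPU support if it is found
    cmake ..
    # force a CPU-only build even if CUDA is installed
    cmake -DCUDA=OFF ..
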
diff --git a/src/gpu/nth_benchmark.cu b/src/gpu/nth_benchmark.cu
new file mode 100644
index 00000000..653e4d7c
--- /dev/null
+++ b/src/gpu/nth_benchmark.cu
@@ -0,0 +1,102 @@
+
+#include <cmath>
+#include <memory>
+#include <sstream>
+#include <random>
+#include <algorithm>
+
+#include <boost/timer/timer.hpp>
+#include "gpu/mblas/matrix.h"
+#include "gpu/nth_element.h"
+
+using namespace thrust::placeholders;
+
+struct ProbCompare {
+ ProbCompare(const float* data) : data_(data) {}
+
+ __host__ __device__
+ bool operator()(const unsigned a, const unsigned b) {
+ return data_[a] > data_[b];
+ }
+
+ const float* data_;
+};
+
+int main(int argc, char** argv) {
+
+ size_t beamSize = 5;
+ size_t vocSize = 30000;
+ size_t tests = 1000;
+
+ std::vector<float> rands(beamSize * vocSize);
+ std::random_device rnd_device;
+ std::mt19937 mersenne_engine(rnd_device());
+ std::uniform_real_distribution<float> dist(-100, -4);
+
+ auto gen = std::bind(dist, mersenne_engine);
+ std::generate(std::begin(rands), std::end(rands), gen);
+
+ rands[10000 + 30000 * 0] = -.1;
+ rands[10001 + 30000 * 1] = -.1;
+ rands[10002 + 30000 * 2] = -.1;
+ rands[10003 + 30000 * 3] = -.1;
+ rands[10004 + 30000 * 4] = -.1;
+
+ DeviceVector<float> ProbsOrig(beamSize * vocSize);
+ thrust::copy(rands.begin(), rands.end(), ProbsOrig.begin());
+
+ DeviceVector<float> Probs(beamSize * vocSize);
+
+ DeviceVector<unsigned> keys(Probs.size());
+ HostVector<unsigned> bestKeys(beamSize);
+ HostVector<float> bestCosts(beamSize);
+
+ while(0) {
+ boost::timer::cpu_timer timer;
+ for(int i = 0; i < tests; ++i) {
+ thrust::copy(ProbsOrig.begin(), ProbsOrig.end(), Probs.begin());
+
+ thrust::sequence(keys.begin(), keys.end());
+ thrust::nth_element(keys.begin(), keys.begin() + beamSize, keys.end(),
+ ProbCompare(thrust::raw_pointer_cast(Probs.data())));
+
+ //for(int i = 0; i < beamSize; ++i) {
+ // bestKeys[i] = keys[i];
+ // bestCosts[i] = Probs[keys[i]];
+ //}
+ }
+ std::cerr << "Search took " << timer.format(3, "%ws");
+ }
+
+ {
+ boost::timer::cpu_timer timer;
+ for(int i = 0; i < tests; ++i) {
+ thrust::copy(ProbsOrig.begin(), ProbsOrig.end(), Probs.begin());
+
+ for(size_t j = 0; j < beamSize; ++j) {
+ DeviceVector<float>::iterator iter =
+ algo::max_element(Probs.begin(), Probs.end());
+ bestKeys[j] = iter - Probs.begin();
+ bestCosts[j] = *iter;
+ *iter = std::numeric_limits<float>::lowest();
+ }
+ algo::copy(bestKeys.begin(), bestKeys.end(), keys.begin());
+ }
+ std::cerr << "Search took " << timer.format(3, "%ws") << std::endl;
+ }
+
+ {
+ boost::timer::cpu_timer timer;
+ for(int i = 0; i < tests; ++i) {
+ thrust::copy(ProbsOrig.begin(), ProbsOrig.end(), Probs.begin());
+
+ thrust::sequence(keys.begin(), keys.end());
+ thrust::sort_by_key(Probs.begin(), Probs.end(),
+ keys.begin(), algo::greater<float>());
+
+ algo::copy_n(keys.begin(), beamSize, bestKeys.begin());
+ algo::copy_n(Probs.begin(), beamSize, bestCosts.begin());
+ }
+ std::cerr << "Search took " << timer.format(3, "%ws") << std::endl;
+ }
+}
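
The new src/gpu/nth_benchmark.cu times three ways of pulling the beamSize best scores out of a beamSize x vocSize probability matrix: thrust::nth_element over an index sequence with the ProbCompare functor (currently disabled via while(0)), a loop that repeatedly takes max_element and masks the winner out, and a full sort_by_key. A minimal host-side sketch of the repeated-max strategy, using std:: algorithms instead of the project's algo:: and DeviceVector aliases (the names and sizes here are illustrative, not part of the commit):

    #include <algorithm>
    #include <cstddef>
    #include <iostream>
    #include <limits>
    #include <vector>

    int main() {
      const std::size_t beamSize = 3;
      // stand-in for the flattened probability matrix
      std::vector<float> probs = {-3.f, -0.5f, -7.f, -0.1f, -2.f, -9.f, -0.2f, -4.f};

      std::vector<std::size_t> bestKeys(beamSize);
      std::vector<float>       bestCosts(beamSize);

      for (std::size_t j = 0; j < beamSize; ++j) {
        auto iter = std::max_element(probs.begin(), probs.end());
        bestKeys[j]  = iter - probs.begin();               // position of the j-th best entry
        bestCosts[j] = *iter;                              // its score
        *iter = std::numeric_limits<float>::lowest();      // mask it out for the next pass
      }

      for (std::size_t j = 0; j < beamSize; ++j)
        std::cout << bestKeys[j] << " -> " << bestCosts[j] << "\n";
    }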