github.com/marian-nmt/marian.git
author    Marcin Junczys-Dowmunt <junczys@amu.edu.pl>  2016-10-10 16:15:54 +0300
committer Marcin Junczys-Dowmunt <junczys@amu.edu.pl>  2016-10-10 16:25:11 +0300
commit    bab8badb02734ad51f7b5b8c1922d04c8cc4a104 (patch)
tree      2d6e47aee4e44162da2916b876e851248d25bcfc
parent    0d4b58712262a05498a42656fbd6a413a7652036 (diff)

    cuda options, fixes #13

-rw-r--r--  CMakeLists.txt            |  23
-rw-r--r--  src/gpu/nth_benchmark.cu  | 102
2 files changed, 114 insertions(+), 11 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 95d13e6d..6d044db3 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -8,23 +8,24 @@ SET(CMAKE_CXX_FLAGS " -std=c++14 -m64 -flto -march=native -fPIC -g -O3 -Ofast -f
include_directories(${amunmt_SOURCE_DIR})
-set(CUDA ON CACHE BOOL "Compile without CUDA")
+option(CUDA "Select to compile CUDA support by default if available" ON)
-if(NOT CUDA)
- message("-- Forcing compilation without CUDA.")
- add_definitions(-DNO_CUDA)
-else(NOT CUDA)
-find_package(CUDA)
-if(CUDA_FOUND)
+if(CUDA)
+ find_package(CUDA)
+ if(CUDA_FOUND)
LIST(APPEND CUDA_NVCC_FLAGS --default-stream per-thread; -std=c++11; -g; -O3; -arch=sm_35; -lineinfo; --use_fast_math;)
add_definitions(-DCUDA_API_PER_THREAD_DEFAULT_STREAM)
add_definitions(-DCUDA)
SET(CUDA_PROPAGATE_HOST_FLAGS OFF)
-else(CUDA_FOUND)
+ message("-- Compiling with CUDA support")
+ else(CUDA_FOUND)
+ add_definitions(-DNO_CUDA)
+ message("-- Cannot find CUDA libraries. Compiling without them." )
+ endif(CUDA_FOUND)
+else(CUDA)
+ message("-- Forcing compilation without CUDA.")
add_definitions(-DNO_CUDA)
- message("Cannot find CUDA libraries. Compiling without them." )
-endif(CUDA_FOUND)
-endif(NOT CUDA)
+endif(CUDA)
find_package(Boost COMPONENTS system filesystem program_options timer iostreams python thread)
if(Boost_FOUND)
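
With this change CUDA compilation is governed by a regular CMake option: it defaults to ON, falls back to a NO_CUDA build when the toolkit cannot be found, and can be switched off explicitly. A minimal sketch of driving the option from the command line (the out-of-source build layout below is an assumption, not part of this commit):

    # default: look for the CUDA toolkit and compile GPU support if it is found
    cmake ..
    # force a CPU-only build even if CUDA is installed
    cmake -DCUDA=OFF ..
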
diff --git a/src/gpu/nth_benchmark.cu b/src/gpu/nth_benchmark.cu
new file mode 100644
index 00000000..653e4d7c
--- /dev/null
+++ b/src/gpu/nth_benchmark.cu
@@ -0,0 +1,102 @@
+
+#include <cmath>
+#include <memory>
+#include <sstream>
+#include <random>
+#include <algorithm>
+
+#include <boost/timer/timer.hpp>
+#include "gpu/mblas/matrix.h"
+#include "gpu/nth_element.h"
+
+using namespace thrust::placeholders;
+
+struct ProbCompare {
+ ProbCompare(const float* data) : data_(data) {}
+
+ __host__ __device__
+ bool operator()(const unsigned a, const unsigned b) {
+ return data_[a] > data_[b];
+ }
+
+ const float* data_;
+};
+
+int main(int argc, char** argv) {
+
+ size_t beamSize = 5;
+ size_t vocSize = 30000;
+ size_t tests = 1000;
+
+ std::vector<float> rands(beamSize * vocSize);
+ std::random_device rnd_device;
+ std::mt19937 mersenne_engine(rnd_device());
+ std::uniform_real_distribution<float> dist(-100, -4);
+
+ auto gen = std::bind(dist, mersenne_engine);
+ std::generate(std::begin(rands), std::end(rands), gen);
+
+ rands[10000 + 30000 * 0] = -.1;
+ rands[10001 + 30000 * 1] = -.1;
+ rands[10002 + 30000 * 2] = -.1;
+ rands[10003 + 30000 * 3] = -.1;
+ rands[10004 + 30000 * 4] = -.1;
+
+ DeviceVector<float> ProbsOrig(beamSize * vocSize);
+ thrust::copy(rands.begin(), rands.end(), ProbsOrig.begin());
+
+ DeviceVector<float> Probs(beamSize * vocSize);
+
+ DeviceVector<unsigned> keys(Probs.size());
+ HostVector<unsigned> bestKeys(beamSize);
+ HostVector<float> bestCosts(beamSize);
+
+ while(0) {
+ boost::timer::cpu_timer timer;
+ for(int i = 0; i < tests; ++i) {
+ thrust::copy(ProbsOrig.begin(), ProbsOrig.end(), Probs.begin());
+
+ thrust::sequence(keys.begin(), keys.end());
+ thrust::nth_element(keys.begin(), keys.begin() + beamSize, keys.end(),
+ ProbCompare(thrust::raw_pointer_cast(Probs.data())));
+
+ //for(int i = 0; i < beamSize; ++i) {
+ // bestKeys[i] = keys[i];
+ // bestCosts[i] = Probs[keys[i]];
+ //}
+ }
+ std::cerr << "Search took " << timer.format(3, "%ws");
+ }
+
+ {
+ boost::timer::cpu_timer timer;
+ for(int i = 0; i < tests; ++i) {
+ thrust::copy(ProbsOrig.begin(), ProbsOrig.end(), Probs.begin());
+
+ for(size_t j = 0; j < beamSize; ++j) {
+ DeviceVector<float>::iterator iter =
+ algo::max_element(Probs.begin(), Probs.end());
+ bestKeys[j] = iter - Probs.begin();
+ bestCosts[j] = *iter;
+ *iter = std::numeric_limits<float>::lowest();
+ }
+ algo::copy(bestKeys.begin(), bestKeys.end(), keys.begin());
+ }
+ std::cerr << "Search took " << timer.format(3, "%ws") << std::endl;
+ }
+
+ {
+ boost::timer::cpu_timer timer;
+ for(int i = 0; i < tests; ++i) {
+ thrust::copy(ProbsOrig.begin(), ProbsOrig.end(), Probs.begin());
+
+ thrust::sequence(keys.begin(), keys.end());
+ thrust::sort_by_key(Probs.begin(), Probs.end(),
+ keys.begin(), algo::greater<float>());
+
+ algo::copy_n(keys.begin(), beamSize, bestKeys.begin());
+ algo::copy_n(Probs.begin(), beamSize, bestCosts.begin());
+ }
+ std::cerr << "Search took " << timer.format(3, "%ws") << std::endl;
+ }
+}
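
The new src/gpu/nth_benchmark.cu times three ways of pulling the beamSize best scores out of a beamSize x vocSize probability matrix: thrust::nth_element over an index sequence with the ProbCompare functor (currently disabled via while(0)), a loop that repeatedly takes max_element and masks the winner out, and a full sort_by_key. A minimal host-side sketch of the repeated-max strategy, using std:: algorithms instead of the project's algo:: and DeviceVector aliases (the names and sizes here are illustrative, not part of the commit):

    #include <algorithm>
    #include <cstddef>
    #include <iostream>
    #include <limits>
    #include <vector>

    int main() {
      const std::size_t beamSize = 3;
      // stand-in for the flattened probability matrix
      std::vector<float> probs = {-3.f, -0.5f, -7.f, -0.1f, -2.f, -9.f, -0.2f, -4.f};

      std::vector<std::size_t> bestKeys(beamSize);
      std::vector<float>       bestCosts(beamSize);

      for (std::size_t j = 0; j < beamSize; ++j) {
        auto iter = std::max_element(probs.begin(), probs.end());
        bestKeys[j]  = iter - probs.begin();               // position of the j-th best entry
        bestCosts[j] = *iter;                              // its score
        *iter = std::numeric_limits<float>::lowest();      // mask it out for the next pass
      }

      for (std::size_t j = 0; j < beamSize; ++j)
        std::cout << bestKeys[j] << " -> " << bestCosts[j] << "\n";
    }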