github.com/marian-nmt/marian.git
author     Hieu Hoang <hieuhoang@gmail.com>    2018-02-27 03:18:29 +0300
committer  Hieu Hoang <hieuhoang@gmail.com>    2018-02-27 03:18:29 +0300
commit     df29f16826c692458e0f30e697c4636e351b45aa (patch)
tree       73488b490618a31fd30213e1831713965de3793c
parent     205c8ca9b949b3bd5c590c2a6cd12f144fc0702b (diff)
parent     3027e801b7047897302266d7d1937c4475fdb49b (diff)

Merge ../marian.hieu
-rw-r--r--  contrib/other-builds/amunmt/.project | 85
-rw-r--r--  src/amun/CMakeLists.txt | 12
-rw-r--r--  src/amun/common/base_best_hyps.cpp | 2
-rw-r--r--  src/amun/common/base_best_hyps.h | 10
-rw-r--r--  src/amun/common/base_tensor.cpp (renamed from src/amun/common/base_matrix.cpp) | 6
-rw-r--r--  src/amun/common/base_tensor.h (renamed from src/amun/common/base_matrix.h) | 13
-rw-r--r--  src/amun/common/god.cpp | 2
-rw-r--r--  src/amun/common/god.h | 2
-rw-r--r--  src/amun/common/loader.h | 2
-rw-r--r--  src/amun/common/scorer.h | 11
-rw-r--r--  src/amun/common/search.cpp | 2
-rw-r--r--  src/amun/common/search.h | 2
-rw-r--r--  src/amun/cpu/decoder/best_hyps.h | 6
-rw-r--r--  src/amun/cpu/decoder/encoder_decoder.h | 10
-rw-r--r--  src/amun/cpu/decoder/encoder_decoder_loader.cpp | 4
-rw-r--r--  src/amun/cpu/decoder/encoder_decoder_loader.h | 2
-rw-r--r--  src/amun/cpu/decoder/encoder_decoder_state.cpp | 8
-rw-r--r--  src/amun/cpu/decoder/encoder_decoder_state.h | 14
-rw-r--r--  src/amun/cpu/dl4mt/decoder.h | 118
-rw-r--r--  src/amun/cpu/dl4mt/encoder.cpp | 6
-rw-r--r--  src/amun/cpu/dl4mt/encoder.h | 16
-rw-r--r--  src/amun/cpu/dl4mt/encoder_decoder.cpp | 8
-rw-r--r--  src/amun/cpu/dl4mt/encoder_decoder.h | 8
-rw-r--r--  src/amun/cpu/dl4mt/gru.h | 24
-rw-r--r--  src/amun/cpu/dl4mt/model.cpp | 4
-rw-r--r--  src/amun/cpu/dl4mt/model.h | 82
-rw-r--r--  src/amun/cpu/mblas/tensor.cpp (renamed from src/amun/cpu/mblas/matrix.cpp) | 2
-rw-r--r--  src/amun/cpu/mblas/tensor.h (renamed from src/amun/cpu/mblas/matrix.h) | 12
-rw-r--r--  src/amun/cpu/nematus/decoder.h | 118
-rw-r--r--  src/amun/cpu/nematus/encoder.cpp | 6
-rw-r--r--  src/amun/cpu/nematus/encoder.h | 16
-rw-r--r--  src/amun/cpu/nematus/encoder_decoder.cpp | 10
-rw-r--r--  src/amun/cpu/nematus/encoder_decoder.h | 8
-rw-r--r--  src/amun/cpu/nematus/gru.h | 46
-rw-r--r--  src/amun/cpu/nematus/model.cpp | 12
-rw-r--r--  src/amun/cpu/nematus/model.h | 144
-rw-r--r--  src/amun/cpu/nematus/transition.cpp | 4
-rw-r--r--  src/amun/cpu/nematus/transition.h | 20
-rw-r--r--  src/amun/cpu/npz_converter.h | 16
-rw-r--r--  src/amun/fpga/best_hyps.cpp | 10
-rw-r--r--  src/amun/fpga/best_hyps.h | 6
-rw-r--r--  src/amun/fpga/decoder.cpp | 40
-rw-r--r--  src/amun/fpga/decoder.h | 104
-rw-r--r--  src/amun/fpga/encoder.cpp | 2
-rw-r--r--  src/amun/fpga/encoder.h | 20
-rw-r--r--  src/amun/fpga/encoder_decoder.cpp | 2
-rw-r--r--  src/amun/fpga/encoder_decoder.h | 4
-rw-r--r--  src/amun/fpga/encoder_decoder_loader.cpp | 4
-rw-r--r--  src/amun/fpga/encoder_decoder_loader.h | 2
-rw-r--r--  src/amun/fpga/encoder_decoder_state.cpp | 8
-rw-r--r--  src/amun/fpga/encoder_decoder_state.h | 12
-rw-r--r--  src/amun/fpga/gru.h | 40
-rw-r--r--  src/amun/fpga/matrix.cpp | 2
-rw-r--r--  src/amun/fpga/matrix.h | 4
-rw-r--r--  src/amun/fpga/matrix_functions.cpp | 18
-rw-r--r--  src/amun/fpga/matrix_functions.h | 18
-rw-r--r--  src/amun/fpga/model.h | 118
-rw-r--r--  src/amun/fpga/npz_converter.cpp | 4
-rw-r--r--  src/amun/fpga/npz_converter.h | 2
-rw-r--r--  src/amun/fpga/nth_element.cpp | 2
-rw-r--r--  src/amun/fpga/nth_element.h | 2
-rw-r--r--  src/amun/gpu/decoder/ape_penalty.h | 4
-rw-r--r--  src/amun/gpu/decoder/best_hyps.cu | 20
-rw-r--r--  src/amun/gpu/decoder/best_hyps.h | 12
-rw-r--r--  src/amun/gpu/decoder/encoder_decoder.cu | 12
-rw-r--r--  src/amun/gpu/decoder/encoder_decoder.h | 12
-rw-r--r--  src/amun/gpu/decoder/encoder_decoder_loader.cu | 4
-rw-r--r--  src/amun/gpu/decoder/encoder_decoder_loader.h | 2
-rw-r--r--  src/amun/gpu/decoder/encoder_decoder_state.cu | 4
-rw-r--r--  src/amun/gpu/decoder/encoder_decoder_state.h | 6
-rw-r--r--  src/amun/gpu/decoder/language_model.h | 2
-rw-r--r--  src/amun/gpu/dl4mt/cell.h | 6
-rw-r--r--  src/amun/gpu/dl4mt/cellstate.h | 14
-rw-r--r--  src/amun/gpu/dl4mt/decoder.h | 98
-rw-r--r--  src/amun/gpu/dl4mt/encoder.cu | 2
-rw-r--r--  src/amun/gpu/dl4mt/encoder.h | 24
-rw-r--r--  src/amun/gpu/dl4mt/gru.cu | 14
-rw-r--r--  src/amun/gpu/dl4mt/gru.h | 68
-rw-r--r--  src/amun/gpu/dl4mt/lstm.h | 20
-rw-r--r--  src/amun/gpu/dl4mt/model.cu | 10
-rw-r--r--  src/amun/gpu/dl4mt/model.h | 192
-rw-r--r--  src/amun/gpu/dl4mt/multiplicative.h | 8
-rw-r--r--  src/amun/gpu/mblas/nth_element.cu | 14
-rw-r--r--  src/amun/gpu/mblas/nth_element.h | 8
-rw-r--r--  src/amun/gpu/mblas/nth_element_kernels.cu | 8
-rw-r--r--  src/amun/gpu/mblas/nth_element_kernels.h | 12
-rw-r--r--  src/amun/gpu/mblas/tensor.cu (renamed from src/amun/gpu/mblas/matrix.cu) | 2
-rw-r--r--  src/amun/gpu/mblas/tensor.h (renamed from src/amun/gpu/mblas/matrix.h) | 23
-rw-r--r--  src/amun/gpu/mblas/tensor_functions.cu (renamed from src/amun/gpu/mblas/matrix_functions.cu) | 183
-rw-r--r--  src/amun/gpu/mblas/tensor_functions.h (renamed from src/amun/gpu/mblas/matrix_functions.h) | 134
-rw-r--r--  src/amun/gpu/mblas/tensor_wrapper.h (renamed from src/amun/gpu/mblas/matrix_wrapper.h) | 16
-rw-r--r--  src/amun/gpu/mblas/vector_wrapper.h | 2
-rw-r--r--  src/amun/gpu/npz_converter.cu | 20
-rw-r--r--  src/amun/gpu/npz_converter.h | 6
m---------  src/marian | 10
95 files changed, 1142 insertions, 1127 deletions
diff --git a/contrib/other-builds/amunmt/.project b/contrib/other-builds/amunmt/.project
index 106dac4a..413c6001 100644
--- a/contrib/other-builds/amunmt/.project
+++ b/contrib/other-builds/amunmt/.project
@@ -141,9 +141,9 @@
<locationURI>PARENT-3-PROJECT_LOC/src/amun/common/base_matrix.cpp</locationURI>
</link>
<link>
- <name>src/amun/common/base_matrix.h</name>
+ <name>src/amun/common/base_tensor.h</name>
<type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/src/amun/common/base_matrix.h</locationURI>
+ <locationURI>PARENT-3-PROJECT_LOC/src/amun/common/base_tensor.h</locationURI>
</link>
<link>
<name>src/amun/common/beam.cpp</name>
@@ -1291,11 +1291,6 @@
<locationURI>PARENT-3-PROJECT_LOC/src/amun/cpu/decoder/encoder_decoder_state.h</locationURI>
</link>
<link>
- <name>src/amun/cpu/dl4mt/decoder.cpp</name>
- <type>1</type>
- <locationURI>null:/decoder.cpp</locationURI>
- </link>
- <link>
<name>src/amun/cpu/dl4mt/decoder.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/src/amun/cpu/dl4mt/decoder.h</locationURI>
@@ -1346,16 +1341,6 @@
<locationURI>PARENT-3-PROJECT_LOC/src/amun/cpu/dl4mt/model.h</locationURI>
</link>
<link>
- <name>src/amun/cpu/mblas/matrix.cpp</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/src/amun/cpu/mblas/matrix.cpp</locationURI>
- </link>
- <link>
- <name>src/amun/cpu/mblas/matrix.h</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/src/amun/cpu/mblas/matrix.h</locationURI>
- </link>
- <link>
<name>src/amun/cpu/mblas/phoenix_functions.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/src/amun/cpu/mblas/phoenix_functions.cpp</locationURI>
@@ -1371,6 +1356,16 @@
<locationURI>PARENT-3-PROJECT_LOC/src/amun/cpu/mblas/simd_math_prims.h</locationURI>
</link>
<link>
+ <name>src/amun/cpu/mblas/tensor.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/src/amun/cpu/mblas/tensor.cpp</locationURI>
+ </link>
+ <link>
+ <name>src/amun/cpu/mblas/tensor.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/src/amun/cpu/mblas/tensor.h</locationURI>
+ </link>
+ <link>
<name>src/amun/cpu/nematus/decoder.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/src/amun/cpu/nematus/decoder.h</locationURI>
@@ -1516,6 +1511,21 @@
<locationURI>PARENT-3-PROJECT_LOC/src/amun/gpu/decoder/language_model.h</locationURI>
</link>
<link>
+ <name>src/amun/gpu/dl4mt/cell.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/src/amun/gpu/dl4mt/cell.h</locationURI>
+ </link>
+ <link>
+ <name>src/amun/gpu/dl4mt/cellstate.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/src/amun/gpu/dl4mt/cellstate.cpp</locationURI>
+ </link>
+ <link>
+ <name>src/amun/gpu/dl4mt/cellstate.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/src/amun/gpu/dl4mt/cellstate.h</locationURI>
+ </link>
+ <link>
<name>src/amun/gpu/dl4mt/decoder.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/src/amun/gpu/dl4mt/decoder.h</locationURI>
@@ -1561,6 +1571,11 @@
<locationURI>PARENT-3-PROJECT_LOC/src/amun/gpu/dl4mt/model.h</locationURI>
</link>
<link>
+ <name>src/amun/gpu/dl4mt/multiplicative.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/src/amun/gpu/dl4mt/multiplicative.h</locationURI>
+ </link>
+ <link>
<name>src/amun/gpu/mblas/handles.cu</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/src/amun/gpu/mblas/handles.cu</locationURI>
@@ -1571,49 +1586,49 @@
<locationURI>PARENT-3-PROJECT_LOC/src/amun/gpu/mblas/handles.h</locationURI>
</link>
<link>
- <name>src/amun/gpu/mblas/matrix.cu</name>
+ <name>src/amun/gpu/mblas/nth_element.cu</name>
<type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/src/amun/gpu/mblas/matrix.cu</locationURI>
+ <locationURI>PARENT-3-PROJECT_LOC/src/amun/gpu/mblas/nth_element.cu</locationURI>
</link>
<link>
- <name>src/amun/gpu/mblas/matrix.h</name>
+ <name>src/amun/gpu/mblas/nth_element.h</name>
<type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/src/amun/gpu/mblas/matrix.h</locationURI>
+ <locationURI>PARENT-3-PROJECT_LOC/src/amun/gpu/mblas/nth_element.h</locationURI>
</link>
<link>
- <name>src/amun/gpu/mblas/matrix_functions.cu</name>
+ <name>src/amun/gpu/mblas/nth_element_kernels.cu</name>
<type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/src/amun/gpu/mblas/matrix_functions.cu</locationURI>
+ <locationURI>PARENT-3-PROJECT_LOC/src/amun/gpu/mblas/nth_element_kernels.cu</locationURI>
</link>
<link>
- <name>src/amun/gpu/mblas/matrix_functions.h</name>
+ <name>src/amun/gpu/mblas/nth_element_kernels.h</name>
<type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/src/amun/gpu/mblas/matrix_functions.h</locationURI>
+ <locationURI>PARENT-3-PROJECT_LOC/src/amun/gpu/mblas/nth_element_kernels.h</locationURI>
</link>
<link>
- <name>src/amun/gpu/mblas/matrix_wrapper.h</name>
+ <name>src/amun/gpu/mblas/tensor.cu</name>
<type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/src/amun/gpu/mblas/matrix_wrapper.h</locationURI>
+ <locationURI>PARENT-3-PROJECT_LOC/src/amun/gpu/mblas/tensor.cu</locationURI>
</link>
<link>
- <name>src/amun/gpu/mblas/nth_element.cu</name>
+ <name>src/amun/gpu/mblas/tensor.h</name>
<type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/src/amun/gpu/mblas/nth_element.cu</locationURI>
+ <locationURI>PARENT-3-PROJECT_LOC/src/amun/gpu/mblas/tensor.h</locationURI>
</link>
<link>
- <name>src/amun/gpu/mblas/nth_element.h</name>
+ <name>src/amun/gpu/mblas/tensor_functions.cu</name>
<type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/src/amun/gpu/mblas/nth_element.h</locationURI>
+ <locationURI>PARENT-3-PROJECT_LOC/src/amun/gpu/mblas/tensor_functions.cu</locationURI>
</link>
<link>
- <name>src/amun/gpu/mblas/nth_element_kernels.cu</name>
+ <name>src/amun/gpu/mblas/tensor_functions.h</name>
<type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/src/amun/gpu/mblas/nth_element_kernels.cu</locationURI>
+ <locationURI>PARENT-3-PROJECT_LOC/src/amun/gpu/mblas/tensor_functions.h</locationURI>
</link>
<link>
- <name>src/amun/gpu/mblas/nth_element_kernels.h</name>
+ <name>src/amun/gpu/mblas/tensor_wrapper.h</name>
<type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/src/amun/gpu/mblas/nth_element_kernels.h</locationURI>
+ <locationURI>PARENT-3-PROJECT_LOC/src/amun/gpu/mblas/tensor_wrapper.h</locationURI>
</link>
<link>
<name>src/amun/gpu/mblas/thrust_functions.h</name>
diff --git a/src/amun/CMakeLists.txt b/src/amun/CMakeLists.txt
index bf53e6d5..6145cbda 100644
--- a/src/amun/CMakeLists.txt
+++ b/src/amun/CMakeLists.txt
@@ -9,8 +9,8 @@ list(APPEND SOURCES "${CMAKE_CURRENT_BINARY_DIR}/common/git_version.cpp")
add_library(cpumode OBJECT
- cpu/mblas/matrix.cpp
cpu/mblas/phoenix_functions.cpp
+ cpu/mblas/tensor.cpp
cpu/decoder/encoder_decoder.cpp
cpu/decoder/encoder_decoder_state.cpp
cpu/decoder/encoder_decoder_loader.cpp
@@ -43,7 +43,6 @@ add_library(cpumode OBJECT
add_library(libcommon OBJECT
${CMAKE_CURRENT_BINARY_DIR}/common/git_version.cpp
common/base_best_hyps.cpp
- common/base_matrix.cpp
common/config.cpp
common/exception.cpp
common/filter.cpp
@@ -64,6 +63,7 @@ add_library(libcommon OBJECT
common/utils.cpp
common/vocab.cpp
common/factor_vocab.cpp
+ common/base_tensor.cpp
common/translation_task.cpp
)
@@ -89,10 +89,10 @@ cuda_add_executable(
gpu/dl4mt/gru.cu
gpu/dl4mt/model.cu
gpu/mblas/handles.cu
- gpu/mblas/matrix.cu
- gpu/mblas/matrix_functions.cu
gpu/mblas/nth_element.cu
gpu/mblas/nth_element_kernels.cu
+ gpu/mblas/tensor.cu
+ gpu/mblas/tensor_functions.cu
gpu/npz_converter.cu
gpu/types-gpu.cu
@@ -112,10 +112,10 @@ cuda_add_library(python SHARED
gpu/decoder/encoder_decoder_loader.cu
gpu/decoder/encoder_decoder_state.cu
gpu/mblas/handles.cu
- gpu/mblas/matrix.cu
- gpu/mblas/matrix_functions.cu
gpu/mblas/nth_element.cu
gpu/mblas/nth_element_kernels.cu
+ gpu/mblas/tensor.cu
+ gpu/mblas/tensor_functions.cu
gpu/dl4mt/encoder.cu
gpu/dl4mt/gru.cu
gpu/dl4mt/model.cu
diff --git a/src/amun/common/base_best_hyps.cpp b/src/amun/common/base_best_hyps.cpp
index a5221b8f..052337a8 100644
--- a/src/amun/common/base_best_hyps.cpp
+++ b/src/amun/common/base_best_hyps.cpp
@@ -5,7 +5,7 @@ using namespace std;
namespace amunmt {
-BestHypsBase::BestHypsBase(const God &god)
+BaseBestHyps::BaseBestHyps(const God &god)
: god_(god),
forbidUNK_(!god.Get<bool>("allow-unk")),
isInputFiltered_(god.Get<std::vector<std::string>>("softmax-filter").size()),
diff --git a/src/amun/common/base_best_hyps.h b/src/amun/common/base_best_hyps.h
index f643cbb8..8b720ef0 100644
--- a/src/amun/common/base_best_hyps.h
+++ b/src/amun/common/base_best_hyps.h
@@ -9,14 +9,12 @@
namespace amunmt {
-class God;
-
-class BestHypsBase
+class BaseBestHyps
{
public:
- BestHypsBase(const God &god);
+ BaseBestHyps(const God &god);
- BestHypsBase(const BestHypsBase&) = delete;
+ BaseBestHyps(const BaseBestHyps&) = delete;
virtual void CalcBeam(
const Beam& prevHyps,
@@ -34,6 +32,6 @@ class BestHypsBase
};
-typedef std::shared_ptr<BestHypsBase> BestHypsBasePtr;
+typedef std::shared_ptr<BaseBestHyps> BaseBestHypsPtr;
}
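
Note: the BestHypsBase -> BaseBestHyps rename above is purely a spelling change. The class, its shared_ptr typedef, and every member or return type that mentions them change together, while call sites stay structurally identical. A minimal caller sketch (hypothetical, not part of the patch; CalcBeam's remaining arguments are elided just as in the hunk):

    // Hypothetical caller, mirroring the Search::bestHyps_ member updated
    // in common/search.h below.
    BaseBestHypsPtr bestHyps = god.GetBestHyps(deviceInfo);  // was BestHypsBasePtr
    bestHyps->CalcBeam(prevHyps, /* scorers, beam sizes, ... */);
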
diff --git a/src/amun/common/base_matrix.cpp b/src/amun/common/base_tensor.cpp
index 4324dee5..56105978 100644
--- a/src/amun/common/base_matrix.cpp
+++ b/src/amun/common/base_tensor.cpp
@@ -1,11 +1,11 @@
#include <sstream>
-#include "base_matrix.h"
+#include "base_tensor.h"
using namespace std;
namespace amunmt {
-unsigned BaseMatrix::size() const {
+unsigned BaseTensor::size() const {
unsigned ret = dim(0);
for (unsigned i = 1; i < SHAPE_SIZE; ++i) {
ret *= dim(i);
@@ -14,7 +14,7 @@ unsigned BaseMatrix::size() const {
return ret;
}
-std::string BaseMatrix::Debug(unsigned detailed) const
+std::string BaseTensor::Debug(unsigned detailed) const
{
std::stringstream strm;
strm << dim(0) << "x" << dim(1) << "x" << dim(2) << "x" << dim(3) << "=" << size();
diff --git a/src/amun/common/base_matrix.h b/src/amun/common/base_tensor.h
index 068ac873..13159159 100644
--- a/src/amun/common/base_matrix.h
+++ b/src/amun/common/base_tensor.h
@@ -9,17 +9,10 @@ namespace amunmt {
const unsigned SHAPE_SIZE = 4;
-class Hypothesis;
-typedef std::shared_ptr<Hypothesis> HypothesisPtr;
-typedef std::vector<HypothesisPtr> Beam;
-
-class Scorer;
-typedef std::shared_ptr<Scorer> ScorerPtr;
-
-class BaseMatrix {
+class BaseTensor {
public:
- BaseMatrix() {}
- virtual ~BaseMatrix() {}
+ BaseTensor() {}
+ virtual ~BaseTensor() {}
virtual unsigned dim(unsigned i) const = 0;
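
Note: BaseTensor keeps BaseMatrix's contract unchanged apart from the name: a pure-virtual dim(i) over SHAPE_SIZE = 4 axes, with size() defined as their product and Debug() printing the shape as "AxBxCxD=size" (see the base_tensor.cpp hunk above). A standalone toy mirroring that contract, as a hedged sketch rather than patch code:

    // Standalone toy (not patch code) mirroring the BaseTensor contract:
    // dim(i) over four axes, size() as the product of all four dims.
    #include <array>

    class ToyTensor {
    public:
      ToyTensor(unsigned a, unsigned b, unsigned c, unsigned d)
        : dims_{{a, b, c, d}} {}

      unsigned dim(unsigned i) const { return dims_[i]; }

      unsigned size() const {               // same loop as BaseTensor::size()
        unsigned ret = dim(0);
        for (unsigned i = 1; i < 4 /* SHAPE_SIZE */; ++i) ret *= dim(i);
        return ret;
      }

    private:
      std::array<unsigned, 4> dims_;
    };

    // ToyTensor(2, 3, 4, 5).size() == 120; BaseTensor::Debug() would render
    // the same shape as "2x3x4x5=120".
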
diff --git a/src/amun/common/god.cpp b/src/amun/common/god.cpp
index b4574835..130985b9 100644
--- a/src/amun/common/god.cpp
+++ b/src/amun/common/god.cpp
@@ -270,7 +270,7 @@ std::vector<ScorerPtr> God::GetScorers(const DeviceInfo &deviceInfo) const {
return scorers;
}
-BestHypsBasePtr God::GetBestHyps(const DeviceInfo &deviceInfo) const {
+BaseBestHypsPtr God::GetBestHyps(const DeviceInfo &deviceInfo) const {
if (deviceInfo.deviceType == CPUDevice) {
return cpuLoaders_.begin()->second->GetBestHyps(*this, deviceInfo);
}
diff --git a/src/amun/common/god.h b/src/amun/common/god.h
index 9153ecf5..e489ba98 100644
--- a/src/amun/common/god.h
+++ b/src/amun/common/god.h
@@ -61,7 +61,7 @@ class God {
std::shared_ptr<const Filter> GetFilter() const;
- BestHypsBasePtr GetBestHyps(const DeviceInfo &deviceInfo) const;
+ BaseBestHypsPtr GetBestHyps(const DeviceInfo &deviceInfo) const;
std::vector<ScorerPtr> GetScorers(const DeviceInfo &deviceInfo) const;
std::vector<std::string> GetScorerNames() const;
diff --git a/src/amun/common/loader.h b/src/amun/common/loader.h
index 5f4940f3..55b39fc6 100644
--- a/src/amun/common/loader.h
+++ b/src/amun/common/loader.h
@@ -29,7 +29,7 @@ class Loader {
}
virtual ScorerPtr NewScorer(const God &god, const DeviceInfo &deviceInfo) const = 0;
- virtual BestHypsBasePtr GetBestHyps(const God &god, const DeviceInfo &deviceInfo) const = 0;
+ virtual BaseBestHypsPtr GetBestHyps(const God &god, const DeviceInfo &deviceInfo) const = 0;
const std::string& GetName() const {
return name_;
diff --git a/src/amun/common/scorer.h b/src/amun/common/scorer.h
index 7ed6bd7f..8c3a46a8 100644
--- a/src/amun/common/scorer.h
+++ b/src/amun/common/scorer.h
@@ -5,12 +5,17 @@
#include "common/hypothesis.h"
#include "common/sentence.h"
-#include "common/base_matrix.h"
+#include "common/base_tensor.h"
#include "yaml-cpp/node/node.h"
namespace amunmt {
+class God;
class Sentences;
+class Hypothesis;
+typedef std::shared_ptr<Hypothesis> HypothesisPtr;
+typedef std::vector<HypothesisPtr> Beam;
+
class State {
public:
@@ -63,9 +68,9 @@ class Scorer {
return name_;
}
- virtual BaseMatrix& GetProbs() = 0;
+ virtual BaseTensor& GetProbs() = 0;
virtual void *GetNBest() = 0; // hack - need to return matrix<NthOut> but NthOut contain cuda code
- virtual const BaseMatrix *GetBias() const = 0;
+ virtual const BaseTensor *GetBias() const = 0;
protected:
const God &god_;
diff --git a/src/amun/common/search.cpp b/src/amun/common/search.cpp
index 70b2e4ae..6bfb704e 100644
--- a/src/amun/common/search.cpp
+++ b/src/amun/common/search.cpp
@@ -5,7 +5,7 @@
#include "common/history.h"
#include "common/histories.h"
#include "common/filter.h"
-#include "common/base_matrix.h"
+#include "common/base_tensor.h"
#ifdef CUDA
#include <cuda.h>
diff --git a/src/amun/common/search.h b/src/amun/common/search.h
index 4aec6068..c03a54bf 100644
--- a/src/amun/common/search.h
+++ b/src/amun/common/search.h
@@ -41,7 +41,7 @@ class Search {
const unsigned maxBeamSize_;
bool normalizeScore_;
Words filterIndices_;
- BestHypsBasePtr bestHyps_;
+ BaseBestHypsPtr bestHyps_;
std::vector<unsigned> activeCount_;
void BatchStats();
diff --git a/src/amun/cpu/decoder/best_hyps.h b/src/amun/cpu/decoder/best_hyps.h
index 461246c0..1621b681 100644
--- a/src/amun/cpu/decoder/best_hyps.h
+++ b/src/amun/cpu/decoder/best_hyps.h
@@ -6,7 +6,7 @@
#include "common/scorer.h"
#include "common/god.h"
#include "common/exception.h"
-#include "cpu/mblas/matrix.h"
+#include "cpu/mblas/tensor.h"
#include "cpu/decoder/encoder_decoder.h"
namespace amunmt {
@@ -22,11 +22,11 @@ struct ProbCompare {
const float* data_;
};
-class BestHyps : public BestHypsBase
+class BestHyps : public BaseBestHyps
{
public:
BestHyps(const God &god)
- : BestHypsBase(god)
+ : BaseBestHyps(god)
{}
void CalcBeam(
diff --git a/src/amun/cpu/decoder/encoder_decoder.h b/src/amun/cpu/decoder/encoder_decoder.h
index 25e28f4e..f26ae952 100644
--- a/src/amun/cpu/decoder/encoder_decoder.h
+++ b/src/amun/cpu/decoder/encoder_decoder.h
@@ -3,7 +3,7 @@
#include <yaml-cpp/yaml.h>
#include "common/scorer.h"
-#include "cpu/mblas/matrix.h"
+#include "cpu/mblas/tensor.h"
#include "cpu/decoder/encoder_decoder_state.h"
namespace amunmt {
@@ -22,8 +22,8 @@ class CPUEncoderDecoderBase : public Scorer {
virtual State* NewState() const;
- virtual void GetAttention(mblas::Matrix& Attention) = 0;
- virtual mblas::Matrix& GetAttention() = 0;
+ virtual void GetAttention(mblas::Tensor& Attention) = 0;
+ virtual mblas::Tensor& GetAttention() = 0;
virtual void *GetNBest()
{
@@ -31,14 +31,14 @@ class CPUEncoderDecoderBase : public Scorer {
return nullptr;
}
- virtual const BaseMatrix *GetBias() const
+ virtual const BaseTensor *GetBias() const
{
assert(false);
return nullptr;
}
protected:
- mblas::Matrix SourceContext_;
+ mblas::Tensor SourceContext_;
};
diff --git a/src/amun/cpu/decoder/encoder_decoder_loader.cpp b/src/amun/cpu/decoder/encoder_decoder_loader.cpp
index 55d7b65f..f29ed9c0 100644
--- a/src/amun/cpu/decoder/encoder_decoder_loader.cpp
+++ b/src/amun/cpu/decoder/encoder_decoder_loader.cpp
@@ -43,8 +43,8 @@ ScorerPtr EncoderDecoderLoader::NewScorer(const God &god, const DeviceInfo&) con
tab, *dl4mtModels_[0]));
}
-BestHypsBasePtr EncoderDecoderLoader::GetBestHyps(const God &god, const DeviceInfo &deviceInfo) const {
- return BestHypsBasePtr(new CPU::BestHyps(god));
+BaseBestHypsPtr EncoderDecoderLoader::GetBestHyps(const God &god, const DeviceInfo &deviceInfo) const {
+ return BaseBestHypsPtr(new CPU::BestHyps(god));
}
}
diff --git a/src/amun/cpu/decoder/encoder_decoder_loader.h b/src/amun/cpu/decoder/encoder_decoder_loader.h
index 8a90599c..89814dd0 100644
--- a/src/amun/cpu/decoder/encoder_decoder_loader.h
+++ b/src/amun/cpu/decoder/encoder_decoder_loader.h
@@ -28,7 +28,7 @@ class EncoderDecoderLoader : public Loader {
virtual void Load(const God& god);
virtual ScorerPtr NewScorer(const God &god, const DeviceInfo &deviceInfo) const;
- BestHypsBasePtr GetBestHyps(const God &god, const DeviceInfo &deviceInfo) const;
+ BaseBestHypsPtr GetBestHyps(const God &god, const DeviceInfo &deviceInfo) const;
private:
std::vector<std::unique_ptr<dl4mt::Weights>> dl4mtModels_;
diff --git a/src/amun/cpu/decoder/encoder_decoder_state.cpp b/src/amun/cpu/decoder/encoder_decoder_state.cpp
index 85112993..647f8c6c 100644
--- a/src/amun/cpu/decoder/encoder_decoder_state.cpp
+++ b/src/amun/cpu/decoder/encoder_decoder_state.cpp
@@ -14,19 +14,19 @@ std::string EncoderDecoderState::Debug(unsigned verbosity) const
return CPU::mblas::Debug(states_);
}
-CPU::mblas::Matrix& EncoderDecoderState::GetStates() {
+CPU::mblas::Tensor& EncoderDecoderState::GetStates() {
return states_;
}
-CPU::mblas::Matrix& EncoderDecoderState::GetEmbeddings() {
+CPU::mblas::Tensor& EncoderDecoderState::GetEmbeddings() {
return embeddings_;
}
-const CPU::mblas::Matrix& EncoderDecoderState::GetStates() const {
+const CPU::mblas::Tensor& EncoderDecoderState::GetStates() const {
return states_;
}
-const CPU::mblas::Matrix& EncoderDecoderState::GetEmbeddings() const {
+const CPU::mblas::Tensor& EncoderDecoderState::GetEmbeddings() const {
return embeddings_;
}
diff --git a/src/amun/cpu/decoder/encoder_decoder_state.h b/src/amun/cpu/decoder/encoder_decoder_state.h
index d0ebe53a..be3cd028 100644
--- a/src/amun/cpu/decoder/encoder_decoder_state.h
+++ b/src/amun/cpu/decoder/encoder_decoder_state.h
@@ -2,7 +2,7 @@
#include <vector>
-#include "cpu/mblas/matrix.h"
+#include "cpu/mblas/tensor.h"
#include "common/scorer.h"
namespace amunmt {
@@ -15,15 +15,15 @@ class EncoderDecoderState : public State {
virtual std::string Debug(unsigned verbosity = 1) const;
- CPU::mblas::Matrix& GetStates();
- const CPU::mblas::Matrix& GetStates() const;
+ CPU::mblas::Tensor& GetStates();
+ const CPU::mblas::Tensor& GetStates() const;
- CPU::mblas::Matrix& GetEmbeddings();
- const CPU::mblas::Matrix& GetEmbeddings() const;
+ CPU::mblas::Tensor& GetEmbeddings();
+ const CPU::mblas::Tensor& GetEmbeddings() const;
private:
- CPU::mblas::Matrix states_;
- CPU::mblas::Matrix embeddings_;
+ CPU::mblas::Tensor states_;
+ CPU::mblas::Tensor embeddings_;
};
} // namespace CPU
diff --git a/src/amun/cpu/dl4mt/decoder.h b/src/amun/cpu/dl4mt/decoder.h
index add290cc..d9bbf3c3 100644
--- a/src/amun/cpu/dl4mt/decoder.h
+++ b/src/amun/cpu/dl4mt/decoder.h
@@ -1,6 +1,6 @@
#pragma once
-#include "../mblas/matrix.h"
+#include "../mblas/tensor.h"
#include "model.h"
#include "gru.h"
#include "common/god.h"
@@ -18,13 +18,13 @@ class Decoder {
: w_(model)
{}
- void Lookup(mblas::Matrix& Rows, const std::vector<unsigned>& ids) {
+ void Lookup(mblas::Tensor& Rows, const std::vector<unsigned>& ids) {
using namespace mblas;
std::vector<unsigned> tids = ids;
for(auto&& id : tids)
if(id >= w_.E_.rows())
id = 1;
- Rows = Assemble<byRow, Matrix>(w_.E_, tids);
+ Rows = Assemble<byRow, Tensor>(w_.E_, tids);
}
size_t GetCols() {
@@ -46,14 +46,14 @@ class Decoder {
RNNHidden(const Weights1& initModel, const Weights2& gruModel)
: w_(initModel), gru_(gruModel) {}
- void InitializeState(mblas::Matrix& State,
- const mblas::Matrix& SourceContext,
+ void InitializeState(mblas::Tensor& State,
+ const mblas::Tensor& SourceContext,
const size_t batchSize = 1) {
using namespace mblas;
// Calculate mean of source context, rowwise
// Repeat mean batchSize times by broadcasting
- Temp1_ = Mean<byRow, Matrix>(SourceContext);
+ Temp1_ = Mean<byRow, Tensor>(SourceContext);
Temp2_.resize(batchSize, SourceContext.columns());
Temp2_ = 0.0f;
AddBiasVector<byRow>(Temp2_, Temp1_);
@@ -69,9 +69,9 @@ class Decoder {
State = blaze::forEach(State, Tanh());
}
- void GetNextState(mblas::Matrix& NextState,
- const mblas::Matrix& State,
- const mblas::Matrix& Context) {
+ void GetNextState(mblas::Tensor& NextState,
+ const mblas::Tensor& State,
+ const mblas::Tensor& Context) {
gru_.GetNextState(NextState, State, Context);
}
@@ -79,8 +79,8 @@ class Decoder {
const Weights1& w_;
const GRU<Weights2> gru_;
- mblas::Matrix Temp1_;
- mblas::Matrix Temp2_;
+ mblas::Tensor Temp1_;
+ mblas::Tensor Temp2_;
};
//////////////////////////////////////////////////////////////
@@ -90,9 +90,9 @@ class Decoder {
RNNFinal(const Weights& model)
: gru_(model) {}
- void GetNextState(mblas::Matrix& NextState,
- const mblas::Matrix& State,
- const mblas::Matrix& Context) {
+ void GetNextState(mblas::Tensor& NextState,
+ const mblas::Tensor& State,
+ const mblas::Tensor& Context) {
gru_.GetNextState(NextState, State, Context);
}
@@ -110,7 +110,7 @@ class Decoder {
V_ = blaze::trans(blaze::row(w_.V_, 0));
}
- void Init(const mblas::Matrix& SourceContext) {
+ void Init(const mblas::Tensor& SourceContext) {
using namespace mblas;
SCU_ = SourceContext * w_.U_;
if (w_.Gamma_1_.rows()) {
@@ -119,9 +119,9 @@ class Decoder {
AddBiasVector<byRow>(SCU_, w_.B_);
}
- void GetAlignedSourceContext(mblas::Matrix& AlignedSourceContext,
- const mblas::Matrix& HiddenState,
- const mblas::Matrix& SourceContext) {
+ void GetAlignedSourceContext(mblas::Tensor& AlignedSourceContext,
+ const mblas::Tensor& HiddenState,
+ const mblas::Tensor& SourceContext) {
using namespace mblas;
Temp2_ = HiddenState * w_.W_;
@@ -129,7 +129,7 @@ class Decoder {
LayerNormalization(Temp2_, w_.Gamma_2_);
}
- Temp1_ = Broadcast<Matrix>(Tanh(), SCU_, Temp2_);
+ Temp1_ = Broadcast<Tensor>(Tanh(), SCU_, Temp2_);
A_.resize(Temp1_.rows(), 1);
blaze::column(A_, 0) = Temp1_ * V_;
@@ -145,21 +145,21 @@ class Decoder {
AlignedSourceContext = A_ * SourceContext;
}
- void GetAttention(mblas::Matrix& Attention) {
+ void GetAttention(mblas::Tensor& Attention) {
Attention = A_;
}
- mblas::Matrix& GetAttention() {
+ mblas::Tensor& GetAttention() {
return A_;
}
private:
const Weights& w_;
- mblas::Matrix SCU_;
- mblas::Matrix Temp1_;
- mblas::Matrix Temp2_;
- mblas::Matrix A_;
+ mblas::Tensor SCU_;
+ mblas::Tensor Temp1_;
+ mblas::Tensor Temp2_;
+ mblas::Tensor A_;
mblas::ColumnVector V_;
};
@@ -173,9 +173,9 @@ class Decoder {
{}
void GetProbs(mblas::ArrayMatrix& Probs,
- const mblas::Matrix& State,
- const mblas::Matrix& Embedding,
- const mblas::Matrix& AlignedSourceContext) {
+ const mblas::Tensor& State,
+ const mblas::Tensor& Embedding,
+ const mblas::Tensor& AlignedSourceContext) {
using namespace mblas;
@@ -212,20 +212,20 @@ class Decoder {
void Filter(const std::vector<unsigned>& ids) {
filtered_ = true;
using namespace mblas;
- FilteredW4_ = Assemble<byColumn, Matrix>(w_.W4_, ids);
- FilteredB4_ = Assemble<byColumn, Matrix>(w_.B4_, ids);
+ FilteredW4_ = Assemble<byColumn, Tensor>(w_.W4_, ids);
+ FilteredB4_ = Assemble<byColumn, Tensor>(w_.B4_, ids);
}
private:
const Weights& w_;
bool filtered_;
- mblas::Matrix FilteredW4_;
- mblas::Matrix FilteredB4_;
+ mblas::Tensor FilteredW4_;
+ mblas::Tensor FilteredB4_;
- mblas::Matrix T1_;
- mblas::Matrix T2_;
- mblas::Matrix T3_;
+ mblas::Tensor T1_;
+ mblas::Tensor T2_;
+ mblas::Tensor T3_;
};
public:
@@ -237,10 +237,10 @@ class Decoder {
softmax_(model.decSoftmax_)
{}
- void Decode(mblas::Matrix& NextState,
- const mblas::Matrix& State,
- const mblas::Matrix& Embeddings,
- const mblas::Matrix& SourceContext) {
+ void Decode(mblas::Tensor& NextState,
+ const mblas::Tensor& State,
+ const mblas::Tensor& Embeddings,
+ const mblas::Tensor& SourceContext) {
GetHiddenState(HiddenState_, State, Embeddings);
GetAlignedSourceContext(AlignedSourceContext_, HiddenState_, SourceContext);
GetNextState(NextState, HiddenState_, AlignedSourceContext_);
@@ -251,20 +251,20 @@ class Decoder {
return Probs_;
}
- void EmptyState(mblas::Matrix& State,
- const mblas::Matrix& SourceContext,
+ void EmptyState(mblas::Tensor& State,
+ const mblas::Tensor& SourceContext,
size_t batchSize = 1) {
rnn1_.InitializeState(State, SourceContext, batchSize);
attention_.Init(SourceContext);
}
- void EmptyEmbedding(mblas::Matrix& Embedding,
+ void EmptyEmbedding(mblas::Tensor& Embedding,
size_t batchSize = 1) {
Embedding.resize(batchSize, embeddings_.GetCols());
Embedding = 0.0f;
}
- void Lookup(mblas::Matrix& Embedding,
+ void Lookup(mblas::Tensor& Embedding,
const std::vector<unsigned>& w) {
embeddings_.Lookup(Embedding, w);
}
@@ -273,11 +273,11 @@ class Decoder {
softmax_.Filter(ids);
}
- void GetAttention(mblas::Matrix& attention) {
+ void GetAttention(mblas::Tensor& attention) {
attention_.GetAttention(attention);
}
- mblas::Matrix& GetAttention() {
+ mblas::Tensor& GetAttention() {
return attention_.GetAttention();
}
@@ -287,34 +287,34 @@ class Decoder {
private:
- void GetHiddenState(mblas::Matrix& HiddenState,
- const mblas::Matrix& PrevState,
- const mblas::Matrix& Embedding) {
+ void GetHiddenState(mblas::Tensor& HiddenState,
+ const mblas::Tensor& PrevState,
+ const mblas::Tensor& Embedding) {
rnn1_.GetNextState(HiddenState, PrevState, Embedding);
}
- void GetAlignedSourceContext(mblas::Matrix& AlignedSourceContext,
- const mblas::Matrix& HiddenState,
- const mblas::Matrix& SourceContext) {
+ void GetAlignedSourceContext(mblas::Tensor& AlignedSourceContext,
+ const mblas::Tensor& HiddenState,
+ const mblas::Tensor& SourceContext) {
attention_.GetAlignedSourceContext(AlignedSourceContext, HiddenState, SourceContext);
}
- void GetNextState(mblas::Matrix& State,
- const mblas::Matrix& HiddenState,
- const mblas::Matrix& AlignedSourceContext) {
+ void GetNextState(mblas::Tensor& State,
+ const mblas::Tensor& HiddenState,
+ const mblas::Tensor& AlignedSourceContext) {
rnn2_.GetNextState(State, HiddenState, AlignedSourceContext);
}
- void GetProbs(const mblas::Matrix& State,
- const mblas::Matrix& Embedding,
- const mblas::Matrix& AlignedSourceContext) {
+ void GetProbs(const mblas::Tensor& State,
+ const mblas::Tensor& Embedding,
+ const mblas::Tensor& AlignedSourceContext) {
softmax_.GetProbs(Probs_, State, Embedding, AlignedSourceContext);
}
private:
- mblas::Matrix HiddenState_;
- mblas::Matrix AlignedSourceContext_;
+ mblas::Tensor HiddenState_;
+ mblas::Tensor AlignedSourceContext_;
mblas::ArrayMatrix Probs_;
Embeddings<Weights::Embeddings> embeddings_;
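
Note: the dl4mt Decoder's public surface is unchanged by the rename apart from Matrix -> Tensor; Decode() chains the three private steps shown above (rnn1_ hidden state, attention_ alignment, rnn2_ next state), and GetProbs() exposes the softmax output. A hedged usage sketch, with the driver variables (encoder, decoder, words, maxLength, bestWordIds) assumed rather than taken from the patch:

    // Hypothetical single-sentence greedy loop over the renamed API.
    mblas::Tensor sourceContext, state, embedding, nextState;

    encoder.Encode(words, sourceContext);        // signature: encoder.cpp below
    decoder.EmptyState(state, sourceContext, /*batchSize=*/1);
    decoder.EmptyEmbedding(embedding, /*batchSize=*/1);

    for (unsigned step = 0; step < maxLength; ++step) {
      decoder.Decode(nextState, state, embedding, sourceContext);
      // choose bestWordIds from decoder.GetProbs(), then re-embed them:
      decoder.Lookup(embedding, bestWordIds);
      std::swap(state, nextState);
    }
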
diff --git a/src/amun/cpu/dl4mt/encoder.cpp b/src/amun/cpu/dl4mt/encoder.cpp
index 8887fdb9..05eb7971 100644
--- a/src/amun/cpu/dl4mt/encoder.cpp
+++ b/src/amun/cpu/dl4mt/encoder.cpp
@@ -7,15 +7,15 @@ namespace CPU {
namespace dl4mt {
void Encoder::Encode(const std::vector<unsigned>& words,
- mblas::Matrix& context) {
- std::vector<mblas::Matrix> embeddedWords;
+ mblas::Tensor& context) {
+ std::vector<mblas::Tensor> embeddedWords;
context.resize(words.size(),
forwardRnn_.GetStateLength()
+ backwardRnn_.GetStateLength());
for(auto& w : words) {
embeddedWords.emplace_back();
- mblas::Matrix &embed = embeddedWords.back();
+ mblas::Tensor &embed = embeddedWords.back();
embeddings_.Lookup(embed, w);
//cerr << "embed=" << embed.Debug(true) << endl;
}
diff --git a/src/amun/cpu/dl4mt/encoder.h b/src/amun/cpu/dl4mt/encoder.h
index efa6e3da..40ebd1f0 100644
--- a/src/amun/cpu/dl4mt/encoder.h
+++ b/src/amun/cpu/dl4mt/encoder.h
@@ -1,6 +1,6 @@
#pragma once
-#include "../mblas/matrix.h"
+#include "../mblas/tensor.h"
#include "../dl4mt/model.h"
#include "../dl4mt/gru.h"
@@ -19,7 +19,7 @@ class Encoder {
: w_(model)
{}
- void Lookup(mblas::Matrix& Row, size_t i) {
+ void Lookup(mblas::Tensor& Row, size_t i) {
size_t len = w_.E_.columns();
if(i < w_.E_.rows())
Row = blaze::submatrix(w_.E_, i, 0, 1, len);
@@ -43,15 +43,15 @@ class Encoder {
State_ = 0.0f;
}
- void GetNextState(mblas::Matrix& NextState,
- const mblas::Matrix& State,
- const mblas::Matrix& Embd) {
+ void GetNextState(mblas::Tensor& NextState,
+ const mblas::Tensor& State,
+ const mblas::Tensor& Embd) {
gru_.GetNextState(NextState, State, Embd);
}
template <class It>
void Encode(It it, It end,
- mblas::Matrix& Context, bool invert) {
+ mblas::Tensor& Context, bool invert) {
InitializeState();
size_t n = std::distance(it, end);
@@ -76,7 +76,7 @@ class Encoder {
// Model matrices
const GRU<Weights> gru_;
- mblas::Matrix State_;
+ mblas::Tensor State_;
};
/////////////////////////////////////////////////////////////////
@@ -88,7 +88,7 @@ class Encoder {
{}
void Encode(const std::vector<unsigned>& words,
- mblas::Matrix& context);
+ mblas::Tensor& context);
private:
Embeddings<Weights::Embeddings> embeddings_;
diff --git a/src/amun/cpu/dl4mt/encoder_decoder.cpp b/src/amun/cpu/dl4mt/encoder_decoder.cpp
index 73e32d2f..0644182c 100644
--- a/src/amun/cpu/dl4mt/encoder_decoder.cpp
+++ b/src/amun/cpu/dl4mt/encoder_decoder.cpp
@@ -60,17 +60,17 @@ void EncoderDecoder::AssembleBeamState(const State& in,
const EDState& edIn = in.get<EDState>();
EDState& edOut = out.get<EDState>();
- edOut.GetStates() = mblas::Assemble<mblas::byRow, mblas::Matrix>(edIn.GetStates(), beamStateIds);
+ edOut.GetStates() = mblas::Assemble<mblas::byRow, mblas::Tensor>(edIn.GetStates(), beamStateIds);
decoder_->Lookup(edOut.GetEmbeddings(), beamWords);
}
-void EncoderDecoder::GetAttention(mblas::Matrix& Attention) {
+void EncoderDecoder::GetAttention(mblas::Tensor& Attention) {
decoder_->GetAttention(Attention);
}
-mblas::Matrix& EncoderDecoder::GetAttention() {
+mblas::Tensor& EncoderDecoder::GetAttention() {
return decoder_->GetAttention();
}
@@ -85,7 +85,7 @@ void EncoderDecoder::Filter(const std::vector<unsigned>& filterIds) {
}
-BaseMatrix& EncoderDecoder::GetProbs() {
+BaseTensor& EncoderDecoder::GetProbs() {
return decoder_->GetProbs();
}
diff --git a/src/amun/cpu/dl4mt/encoder_decoder.h b/src/amun/cpu/dl4mt/encoder_decoder.h
index 538ed833..69846373 100644
--- a/src/amun/cpu/dl4mt/encoder_decoder.h
+++ b/src/amun/cpu/dl4mt/encoder_decoder.h
@@ -4,7 +4,7 @@
#include <yaml-cpp/yaml.h>
#include "cpu/decoder/encoder_decoder.h"
-#include "cpu/mblas/matrix.h"
+#include "cpu/mblas/tensor.h"
#include "cpu/dl4mt/model.h"
#include "cpu/dl4mt/encoder.h"
#include "cpu/dl4mt/decoder.h"
@@ -43,12 +43,12 @@ class EncoderDecoder : public CPUEncoderDecoderBase {
const Beam& beam,
State& out);
- void GetAttention(mblas::Matrix& Attention);
- mblas::Matrix& GetAttention();
+ void GetAttention(mblas::Tensor& Attention);
+ mblas::Tensor& GetAttention();
unsigned GetVocabSize() const;
- BaseMatrix& GetProbs();
+ BaseTensor& GetProbs();
void Filter(const std::vector<unsigned>& filterIds);
diff --git a/src/amun/cpu/dl4mt/gru.h b/src/amun/cpu/dl4mt/gru.h
index 72e3e19b..7089396e 100644
--- a/src/amun/cpu/dl4mt/gru.h
+++ b/src/amun/cpu/dl4mt/gru.h
@@ -1,5 +1,5 @@
#pragma once
-#include "cpu/mblas/matrix.h"
+#include "cpu/mblas/tensor.h"
namespace amunmt {
namespace CPU {
@@ -11,13 +11,13 @@ class GRU {
GRU(const Weights& model)
: w_(model) {
using namespace mblas;
- WWx_ = Concat<byColumn, Matrix>(w_.W_, w_.Wx_);
- UUx_ = Concat<byColumn, Matrix>(w_.U_, w_.Ux_);
+ WWx_ = Concat<byColumn, Tensor>(w_.W_, w_.Wx_);
+ UUx_ = Concat<byColumn, Tensor>(w_.U_, w_.Ux_);
}
- void GetNextState(mblas::Matrix& NextState,
- const mblas::Matrix& State,
- const mblas::Matrix& Context) const {
+ void GetNextState(mblas::Tensor& NextState,
+ const mblas::Tensor& State,
+ const mblas::Tensor& Context) const {
RUH_ = Context * WWx_;
if (w_.Gamma_1_.rows()) {
LayerNormalization(RUH_, w_.Gamma_1_);
@@ -33,8 +33,8 @@ class GRU {
ElementwiseOps(NextState, State);
}
- void ElementwiseOps(mblas::Matrix& NextState,
- const mblas::Matrix& State) const {
+ void ElementwiseOps(mblas::Tensor& NextState,
+ const mblas::Tensor& State) const {
using namespace mblas;
using namespace blaze;
@@ -78,12 +78,12 @@ class GRU {
private:
// Model matrices
const Weights& w_;
- mutable mblas::Matrix WWx_;
- mutable mblas::Matrix UUx_;
+ mutable mblas::Tensor WWx_;
+ mutable mblas::Tensor UUx_;
// reused to avoid allocation
- mutable mblas::Matrix RUH_;
- mutable mblas::Matrix Temp_;
+ mutable mblas::Tensor RUH_;
+ mutable mblas::Tensor Temp_;
};
}
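
Note: the GRU above fuses its weights at construction: WWx_ = Concat<byColumn>(W_, Wx_) and UUx_ = Concat<byColumn>(U_, Ux_) let a single product per input (RUH_ = Context * WWx_, Temp_ = State * UUx_) produce the reset/update pre-activations and the candidate-state pre-activation side by side, which ElementwiseOps then slices apart. A toy illustration of why this works, using blaze directly (simplified shapes, not patch code):

    // With WWx = [W | Wx] stacked column-wise, x * WWx == [x*W | x*Wx]:
    // one GEMM yields every gate pre-activation in adjacent column blocks.
    #include <blaze/Math.h>

    using Plain = blaze::DynamicMatrix<float, blaze::rowMajor>;

    Plain ConcatByColumn(const Plain& a, const Plain& b) {
      Plain out(a.rows(), a.columns() + b.columns());
      blaze::submatrix(out, 0, 0, a.rows(), a.columns()) = a;
      blaze::submatrix(out, 0, a.columns(), b.rows(), b.columns()) = b;
      return out;
    }

    // RUH = Context * ConcatByColumn(W, Wx): columns [0, 2H) hold the
    // reset/update blocks, columns [2H, 3H) the candidate ("x") block.
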
diff --git a/src/amun/cpu/dl4mt/model.cpp b/src/amun/cpu/dl4mt/model.cpp
index a0d190e7..7a660f6e 100644
--- a/src/amun/cpu/dl4mt/model.cpp
+++ b/src/amun/cpu/dl4mt/model.cpp
@@ -25,7 +25,7 @@ Weights::GRU::GRU(const NpzConverter& model, const std::vector<std::string> &key
Gamma_1_(model[keys.at(6)]),
Gamma_2_(model[keys.at(7)])
{
- const_cast<mblas::Matrix&>(Bx2_) = 0.0f;
+ const_cast<mblas::Tensor&>(Bx2_) = 0.0f;
}
//////////////////////////////////////////////////////////////////////////////
@@ -47,7 +47,7 @@ Weights::DecGRU2::DecGRU2(const NpzConverter& model)
Gamma_1_(model["decoder_cell2_gamma1"]),
Gamma_2_(model["decoder_cell2_gamma2"])
{
- const_cast<mblas::Matrix&>(Bx1_) = 0.0f;
+ const_cast<mblas::Tensor&>(Bx1_) = 0.0f;
}
Weights::DecAttention::DecAttention(const NpzConverter& model)
diff --git a/src/amun/cpu/dl4mt/model.h b/src/amun/cpu/dl4mt/model.h
index 87299b87..cfe6ae25 100644
--- a/src/amun/cpu/dl4mt/model.h
+++ b/src/amun/cpu/dl4mt/model.h
@@ -5,7 +5,7 @@
#include <string>
#include "cpu/npz_converter.h"
-#include "cpu/mblas/matrix.h"
+#include "cpu/mblas/tensor.h"
namespace amunmt {
namespace CPU {
@@ -19,21 +19,21 @@ struct Weights {
Embeddings(const NpzConverter& model, const std::string &key);
Embeddings(const NpzConverter& model, const std::vector<std::pair<std::string, bool>> keys);
- const mblas::Matrix E_;
+ const mblas::Tensor E_;
};
struct GRU {
GRU(const NpzConverter& model, const std::vector<std::string> &keys);
- const mblas::Matrix W_;
- const mblas::Matrix B_;
- const mblas::Matrix U_;
- const mblas::Matrix Wx_;
- const mblas::Matrix Bx1_;
- const mblas::Matrix Bx2_;
- const mblas::Matrix Ux_;
- const mblas::Matrix Gamma_1_;
- const mblas::Matrix Gamma_2_;
+ const mblas::Tensor W_;
+ const mblas::Tensor B_;
+ const mblas::Tensor U_;
+ const mblas::Tensor Wx_;
+ const mblas::Tensor Bx1_;
+ const mblas::Tensor Bx2_;
+ const mblas::Tensor Ux_;
+ const mblas::Tensor Gamma_1_;
+ const mblas::Tensor Gamma_2_;
};
//////////////////////////////////////////////////////////////////////////////
@@ -41,51 +41,51 @@ struct Weights {
struct DecInit {
DecInit(const NpzConverter& model);
- const mblas::Matrix Wi_;
- const mblas::Matrix Bi_;
- const mblas::Matrix Gamma_;
+ const mblas::Tensor Wi_;
+ const mblas::Tensor Bi_;
+ const mblas::Tensor Gamma_;
};
struct DecGRU2 {
DecGRU2(const NpzConverter& model);
- const mblas::Matrix W_;
- const mblas::Matrix B_;
- const mblas::Matrix U_;
- const mblas::Matrix Wx_;
- const mblas::Matrix Bx2_;
- const mblas::Matrix Bx1_;
- const mblas::Matrix Ux_;
- const mblas::Matrix Gamma_1_;
- const mblas::Matrix Gamma_2_;
+ const mblas::Tensor W_;
+ const mblas::Tensor B_;
+ const mblas::Tensor U_;
+ const mblas::Tensor Wx_;
+ const mblas::Tensor Bx2_;
+ const mblas::Tensor Bx1_;
+ const mblas::Tensor Ux_;
+ const mblas::Tensor Gamma_1_;
+ const mblas::Tensor Gamma_2_;
};
struct DecAttention {
DecAttention(const NpzConverter& model);
- const mblas::Matrix V_;
- const mblas::Matrix W_;
- const mblas::Matrix B_;
- const mblas::Matrix U_;
- const mblas::Matrix C_;
- const mblas::Matrix Gamma_1_;
- const mblas::Matrix Gamma_2_;
+ const mblas::Tensor V_;
+ const mblas::Tensor W_;
+ const mblas::Tensor B_;
+ const mblas::Tensor U_;
+ const mblas::Tensor C_;
+ const mblas::Tensor Gamma_1_;
+ const mblas::Tensor Gamma_2_;
};
struct DecSoftmax {
DecSoftmax(const NpzConverter& model);
- const mblas::Matrix W1_;
- const mblas::Matrix B1_;
- const mblas::Matrix W2_;
- const mblas::Matrix B2_;
- const mblas::Matrix W3_;
- const mblas::Matrix B3_;
- const mblas::Matrix W4_;
- const mblas::Matrix B4_;
- const mblas::Matrix Gamma_0_;
- const mblas::Matrix Gamma_1_;
- const mblas::Matrix Gamma_2_;
+ const mblas::Tensor W1_;
+ const mblas::Tensor B1_;
+ const mblas::Tensor W2_;
+ const mblas::Tensor B2_;
+ const mblas::Tensor W3_;
+ const mblas::Tensor B3_;
+ const mblas::Tensor W4_;
+ const mblas::Tensor B4_;
+ const mblas::Tensor Gamma_0_;
+ const mblas::Tensor Gamma_1_;
+ const mblas::Tensor Gamma_2_;
};
//////////////////////////////////////////////////////////////////////////////
diff --git a/src/amun/cpu/mblas/matrix.cpp b/src/amun/cpu/mblas/tensor.cpp
index 2ee0bbb7..6c385be0 100644
--- a/src/amun/cpu/mblas/matrix.cpp
+++ b/src/amun/cpu/mblas/tensor.cpp
@@ -1,5 +1,5 @@
#include <boost/iterator/permutation_iterator.hpp>
-#include "cpu/mblas/matrix.h"
+#include "cpu/mblas/tensor.h"
#include "cpu/mblas/simd_math_prims.h"
#include "common/god.h"
#include "common/hypothesis.h"
diff --git a/src/amun/cpu/mblas/matrix.h b/src/amun/cpu/mblas/tensor.h
index 93e1f29a..7104085a 100644
--- a/src/amun/cpu/mblas/matrix.h
+++ b/src/amun/cpu/mblas/tensor.h
@@ -7,7 +7,7 @@
#include <blaze/Math.h>
#include "phoenix_functions.h"
-#include "common/base_matrix.h"
+#include "common/base_tensor.h"
#include "common/exception.h"
namespace amunmt {
@@ -19,16 +19,16 @@ typedef blaze::DynamicVector<float, blaze::rowVector> Vector;
typedef blaze::DynamicVector<float, blaze::columnVector> ColumnVector;
//////////////////////////////////////////////////////////////////////////////////////////////
-class Matrix : public BaseMatrix, public blaze::DynamicMatrix<float, blaze::rowMajor>
+class Tensor : public BaseTensor, public blaze::DynamicMatrix<float, blaze::rowMajor>
{
public:
typedef blaze::DynamicMatrix<float, blaze::rowMajor> Parent;
- Matrix()
+ Tensor()
: Parent()
{}
- Matrix(unsigned rows, unsigned cols)
+ Tensor(unsigned rows, unsigned cols)
: Parent(rows, cols)
{}
@@ -59,7 +59,7 @@ public:
//////////////////////////////////////////////////////////////////////////////////////////////
template <typename T, bool SO = blaze::rowMajor>
-class BlazeMatrix : public BaseMatrix, public blaze::CustomMatrix<T, blaze::unaligned,
+class BlazeMatrix : public BaseTensor, public blaze::CustomMatrix<T, blaze::unaligned,
blaze::unpadded,
blaze::rowMajor> {
public:
@@ -208,7 +208,7 @@ MT& AddBiasVector(MT& m, const VT& b) {
return m;
}
-//Matrix& Swap(Matrix& Out, Matrix& In);
+//Tensor& Swap(Tensor& Out, Tensor& In);
template <class MT>
void Reshape(MT& m, unsigned rows, unsigned cols) {
diff --git a/src/amun/cpu/nematus/decoder.h b/src/amun/cpu/nematus/decoder.h
index 6c7c5aac..ab979ae7 100644
--- a/src/amun/cpu/nematus/decoder.h
+++ b/src/amun/cpu/nematus/decoder.h
@@ -1,6 +1,6 @@
#pragma once
-#include "../mblas/matrix.h"
+#include "../mblas/tensor.h"
#include "model.h"
#include "gru.h"
#include "transition.h"
@@ -19,7 +19,7 @@ class Decoder {
: w_(model)
{}
- void Lookup(mblas::Matrix& Rows, const std::vector<unsigned>& ids) {
+ void Lookup(mblas::Tensor& Rows, const std::vector<unsigned>& ids) {
using namespace mblas;
std::vector<unsigned> tids = ids;
for (auto&& id : tids) {
@@ -27,7 +27,7 @@ class Decoder {
id = 1;
}
}
- Rows = Assemble<byRow, Matrix>(w_.E_, tids);
+ Rows = Assemble<byRow, Tensor>(w_.E_, tids);
}
size_t GetCols() {
@@ -52,15 +52,15 @@ class Decoder {
{}
void InitializeState(
- mblas::Matrix& State,
- const mblas::Matrix& SourceContext,
+ mblas::Tensor& State,
+ const mblas::Tensor& SourceContext,
const size_t batchSize = 1)
{
using namespace mblas;
// Calculate mean of source context, rowwise
// Repeat mean batchSize times by broadcasting
- Temp1_ = Mean<byRow, Matrix>(SourceContext);
+ Temp1_ = Mean<byRow, Tensor>(SourceContext);
Temp2_.resize(batchSize, SourceContext.columns());
Temp2_ = 0.0f;
@@ -78,9 +78,9 @@ class Decoder {
// std::cerr << std::endl;
}
- void GetNextState(mblas::Matrix& NextState,
- const mblas::Matrix& State,
- const mblas::Matrix& Context) {
+ void GetNextState(mblas::Tensor& NextState,
+ const mblas::Tensor& State,
+ const mblas::Tensor& Context) {
gru_.GetNextState(NextState, State, Context);
}
@@ -88,8 +88,8 @@ class Decoder {
const Weights1& w_;
const GRU<Weights2> gru_;
- mblas::Matrix Temp1_;
- mblas::Matrix Temp2_;
+ mblas::Tensor Temp1_;
+ mblas::Tensor Temp2_;
};
//////////////////////////////////////////////////////////////
@@ -102,9 +102,9 @@ class Decoder {
{}
void GetNextState(
- mblas::Matrix& nextState,
- const mblas::Matrix& state,
- const mblas::Matrix& context)
+ mblas::Tensor& nextState,
+ const mblas::Tensor& state,
+ const mblas::Tensor& context)
{
gru_.GetNextState(nextState, state, context);
transition_.GetNextState(nextState);
@@ -128,7 +128,7 @@ class Decoder {
V_ = blaze::trans(blaze::row(w_.V_, 0));
}
- void Init(const mblas::Matrix& SourceContext) {
+ void Init(const mblas::Tensor& SourceContext) {
using namespace mblas;
SCU_ = SourceContext * w_.U_;
mblas::AddBiasVector<mblas::byRow>(SCU_, w_.B_);
@@ -139,9 +139,9 @@ class Decoder {
}
void GetAlignedSourceContext(
- mblas::Matrix& AlignedSourceContext,
- const mblas::Matrix& HiddenState,
- const mblas::Matrix& SourceContext)
+ mblas::Tensor& AlignedSourceContext,
+ const mblas::Tensor& HiddenState,
+ const mblas::Tensor& SourceContext)
{
using namespace mblas;
@@ -150,7 +150,7 @@ class Decoder {
LayerNormalization(Temp2_, w_.W_comb_lns_, w_.W_comb_lnb_);
}
- Temp1_ = Broadcast<Matrix>(Tanh(), SCU_, Temp2_);
+ Temp1_ = Broadcast<Tensor>(Tanh(), SCU_, Temp2_);
A_.resize(Temp1_.rows(), 1);
blaze::column(A_, 0) = Temp1_ * V_;
@@ -166,21 +166,21 @@ class Decoder {
AlignedSourceContext = A_ * SourceContext;
}
- void GetAttention(mblas::Matrix& Attention) {
+ void GetAttention(mblas::Tensor& Attention) {
Attention = A_;
}
- mblas::Matrix& GetAttention() {
+ mblas::Tensor& GetAttention() {
return A_;
}
private:
const Weights& w_;
- mblas::Matrix SCU_;
- mblas::Matrix Temp1_;
- mblas::Matrix Temp2_;
- mblas::Matrix A_;
+ mblas::Tensor SCU_;
+ mblas::Tensor Temp1_;
+ mblas::Tensor Temp2_;
+ mblas::Tensor A_;
mblas::ColumnVector V_;
};
@@ -194,9 +194,9 @@ class Decoder {
{}
void GetProbs(mblas::ArrayMatrix& Probs,
- const mblas::Matrix& State,
- const mblas::Matrix& Embedding,
- const mblas::Matrix& AlignedSourceContext) {
+ const mblas::Tensor& State,
+ const mblas::Tensor& Embedding,
+ const mblas::Tensor& AlignedSourceContext) {
using namespace mblas;
T1_ = State * w_.W1_;
@@ -244,20 +244,20 @@ class Decoder {
void Filter(const std::vector<unsigned>& ids) {
filtered_ = true;
using namespace mblas;
- FilteredW4_ = Assemble<byColumn, Matrix>(w_.W4_, ids);
- FilteredB4_ = Assemble<byColumn, Matrix>(w_.B4_, ids);
+ FilteredW4_ = Assemble<byColumn, Tensor>(w_.W4_, ids);
+ FilteredB4_ = Assemble<byColumn, Tensor>(w_.B4_, ids);
}
private:
const Weights& w_;
bool filtered_;
- mblas::Matrix FilteredW4_;
- mblas::Matrix FilteredB4_;
+ mblas::Tensor FilteredW4_;
+ mblas::Tensor FilteredB4_;
- mblas::Matrix T1_;
- mblas::Matrix T2_;
- mblas::Matrix T3_;
+ mblas::Tensor T1_;
+ mblas::Tensor T2_;
+ mblas::Tensor T3_;
};
public:
@@ -270,10 +270,10 @@ class Decoder {
{}
void Decode(
- mblas::Matrix& NextState,
- const mblas::Matrix& State,
- const mblas::Matrix& Embeddings,
- const mblas::Matrix& SourceContext)
+ mblas::Tensor& NextState,
+ const mblas::Tensor& State,
+ const mblas::Tensor& Embeddings,
+ const mblas::Tensor& SourceContext)
{
GetHiddenState(HiddenState_, State, Embeddings);
// std::cerr << "HIDDEN: " << std::endl;
@@ -297,20 +297,20 @@ class Decoder {
return Probs_;
}
- void EmptyState(mblas::Matrix& State,
- const mblas::Matrix& SourceContext,
+ void EmptyState(mblas::Tensor& State,
+ const mblas::Tensor& SourceContext,
size_t batchSize = 1) {
rnn1_.InitializeState(State, SourceContext, batchSize);
attention_.Init(SourceContext);
}
- void EmptyEmbedding(mblas::Matrix& Embedding,
+ void EmptyEmbedding(mblas::Tensor& Embedding,
size_t batchSize = 1) {
Embedding.resize(batchSize, embeddings_.GetCols());
Embedding = 0.0f;
}
- void Lookup(mblas::Matrix& Embedding,
+ void Lookup(mblas::Tensor& Embedding,
const std::vector<unsigned>& w) {
embeddings_.Lookup(Embedding, w);
}
@@ -319,11 +319,11 @@ class Decoder {
softmax_.Filter(ids);
}
- void GetAttention(mblas::Matrix& attention) {
+ void GetAttention(mblas::Tensor& attention) {
attention_.GetAttention(attention);
}
- mblas::Matrix& GetAttention() {
+ mblas::Tensor& GetAttention() {
return attention_.GetAttention();
}
@@ -333,34 +333,34 @@ class Decoder {
private:
- void GetHiddenState(mblas::Matrix& HiddenState,
- const mblas::Matrix& PrevState,
- const mblas::Matrix& Embedding) {
+ void GetHiddenState(mblas::Tensor& HiddenState,
+ const mblas::Tensor& PrevState,
+ const mblas::Tensor& Embedding) {
rnn1_.GetNextState(HiddenState, PrevState, Embedding);
}
- void GetAlignedSourceContext(mblas::Matrix& AlignedSourceContext,
- const mblas::Matrix& HiddenState,
- const mblas::Matrix& SourceContext) {
+ void GetAlignedSourceContext(mblas::Tensor& AlignedSourceContext,
+ const mblas::Tensor& HiddenState,
+ const mblas::Tensor& SourceContext) {
attention_.GetAlignedSourceContext(AlignedSourceContext, HiddenState, SourceContext);
}
- void GetNextState(mblas::Matrix& State,
- const mblas::Matrix& HiddenState,
- const mblas::Matrix& AlignedSourceContext) {
+ void GetNextState(mblas::Tensor& State,
+ const mblas::Tensor& HiddenState,
+ const mblas::Tensor& AlignedSourceContext) {
rnn2_.GetNextState(State, HiddenState, AlignedSourceContext);
}
- void GetProbs(const mblas::Matrix& State,
- const mblas::Matrix& Embedding,
- const mblas::Matrix& AlignedSourceContext) {
+ void GetProbs(const mblas::Tensor& State,
+ const mblas::Tensor& Embedding,
+ const mblas::Tensor& AlignedSourceContext) {
softmax_.GetProbs(Probs_, State, Embedding, AlignedSourceContext);
}
private:
- mblas::Matrix HiddenState_;
- mblas::Matrix AlignedSourceContext_;
+ mblas::Tensor HiddenState_;
+ mblas::Tensor AlignedSourceContext_;
mblas::ArrayMatrix Probs_;
Embeddings<Weights::Embeddings> embeddings_;
diff --git a/src/amun/cpu/nematus/encoder.cpp b/src/amun/cpu/nematus/encoder.cpp
index 49b9e8ea..240d7601 100644
--- a/src/amun/cpu/nematus/encoder.cpp
+++ b/src/amun/cpu/nematus/encoder.cpp
@@ -6,15 +6,15 @@ namespace amunmt {
namespace CPU {
namespace Nematus {
-void Encoder::GetContext(const std::vector<unsigned>& words, mblas::Matrix& context) {
- std::vector<mblas::Matrix> embeddedWords;
+void Encoder::GetContext(const std::vector<unsigned>& words, mblas::Tensor& context) {
+ std::vector<mblas::Tensor> embeddedWords;
context.resize(words.size(),
forwardRnn_.GetStateLength() + backwardRnn_.GetStateLength());
for (auto& w : words) {
embeddedWords.emplace_back();
- mblas::Matrix &embed = embeddedWords.back();
+ mblas::Tensor &embed = embeddedWords.back();
embeddings_.Lookup(embed, w);
}
diff --git a/src/amun/cpu/nematus/encoder.h b/src/amun/cpu/nematus/encoder.h
index 9e32dde6..b55246b4 100644
--- a/src/amun/cpu/nematus/encoder.h
+++ b/src/amun/cpu/nematus/encoder.h
@@ -1,6 +1,6 @@
#pragma once
-#include "../mblas/matrix.h"
+#include "../mblas/tensor.h"
#include "model.h"
#include "gru.h"
#include "transition.h"
@@ -20,7 +20,7 @@ class Encoder {
: w_(model)
{}
- void Lookup(mblas::Matrix& Row, size_t i) {
+ void Lookup(mblas::Tensor& Row, size_t i) {
size_t len = w_.E_.columns();
if(i < w_.E_.rows())
Row = blaze::submatrix(w_.E_, i, 0, 1, len);
@@ -46,9 +46,9 @@ class Encoder {
State_ = 0.0f;
}
- void GetNextState(mblas::Matrix& nextState,
- const mblas::Matrix& state,
- const mblas::Matrix& embd) {
+ void GetNextState(mblas::Tensor& nextState,
+ const mblas::Tensor& state,
+ const mblas::Tensor& embd) {
gru_.GetNextState(nextState, state, embd);
// std::cerr << "GRU: " << std::endl;
// for (int i = 0; i < 10; ++i) std::cerr << nextState(0, i) << " ";
@@ -60,7 +60,7 @@ class Encoder {
}
template <class It>
- void GetContext(It it, It end, mblas::Matrix& Context, bool invert) {
+ void GetContext(It it, It end, mblas::Tensor& Context, bool invert) {
InitializeState();
size_t n = std::distance(it, end);
@@ -86,7 +86,7 @@ class Encoder {
const GRU<WeightsGRU> gru_;
const Transition transition_;
- mblas::Matrix State_;
+ mblas::Tensor State_;
};
/////////////////////////////////////////////////////////////////
@@ -98,7 +98,7 @@ class Encoder {
{}
void GetContext(const std::vector<unsigned>& words,
- mblas::Matrix& context);
+ mblas::Tensor& context);
private:
Embeddings<Weights::Embeddings> embeddings_;
diff --git a/src/amun/cpu/nematus/encoder_decoder.cpp b/src/amun/cpu/nematus/encoder_decoder.cpp
index 8edc71bb..d3d3697a 100644
--- a/src/amun/cpu/nematus/encoder_decoder.cpp
+++ b/src/amun/cpu/nematus/encoder_decoder.cpp
@@ -7,7 +7,7 @@
#include "common/sentences.h"
#include "cpu/decoder/encoder_decoder_loader.h"
-#include "cpu/mblas/matrix.h"
+#include "cpu/mblas/tensor.h"
using namespace std;
@@ -64,17 +64,17 @@ void EncoderDecoder::AssembleBeamState(const State& in,
const EDState& edIn = in.get<EDState>();
EDState& edOut = out.get<EDState>();
- edOut.GetStates() = mblas::Assemble<mblas::byRow, mblas::Matrix>(edIn.GetStates(), beamStateIds);
+ edOut.GetStates() = mblas::Assemble<mblas::byRow, mblas::Tensor>(edIn.GetStates(), beamStateIds);
decoder_->Lookup(edOut.GetEmbeddings(), beamWords);
}
-void EncoderDecoder::GetAttention(mblas::Matrix& Attention) {
+void EncoderDecoder::GetAttention(mblas::Tensor& Attention) {
decoder_->GetAttention(Attention);
}
-mblas::Matrix& EncoderDecoder::GetAttention() {
+mblas::Tensor& EncoderDecoder::GetAttention() {
return decoder_->GetAttention();
}
@@ -89,7 +89,7 @@ void EncoderDecoder::Filter(const std::vector<unsigned>& filterIds) {
}
-BaseMatrix& EncoderDecoder::GetProbs() {
+BaseTensor& EncoderDecoder::GetProbs() {
return decoder_->GetProbs();
}
diff --git a/src/amun/cpu/nematus/encoder_decoder.h b/src/amun/cpu/nematus/encoder_decoder.h
index ab81dd28..2fd43abb 100644
--- a/src/amun/cpu/nematus/encoder_decoder.h
+++ b/src/amun/cpu/nematus/encoder_decoder.h
@@ -8,7 +8,7 @@
#include "cpu/nematus/decoder.h"
#include "cpu/nematus/model.h"
-#include "cpu/mblas/matrix.h"
+#include "cpu/mblas/tensor.h"
namespace amunmt {
@@ -38,12 +38,12 @@ class EncoderDecoder : public CPUEncoderDecoderBase {
const Beam& beam,
State& out);
- void GetAttention(mblas::Matrix& Attention);
- mblas::Matrix& GetAttention();
+ void GetAttention(mblas::Tensor& Attention);
+ mblas::Tensor& GetAttention();
unsigned GetVocabSize() const;
- BaseMatrix& GetProbs();
+ BaseTensor& GetProbs();
void Filter(const std::vector<unsigned>& filterIds);
diff --git a/src/amun/cpu/nematus/gru.h b/src/amun/cpu/nematus/gru.h
index 33e855fd..3166ca6e 100644
--- a/src/amun/cpu/nematus/gru.h
+++ b/src/amun/cpu/nematus/gru.h
@@ -1,5 +1,5 @@
#pragma once
-#include "cpu/mblas/matrix.h"
+#include "cpu/mblas/tensor.h"
#include <iomanip>
namespace amunmt {
@@ -13,15 +13,15 @@ class GRU {
layerNormalization_(w_.W_lns_.rows())
{
if (!layerNormalization_) {
- WWx_ = mblas::Concat<mblas::byColumn, mblas::Matrix>(w_.W_, w_.Wx_);
- UUx_ = mblas::Concat<mblas::byColumn, mblas::Matrix>(w_.U_, w_.Ux_);
+ WWx_ = mblas::Concat<mblas::byColumn, mblas::Tensor>(w_.W_, w_.Wx_);
+ UUx_ = mblas::Concat<mblas::byColumn, mblas::Tensor>(w_.U_, w_.Ux_);
}
}
void GetNextState(
- mblas::Matrix& nextState,
- const mblas::Matrix& state,
- const mblas::Matrix& context) const
+ mblas::Tensor& nextState,
+ const mblas::Tensor& state,
+ const mblas::Tensor& context) const
{
// std::cerr << "Get next state" << std::endl;
if (layerNormalization_) {
@@ -33,7 +33,7 @@ class GRU {
mblas::AddBiasVector<mblas::byRow>(RUH_2_, w_.Bx1_);
LayerNormalization(RUH_2_, w_.Wx_lns_, w_.Wx_lnb_);
- RUH_ = mblas::Concat<mblas::byColumn, mblas::Matrix>(RUH_1_, RUH_2_);
+ RUH_ = mblas::Concat<mblas::byColumn, mblas::Tensor>(RUH_1_, RUH_2_);
Temp_1_ = state * w_.U_;
mblas::AddBiasVector<mblas::byRow>(Temp_1_, w_.Bx3_);
@@ -43,7 +43,7 @@ class GRU {
mblas::AddBiasVector<mblas::byRow>(Temp_2_, w_.Bx2_);
LayerNormalization(Temp_2_, w_.Ux_lns_, w_.Ux_lnb_);
- Temp_ = mblas::Concat<mblas::byColumn, mblas::Matrix>(Temp_1_, Temp_2_);
+ Temp_ = mblas::Concat<mblas::byColumn, mblas::Tensor>(Temp_1_, Temp_2_);
ElementwiseOpsLayerNorm(nextState, state);
@@ -54,7 +54,7 @@ class GRU {
}
}
- void ElementwiseOps(mblas::Matrix& NextState, const mblas::Matrix& State) const {
+ void ElementwiseOps(mblas::Tensor& NextState, const mblas::Tensor& State) const {
using namespace mblas;
using namespace blaze;
@@ -88,7 +88,7 @@ class GRU {
}
}
- void ElementwiseOpsLayerNorm(mblas::Matrix& NextState, const mblas::Matrix& State) const {
+ void ElementwiseOpsLayerNorm(mblas::Tensor& NextState, const mblas::Tensor& State) const {
using namespace mblas;
using namespace blaze;
@@ -129,21 +129,21 @@ class GRU {
private:
// Model matrices
const Weights& w_;
- mutable mblas::Matrix WWx_;
- mutable mblas::Matrix UUx_;
- mutable mblas::Matrix Wbbx_;
- mutable mblas::Matrix lns_WWx_;
- mutable mblas::Matrix lns_UUx_;
- mutable mblas::Matrix lnb_WWx_;
- mutable mblas::Matrix lnb_UUx_;
+ mutable mblas::Tensor WWx_;
+ mutable mblas::Tensor UUx_;
+ mutable mblas::Tensor Wbbx_;
+ mutable mblas::Tensor lns_WWx_;
+ mutable mblas::Tensor lns_UUx_;
+ mutable mblas::Tensor lnb_WWx_;
+ mutable mblas::Tensor lnb_UUx_;
// reused to avoid allocation
- mutable mblas::Matrix RUH_;
- mutable mblas::Matrix RUH_1_;
- mutable mblas::Matrix RUH_2_;
- mutable mblas::Matrix Temp_;
- mutable mblas::Matrix Temp_1_;
- mutable mblas::Matrix Temp_2_;
+ mutable mblas::Tensor RUH_;
+ mutable mblas::Tensor RUH_1_;
+ mutable mblas::Tensor RUH_2_;
+ mutable mblas::Tensor Temp_;
+ mutable mblas::Tensor Temp_1_;
+ mutable mblas::Tensor Temp_2_;
bool layerNormalization_;
};
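
The non-layer-norm branch above concatenates W with Wx and U with Ux by column. The payoff is one GEMM instead of two per GRU step: with WWx = [W | Wx], the single product x * WWx delivers the reset/update gate pre-activations in the left columns and the candidate pre-activations in the right ones. A standalone sketch of the concatenation step, assuming row-major storage (reference code only):

    #include <cstddef>
    #include <vector>

    // Column-concatenation behind WWx_ = Concat<byColumn>(W_, Wx_):
    // afterwards one x * WWx GEMM yields gate pre-activations in
    // columns [0, aCols) and candidate pre-activations in [aCols, aCols + bCols).
    std::vector<float> ConcatByColumn(const std::vector<float>& a, std::size_t rows,
                                      std::size_t aCols,
                                      const std::vector<float>& b, std::size_t bCols) {
      std::vector<float> out(rows * (aCols + bCols));
      for (std::size_t r = 0; r < rows; ++r) {
        for (std::size_t c = 0; c < aCols; ++c)
          out[r * (aCols + bCols) + c] = a[r * aCols + c];
        for (std::size_t c = 0; c < bCols; ++c)
          out[r * (aCols + bCols) + aCols + c] = b[r * bCols + c];
      }
      return out;
    }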
diff --git a/src/amun/cpu/nematus/model.cpp b/src/amun/cpu/nematus/model.cpp
index de2da9a6..0a024937 100644
--- a/src/amun/cpu/nematus/model.cpp
+++ b/src/amun/cpu/nematus/model.cpp
@@ -20,13 +20,13 @@ Weights::Transition::Transition(const NpzConverter& model, TransitionType type,
switch(type) {
case TransitionType::Encoder:
Bx1_.emplace_back(1, Ux_.back().dim(1));
- const_cast<mblas::Matrix&>(Bx1_.back()) = 0.0f;
+ const_cast<mblas::Tensor&>(Bx1_.back()) = 0.0f;
Bx2_.emplace_back(model(name(prefix, "bx", infix, i), true));
break;
case TransitionType::Decoder:
Bx1_.emplace_back(model(name(prefix, "bx", infix, i), true));
Bx2_.emplace_back(1, Ux_.back().dim(1));
- const_cast<mblas::Matrix&>(Bx2_.back()) = 0.0f;
+ const_cast<mblas::Tensor&>(Bx2_.back()) = 0.0f;
break;
}
}
@@ -86,8 +86,8 @@ Weights::GRU::GRU(const NpzConverter& model, std::string prefix, std::vector<std
Ux_lns_(model[prefix + keys.at(12)]),
Ux_lnb_(model[prefix + keys.at(13)])
{
- const_cast<mblas::Matrix&>(Bx2_) = 0.0f;
- const_cast<mblas::Matrix&>(Bx3_) = 0.0f;
+ const_cast<mblas::Tensor&>(Bx2_) = 0.0f;
+ const_cast<mblas::Tensor&>(Bx3_) = 0.0f;
}
//////////////////////////////////////////////////////////////////////////////
@@ -119,8 +119,8 @@ Weights::DecGRU2::DecGRU2(const NpzConverter& model, std::string prefix, std::ve
Ux_lnb_(model[prefix + keys.at(13)]) // Ux_nl_lnb
{
- const_cast<mblas::Matrix&>(B_) = 0.0f;
- const_cast<mblas::Matrix&>(Bx1_) = 0.0f;
+ const_cast<mblas::Tensor&>(B_) = 0.0f;
+ const_cast<mblas::Tensor&>(Bx1_) = 0.0f;
}
Weights::DecAttention::DecAttention(const NpzConverter& model)
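
The Transition constructor differs between encoder and decoder only in which bias is real: one of Bx1_/Bx2_ is read from the .npz file, the other is built as a 1 x dim tensor and zeroed, a placeholder for a weight the model file does not ship. Weights::GRU and Weights::DecGRU2 use the same idiom on const-qualified members, which is what forces the const_cast. A stand-in sketch of that idiom (illustrative types; std::vector already value-initializes, so the explicit zeroing mirrors the "= 0.0f" above rather than being strictly required):

    #include <cstddef>
    #include <vector>

    // A const member built to the right shape, then forced to zero
    // through const_cast in the constructor body (as for Bx2_/Bx3_).
    struct GruWeights {
      const std::vector<float> bx2_;
      explicit GruWeights(std::size_t n) : bx2_(n) {
        const_cast<std::vector<float>&>(bx2_).assign(n, 0.0f);
      }
    };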
diff --git a/src/amun/cpu/nematus/model.h b/src/amun/cpu/nematus/model.h
index 82fe09f3..b378b434 100644
--- a/src/amun/cpu/nematus/model.h
+++ b/src/amun/cpu/nematus/model.h
@@ -6,7 +6,7 @@
#include "cpu/npz_converter.h"
-#include "cpu/mblas/matrix.h"
+#include "cpu/mblas/tensor.h"
namespace amunmt {
namespace CPU {
@@ -35,16 +35,16 @@ struct Weights {
TransitionType type_;
public:
- std::vector<mblas::Matrix> B_;
- std::vector<mblas::Matrix> Bx1_;
- std::vector<mblas::Matrix> Bx2_;
- std::vector<mblas::Matrix> U_;
- std::vector<mblas::Matrix> Ux_;
+ std::vector<mblas::Tensor> B_;
+ std::vector<mblas::Tensor> Bx1_;
+ std::vector<mblas::Tensor> Bx2_;
+ std::vector<mblas::Tensor> U_;
+ std::vector<mblas::Tensor> Ux_;
- std::vector<mblas::Matrix> U_lns_;
- std::vector<mblas::Matrix> U_lnb_;
- std::vector<mblas::Matrix> Ux_lns_;
- std::vector<mblas::Matrix> Ux_lnb_;
+ std::vector<mblas::Tensor> U_lns_;
+ std::vector<mblas::Tensor> U_lnb_;
+ std::vector<mblas::Tensor> Ux_lns_;
+ std::vector<mblas::Tensor> Ux_lnb_;
};
@@ -52,93 +52,93 @@ struct Weights {
Embeddings(const NpzConverter& model, const std::string &key);
Embeddings(const NpzConverter& model, const std::vector<std::pair<std::string, bool>> keys);
- const mblas::Matrix E_;
+ const mblas::Tensor E_;
};
struct GRU {
GRU(const NpzConverter& model, std::string prefix, std::vector<std::string> keys);
- const mblas::Matrix W_;
- const mblas::Matrix B_;
- const mblas::Matrix U_;
- const mblas::Matrix Wx_;
- const mblas::Matrix Bx1_;
- const mblas::Matrix Bx2_;
- const mblas::Matrix Bx3_;
- const mblas::Matrix Ux_;
-
- const mblas::Matrix W_lns_;
- const mblas::Matrix W_lnb_;
- const mblas::Matrix Wx_lns_;
- const mblas::Matrix Wx_lnb_;
- const mblas::Matrix U_lns_;
- const mblas::Matrix U_lnb_;
- const mblas::Matrix Ux_lns_;
- const mblas::Matrix Ux_lnb_;
+ const mblas::Tensor W_;
+ const mblas::Tensor B_;
+ const mblas::Tensor U_;
+ const mblas::Tensor Wx_;
+ const mblas::Tensor Bx1_;
+ const mblas::Tensor Bx2_;
+ const mblas::Tensor Bx3_;
+ const mblas::Tensor Ux_;
+
+ const mblas::Tensor W_lns_;
+ const mblas::Tensor W_lnb_;
+ const mblas::Tensor Wx_lns_;
+ const mblas::Tensor Wx_lnb_;
+ const mblas::Tensor U_lns_;
+ const mblas::Tensor U_lnb_;
+ const mblas::Tensor Ux_lns_;
+ const mblas::Tensor Ux_lnb_;
};
struct DecInit {
DecInit(const NpzConverter& model);
- const mblas::Matrix Wi_;
- const mblas::Matrix Bi_;
- const mblas::Matrix lns_;
- const mblas::Matrix lnb_;
+ const mblas::Tensor Wi_;
+ const mblas::Tensor Bi_;
+ const mblas::Tensor lns_;
+ const mblas::Tensor lnb_;
};
struct DecGRU2 {
DecGRU2(const NpzConverter& model, std::string prefix, std::vector<std::string> keys);
- const mblas::Matrix W_;
- const mblas::Matrix B_;
- const mblas::Matrix U_;
- const mblas::Matrix Wx_;
- const mblas::Matrix Bx3_;
- const mblas::Matrix Bx2_;
- const mblas::Matrix Bx1_;
- const mblas::Matrix Ux_;
-
- const mblas::Matrix W_lns_;
- const mblas::Matrix W_lnb_;
- const mblas::Matrix Wx_lns_;
- const mblas::Matrix Wx_lnb_;
- const mblas::Matrix U_lns_;
- const mblas::Matrix U_lnb_;
- const mblas::Matrix Ux_lns_;
- const mblas::Matrix Ux_lnb_;
+ const mblas::Tensor W_;
+ const mblas::Tensor B_;
+ const mblas::Tensor U_;
+ const mblas::Tensor Wx_;
+ const mblas::Tensor Bx3_;
+ const mblas::Tensor Bx2_;
+ const mblas::Tensor Bx1_;
+ const mblas::Tensor Ux_;
+
+ const mblas::Tensor W_lns_;
+ const mblas::Tensor W_lnb_;
+ const mblas::Tensor Wx_lns_;
+ const mblas::Tensor Wx_lnb_;
+ const mblas::Tensor U_lns_;
+ const mblas::Tensor U_lnb_;
+ const mblas::Tensor Ux_lns_;
+ const mblas::Tensor Ux_lnb_;
};
struct DecAttention {
DecAttention(const NpzConverter& model);
- const mblas::Matrix V_;
- const mblas::Matrix W_;
- const mblas::Matrix B_;
- const mblas::Matrix U_;
- const mblas::Matrix C_;
- const mblas::Matrix Wc_att_lns_;
- const mblas::Matrix Wc_att_lnb_;
- const mblas::Matrix W_comb_lns_;
- const mblas::Matrix W_comb_lnb_;
+ const mblas::Tensor V_;
+ const mblas::Tensor W_;
+ const mblas::Tensor B_;
+ const mblas::Tensor U_;
+ const mblas::Tensor C_;
+ const mblas::Tensor Wc_att_lns_;
+ const mblas::Tensor Wc_att_lnb_;
+ const mblas::Tensor W_comb_lns_;
+ const mblas::Tensor W_comb_lnb_;
};
struct DecSoftmax {
DecSoftmax(const NpzConverter& model);
- const mblas::Matrix W1_;
- const mblas::Matrix B1_;
- const mblas::Matrix W2_;
- const mblas::Matrix B2_;
- const mblas::Matrix W3_;
- const mblas::Matrix B3_;
- const mblas::Matrix W4_;
- const mblas::Matrix B4_;
- const mblas::Matrix lns_1_;
- const mblas::Matrix lns_2_;
- const mblas::Matrix lns_3_;
- const mblas::Matrix lnb_1_;
- const mblas::Matrix lnb_2_;
- const mblas::Matrix lnb_3_;
+ const mblas::Tensor W1_;
+ const mblas::Tensor B1_;
+ const mblas::Tensor W2_;
+ const mblas::Tensor B2_;
+ const mblas::Tensor W3_;
+ const mblas::Tensor B3_;
+ const mblas::Tensor W4_;
+ const mblas::Tensor B4_;
+ const mblas::Tensor lns_1_;
+ const mblas::Tensor lns_2_;
+ const mblas::Tensor lns_3_;
+ const mblas::Tensor lnb_1_;
+ const mblas::Tensor lnb_2_;
+ const mblas::Tensor lnb_3_;
};
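
Nearly every weight group in this header pairs a matrix with _lns_/_lnb_ tensors; reading these as Nematus layer-normalization scale and bias matches the LayerNormalization(x, w_.W_lns_, w_.W_lnb_) call shape in gru.h. A row-wise reference under that reading (the eps value is an assumption, not taken from the repository):

    #include <cmath>
    #include <cstddef>
    #include <vector>

    // Row-wise layer norm: y = scale * (x - mean) / sqrt(var + eps) + bias.
    void LayerNorm(std::vector<float>& row,
                   const std::vector<float>& scale,
                   const std::vector<float>& bias,
                   float eps = 1e-6f) {
      float mean = 0.0f, var = 0.0f;
      for (float x : row) mean += x;
      mean /= row.size();
      for (float x : row) var += (x - mean) * (x - mean);
      var /= row.size();
      const float denom = std::sqrt(var + eps);
      for (std::size_t i = 0; i < row.size(); ++i)
        row[i] = scale[i] * (row[i] - mean) / denom + bias[i];
    }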
diff --git a/src/amun/cpu/nematus/transition.cpp b/src/amun/cpu/nematus/transition.cpp
index a5ac1c8f..d019c9fa 100644
--- a/src/amun/cpu/nematus/transition.cpp
+++ b/src/amun/cpu/nematus/transition.cpp
@@ -14,7 +14,7 @@ Transition::Transition(const Weights::Transition& model)
}
-void Transition::GetNextState(mblas::Matrix& state) const
+void Transition::GetNextState(mblas::Tensor& state) const
{
if (layerNormalization_) {
for (int i = 0; i < w_.size(); ++i) {
@@ -51,7 +51,7 @@ void Transition::GetNextState(mblas::Matrix& state) const
}
-void Transition::ElementwiseOps(mblas::Matrix& state, int idx) const {
+void Transition::ElementwiseOps(mblas::Tensor& state, int idx) const {
using namespace mblas;
using namespace blaze;
diff --git a/src/amun/cpu/nematus/transition.h b/src/amun/cpu/nematus/transition.h
index 3db3c72e..651ea9ca 100644
--- a/src/amun/cpu/nematus/transition.h
+++ b/src/amun/cpu/nematus/transition.h
@@ -1,6 +1,6 @@
#pragma once
-#include "cpu/mblas/matrix.h"
+#include "cpu/mblas/tensor.h"
#include "model.h"
namespace amunmt {
@@ -11,23 +11,23 @@ class Transition {
public:
Transition(const Weights::Transition& model);
- void GetNextState(mblas::Matrix& state) const;
+ void GetNextState(mblas::Tensor& state) const;
protected:
- void ElementwiseOps(mblas::Matrix& state, int idx) const;
+ void ElementwiseOps(mblas::Tensor& state, int idx) const;
private:
// Model matrices
const Weights::Transition& w_;
// reused to avoid allocation
- mutable mblas::Matrix UUx_;
- mutable mblas::Matrix RUH_;
- mutable mblas::Matrix RUH_1_;
- mutable mblas::Matrix RUH_2_;
- mutable mblas::Matrix Temp_;
- mutable mblas::Matrix Temp_1_;
- mutable mblas::Matrix Temp_2_;
+ mutable mblas::Tensor UUx_;
+ mutable mblas::Tensor RUH_;
+ mutable mblas::Tensor RUH_1_;
+ mutable mblas::Tensor RUH_2_;
+ mutable mblas::Tensor Temp_;
+ mutable mblas::Tensor Temp_1_;
+ mutable mblas::Tensor Temp_2_;
bool layerNormalization_;
};
diff --git a/src/amun/cpu/npz_converter.h b/src/amun/cpu/npz_converter.h
index dfc7381d..15414c2b 100644
--- a/src/amun/cpu/npz_converter.h
+++ b/src/amun/cpu/npz_converter.h
@@ -1,7 +1,7 @@
#pragma once
#include "cnpy/cnpy.h"
-#include "mblas/matrix.h"
+#include "mblas/tensor.h"
namespace amunmt {
namespace CPU {
@@ -66,7 +66,7 @@ class NpzConverter {
destructed_ = true;
}
- mblas::Matrix operator[](const std::string& key) const {
+ mblas::Tensor operator[](const std::string& key) const {
BlazeWrapper matrix;
auto it = model_.find(key);
if(it != model_.end()) {
@@ -79,19 +79,19 @@ class NpzConverter {
}
}
- mblas::Matrix ret;
+ mblas::Tensor ret;
ret = matrix;
return std::move(ret);
}
- mblas::Matrix getFirstOfMany(const std::vector<std::pair<std::string, bool>> keys) const {
+ mblas::Tensor getFirstOfMany(const std::vector<std::pair<std::string, bool>> keys) const {
BlazeWrapper matrix;
for (auto key : keys) {
auto it = model_.find(key.first);
if(it != model_.end()) {
NpyMatrixWrapper np(it->second);
matrix = BlazeWrapper(np.data(), np.size1(), np.size2());
- mblas::Matrix ret;
+ mblas::Tensor ret;
if (key.second) {
const auto matrix2 = blaze::trans(matrix);
ret = matrix2;
@@ -103,11 +103,11 @@ class NpzConverter {
}
std::cerr << "Matrix not found: " << keys[0].first << "\n";
- mblas::Matrix ret;
+ mblas::Tensor ret;
return std::move(ret);
}
- mblas::Matrix operator()(const std::string& key,
+ mblas::Tensor operator()(const std::string& key,
bool transpose) const {
BlazeWrapper matrix;
auto it = model_.find(key);
@@ -117,7 +117,7 @@ class NpzConverter {
} else {
std::cerr << "Missing " << key << std::endl;
}
- mblas::Matrix ret;
+ mblas::Tensor ret;
if (transpose) {
const auto matrix2 = blaze::trans(matrix);
ret = matrix2;
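
The converter offers three lookups, all now returning mblas::Tensor: operator[] for a plain fetch, operator() for fetch plus optional transpose, and getFirstOfMany to try candidate (key, transpose) pairs in order and keep the first hit. Incidentally, the return std::move(ret) pattern here is redundant for a movable type and inhibits NRVO; returning ret directly would do. A standalone analogue of getFirstOfMany over a plain map (transposition elided, stand-in types):

    #include <map>
    #include <string>
    #include <utility>
    #include <vector>

    // Walk candidate (key, transpose) pairs, return the first entry
    // present in the map; the real code applies blaze::trans when the
    // flag is set, and falls back with a "Matrix not found" message.
    std::vector<float> GetFirstOfMany(
        const std::map<std::string, std::vector<float>>& model,
        const std::vector<std::pair<std::string, bool>>& keys) {
      for (const auto& key : keys) {
        auto it = model.find(key.first);
        if (it != model.end()) return it->second;
      }
      return {};
    }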
diff --git a/src/amun/fpga/best_hyps.cpp b/src/amun/fpga/best_hyps.cpp
index 029f66e3..be9f35ec 100644
--- a/src/amun/fpga/best_hyps.cpp
+++ b/src/amun/fpga/best_hyps.cpp
@@ -10,7 +10,7 @@ namespace amunmt {
namespace FPGA {
BestHyps::BestHyps(const God &god, const OpenCLInfo &openCLInfo)
-: BestHypsBase(
+: BaseBestHyps(
!god.Get<bool>("allow-unk"),
god.Get<bool>("n-best"),
god.Get<std::vector<std::string>>("softmax-filter").size(),
@@ -23,12 +23,12 @@ BestHyps::BestHyps(const God &god, const OpenCLInfo &openCLInfo)
//std::cerr << "BestHyps::BestHyps" << std::endl;
}
-void BestHyps::DisAllowUNK(mblas::Matrix& Prob)
+void BestHyps::DisAllowUNK(mblas::Tensor& Prob)
{
SetColumn(Prob, UNK_ID, std::numeric_limits<float>::lowest());
}
-void BestHyps::FindBests(const std::vector<uint>& beamSizes, mblas::Matrix& Probs,
+void BestHyps::FindBests(const std::vector<uint>& beamSizes, mblas::Tensor& Probs,
std::vector<float>& outCosts,
std::vector<unsigned>& outKeys,
const bool isFirst)
@@ -47,7 +47,7 @@ void BestHyps::CalcBeam(
/*
using namespace mblas;
- mblas::Matrix& Probs = static_cast<mblas::Matrix&>(scorers[0]->GetProbs());
+ mblas::Tensor& Probs = static_cast<mblas::Tensor&>(scorers[0]->GetProbs());
//cerr << "Probs=" << Probs.Debug(1) << endl;
std::vector<float> vCosts;
@@ -66,7 +66,7 @@ void BestHyps::CalcBeam(
//std::cerr << "1Probs=" << Probs.Debug(1) << std::endl;
for (size_t i = 1; i < scorers.size(); ++i) {
- mblas::Matrix &currProbs = static_cast<mblas::Matrix&>(scorers[i]->GetProbs());
+ mblas::Tensor &currProbs = static_cast<mblas::Tensor&>(scorers[i]->GetProbs());
float weight = weights_.at(scorers[0]->GetName());
ElementAddWeighted(Probs, weight, currProbs);
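
DisAllowUNK is UNK suppression: one column of the probability tensor is overwritten with the lowest representable float, so no hypothesis can ever select that word. The (currently commented-out) CalcBeam body shows the log-linear ensemble step, folding each further scorer's scores into Probs with its configured weight via ElementAddWeighted. Reference versions of both on a flat row-major array:

    #include <cstddef>
    #include <limits>
    #include <vector>

    // Rows = hypotheses, cols = vocabulary.
    void DisallowColumn(std::vector<float>& probs, std::size_t cols,
                        std::size_t column) {
      for (std::size_t r = 0; r * cols < probs.size(); ++r)
        probs[r * cols + column] = std::numeric_limits<float>::lowest();
    }

    // Log-linear combination: probs += weight * other (ElementAddWeighted).
    void AddWeighted(std::vector<float>& probs, float weight,
                     const std::vector<float>& other) {
      for (std::size_t i = 0; i < probs.size(); ++i)
        probs[i] += weight * other[i];
    }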
diff --git a/src/amun/fpga/best_hyps.h b/src/amun/fpga/best_hyps.h
index 625d07c3..be26e482 100644
--- a/src/amun/fpga/best_hyps.h
+++ b/src/amun/fpga/best_hyps.h
@@ -7,14 +7,14 @@
namespace amunmt {
namespace FPGA {
-class BestHyps : public BestHypsBase
+class BestHyps : public BaseBestHyps
{
public:
BestHyps(const God &god, const OpenCLInfo &openCLInfo);
- void DisAllowUNK(mblas::Matrix& Prob);
+ void DisAllowUNK(mblas::Tensor& Prob);
- void FindBests(const std::vector<uint>& beamSizes, mblas::Matrix& Probs,
+ void FindBests(const std::vector<uint>& beamSizes, mblas::Tensor& Probs,
std::vector<float>& outCosts,
std::vector<unsigned>& outKeys,
const bool isFirst);
diff --git a/src/amun/fpga/decoder.cpp b/src/amun/fpga/decoder.cpp
index d11d11c5..dc37b5b2 100644
--- a/src/amun/fpga/decoder.cpp
+++ b/src/amun/fpga/decoder.cpp
@@ -3,8 +3,8 @@
namespace amunmt {
namespace FPGA {
-void Decoder::EmptyState(mblas::Matrix& State,
- const mblas::Matrix& SourceContext,
+void Decoder::EmptyState(mblas::Tensor& State,
+ const mblas::Tensor& SourceContext,
size_t batchSize,
const Array<int>& batchMapping)
{
@@ -12,15 +12,15 @@ void Decoder::EmptyState(mblas::Matrix& State,
alignment_.Init(SourceContext);
}
-void Decoder::EmptyEmbedding(mblas::Matrix& Embedding, size_t batchSize) {
+void Decoder::EmptyEmbedding(mblas::Tensor& Embedding, size_t batchSize) {
Embedding.Resize(batchSize, embeddings_.GetCols());
mblas::Fill(Embedding, 0);
}
-void Decoder::Decode(mblas::Matrix& NextState,
- const mblas::Matrix& State,
- const mblas::Matrix& Embeddings,
- const mblas::Matrix& SourceContext,
+void Decoder::Decode(mblas::Tensor& NextState,
+ const mblas::Tensor& State,
+ const mblas::Tensor& Embeddings,
+ const mblas::Tensor& SourceContext,
const Array<int>& mapping,
const std::vector<uint>& beamSizes)
{
@@ -40,15 +40,15 @@ void Decoder::Decode(mblas::Matrix& NextState,
}
-void Decoder::GetHiddenState(mblas::Matrix& HiddenState,
- const mblas::Matrix& PrevState,
- const mblas::Matrix& Embedding) {
+void Decoder::GetHiddenState(mblas::Tensor& HiddenState,
+ const mblas::Tensor& PrevState,
+ const mblas::Tensor& Embedding) {
rnn1_.GetNextState(HiddenState, PrevState, Embedding);
}
-void Decoder::GetAlignedSourceContext(mblas::Matrix& AlignedSourceContext,
- const mblas::Matrix& HiddenState,
- const mblas::Matrix& SourceContext,
+void Decoder::GetAlignedSourceContext(mblas::Tensor& AlignedSourceContext,
+ const mblas::Tensor& HiddenState,
+ const mblas::Tensor& SourceContext,
const Array<int>& mapping,
const std::vector<uint>& beamSizes)
{
@@ -57,21 +57,21 @@ void Decoder::GetAlignedSourceContext(mblas::Matrix& AlignedSourceContext,
}
-void Decoder::GetNextState(mblas::Matrix& State,
- const mblas::Matrix& HiddenState,
- const mblas::Matrix& AlignedSourceContext)
+void Decoder::GetNextState(mblas::Tensor& State,
+ const mblas::Tensor& HiddenState,
+ const mblas::Tensor& AlignedSourceContext)
{
rnn2_.GetNextState(State, HiddenState, AlignedSourceContext);
}
-void Decoder::GetProbs(const mblas::Matrix& State,
- const mblas::Matrix& Embedding,
- const mblas::Matrix& AlignedSourceContext)
+void Decoder::GetProbs(const mblas::Tensor& State,
+ const mblas::Tensor& Embedding,
+ const mblas::Tensor& AlignedSourceContext)
{
softmax_.GetProbs(Probs_, State, Embedding, AlignedSourceContext);
}
-void Decoder::Lookup(mblas::Matrix& Embedding,
+void Decoder::Lookup(mblas::Tensor& Embedding,
const std::vector<uint>& w)
{
embeddings_.Lookup(Embedding, w);
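
Decode above runs one step of the conditional-GRU decoder; its body is not shown in this hunk, but the helper set and the scratch members in decoder.h imply the canonical order: first RNN layer on previous state and embedding, attention over the source context, second RNN layer on the attended context, then the output layer. In outline (names as in this file):

    // One decoder step, as sequenced by Decoder::Decode:
    //   GetHiddenState(HiddenState_, State, Embeddings);               // rnn1_
    //   GetAlignedSourceContext(AlignedSourceContext_, HiddenState_,
    //                           SourceContext, mapping, beamSizes);    // attention
    //   GetNextState(NextState, HiddenState_, AlignedSourceContext_);  // rnn2_
    //   GetProbs(NextState, Embeddings, AlignedSourceContext_);        // softmax_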
diff --git a/src/amun/fpga/decoder.h b/src/amun/fpga/decoder.h
index 7314b1ec..124a1585 100644
--- a/src/amun/fpga/decoder.h
+++ b/src/amun/fpga/decoder.h
@@ -22,7 +22,7 @@ class Decoder {
, indices_(openCLInfo)
{}
- void Lookup(mblas::Matrix& Rows, const std::vector<uint>& ids)
+ void Lookup(mblas::Tensor& Rows, const std::vector<uint>& ids)
{
using namespace mblas;
std::vector<uint> tids = ids;
@@ -60,8 +60,8 @@ class Decoder {
, Temp2_(openCLInfo)
{}
- void InitializeState(mblas::Matrix& State,
- const mblas::Matrix& SourceContext,
+ void InitializeState(mblas::Tensor& State,
+ const mblas::Tensor& SourceContext,
const size_t batchSize,
const Array<int>& mapping)
{
@@ -92,9 +92,9 @@ class Decoder {
}
- void GetNextState(mblas::Matrix& NextState,
- const mblas::Matrix& State,
- const mblas::Matrix& Context) {
+ void GetNextState(mblas::Tensor& NextState,
+ const mblas::Tensor& State,
+ const mblas::Tensor& Context) {
gru_.GetNextState(NextState, State, Context);
}
@@ -102,8 +102,8 @@ class Decoder {
const Weights1& w_;
const GRU<Weights2> gru_;
- mblas::Matrix Temp1_;
- mblas::Matrix Temp2_;
+ mblas::Tensor Temp1_;
+ mblas::Tensor Temp2_;
};
template <class Weights>
@@ -112,9 +112,9 @@ class Decoder {
RNNFinal(const OpenCLInfo &openCLInfo, const Weights& model)
: gru_(openCLInfo, model) {}
- void GetNextState(mblas::Matrix& NextState,
- const mblas::Matrix& State,
- const mblas::Matrix& Context)
+ void GetNextState(mblas::Tensor& NextState,
+ const mblas::Tensor& State,
+ const mblas::Tensor& Context)
{
gru_.GetNextState(NextState, State, Context);
}
@@ -136,7 +136,7 @@ class Decoder {
{
}
- void Init(const mblas::Matrix& SourceContext)
+ void Init(const mblas::Tensor& SourceContext)
{
using namespace mblas;
@@ -149,9 +149,9 @@ class Decoder {
}
}
- void GetAlignedSourceContext(mblas::Matrix& AlignedSourceContext,
- const mblas::Matrix& HiddenState,
- const mblas::Matrix& SourceContext,
+ void GetAlignedSourceContext(mblas::Tensor& AlignedSourceContext,
+ const mblas::Tensor& HiddenState,
+ const mblas::Tensor& SourceContext,
const Array<int>& mapping,
const std::vector<uint>& beamSizes)
{
@@ -218,7 +218,7 @@ class Decoder {
//std::cerr << "2AlignedSourceContext=" << AlignedSourceContext.Debug() << std::endl;
}
- mblas::Matrix& GetAttention() {
+ mblas::Tensor& GetAttention() {
return A_;
}
@@ -227,10 +227,10 @@ class Decoder {
Array<int> dBatchMapping_;
- mblas::Matrix SCU_;
- mblas::Matrix Temp1_;
- mblas::Matrix Temp2_;
- mblas::Matrix A_;
+ mblas::Tensor SCU_;
+ mblas::Tensor Temp1_;
+ mblas::Tensor Temp2_;
+ mblas::Tensor A_;
};
@@ -245,10 +245,10 @@ class Decoder {
{
}
- void GetProbs(mblas::Matrix& Probs,
- const mblas::Matrix& State,
- const mblas::Matrix& Embedding,
- const mblas::Matrix& AlignedSourceContext)
+ void GetProbs(mblas::Tensor& Probs,
+ const mblas::Tensor& State,
+ const mblas::Tensor& Embedding,
+ const mblas::Tensor& AlignedSourceContext)
{
using namespace mblas;
@@ -295,9 +295,9 @@ class Decoder {
const Weights& w_;
bool filtered_;
- mblas::Matrix T1_;
- mblas::Matrix T2_;
- mblas::Matrix T3_;
+ mblas::Tensor T1_;
+ mblas::Tensor T2_;
+ mblas::Tensor T3_;
};
@@ -317,53 +317,53 @@ public:
return embeddings_.GetRows();
}
- mblas::Matrix& GetProbs() {
+ mblas::Tensor& GetProbs() {
return Probs_;
}
- mblas::Matrix& GetAttention() {
+ mblas::Tensor& GetAttention() {
return alignment_.GetAttention();
}
- void EmptyState(mblas::Matrix& State,
- const mblas::Matrix& SourceContext,
+ void EmptyState(mblas::Tensor& State,
+ const mblas::Tensor& SourceContext,
size_t batchSize,
const Array<int>& batchMapping);
- void EmptyEmbedding(mblas::Matrix& Embedding, size_t batchSize = 1);
+ void EmptyEmbedding(mblas::Tensor& Embedding, size_t batchSize = 1);
- void Decode(mblas::Matrix& NextState,
- const mblas::Matrix& State,
- const mblas::Matrix& Embeddings,
- const mblas::Matrix& SourceContext,
+ void Decode(mblas::Tensor& NextState,
+ const mblas::Tensor& State,
+ const mblas::Tensor& Embeddings,
+ const mblas::Tensor& SourceContext,
const Array<int>& mapping,
const std::vector<uint>& beamSizes);
- void GetHiddenState(mblas::Matrix& HiddenState,
- const mblas::Matrix& PrevState,
- const mblas::Matrix& Embedding);
+ void GetHiddenState(mblas::Tensor& HiddenState,
+ const mblas::Tensor& PrevState,
+ const mblas::Tensor& Embedding);
- void GetAlignedSourceContext(mblas::Matrix& AlignedSourceContext,
- const mblas::Matrix& HiddenState,
- const mblas::Matrix& SourceContext,
+ void GetAlignedSourceContext(mblas::Tensor& AlignedSourceContext,
+ const mblas::Tensor& HiddenState,
+ const mblas::Tensor& SourceContext,
const Array<int>& mapping,
const std::vector<uint>& beamSizes);
- void GetNextState(mblas::Matrix& State,
- const mblas::Matrix& HiddenState,
- const mblas::Matrix& AlignedSourceContext);
+ void GetNextState(mblas::Tensor& State,
+ const mblas::Tensor& HiddenState,
+ const mblas::Tensor& AlignedSourceContext);
- void GetProbs(const mblas::Matrix& State,
- const mblas::Matrix& Embedding,
- const mblas::Matrix& AlignedSourceContext);
+ void GetProbs(const mblas::Tensor& State,
+ const mblas::Tensor& Embedding,
+ const mblas::Tensor& AlignedSourceContext);
- void Lookup(mblas::Matrix& Embedding,
+ void Lookup(mblas::Tensor& Embedding,
const std::vector<uint>& w);
private:
- mblas::Matrix HiddenState_;
- mblas::Matrix AlignedSourceContext_;
- mblas::Matrix Probs_;
+ mblas::Tensor HiddenState_;
+ mblas::Tensor AlignedSourceContext_;
+ mblas::Tensor Probs_;
Embeddings<Weights::DecEmbeddings> embeddings_;
RNNHidden<Weights::DecInit, Weights::DecGRU1> rnn1_;
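
The Alignment class above is the attention module: Init precomputes SCU_ = SourceContext * U once per sentence, and GetAlignedSourceContext scores every source position against the current hidden state, storing the normalized weights in A_ and returning the weighted context. This is additive (Bahdanau-style) attention, e_t = v . tanh(W h + U s_t) followed by a softmax over t. A single-hypothesis reference, assuming the two projections are already applied (illustrative code; the FPGA kernels batch this and reuse SCU_):

    #include <algorithm>
    #include <cmath>
    #include <cstddef>
    #include <vector>

    // scores[t] = v . tanh(wh[t] + us), then softmax over t.
    std::vector<float> Attention(const std::vector<std::vector<float>>& wh,
                                 const std::vector<float>& us,
                                 const std::vector<float>& v) {
      std::vector<float> e(wh.size(), 0.0f);
      if (e.empty()) return e;
      for (std::size_t t = 0; t < wh.size(); ++t)
        for (std::size_t k = 0; k < v.size(); ++k)
          e[t] += v[k] * std::tanh(wh[t][k] + us[k]);
      float mx = e[0], sum = 0.0f;
      for (float x : e) mx = std::max(mx, x);
      for (float& x : e) { x = std::exp(x - mx); sum += x; }
      for (float& x : e) x /= sum;   // softmax over source positions
      return e;                      // one row of A_
    }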
diff --git a/src/amun/fpga/encoder.cpp b/src/amun/fpga/encoder.cpp
index 03c945df..f65d342a 100644
--- a/src/amun/fpga/encoder.cpp
+++ b/src/amun/fpga/encoder.cpp
@@ -37,7 +37,7 @@ std::vector<std::vector<size_t>> GetBatchInput(const Sentences& source, size_t t
return matrix;
}
-void Encoder::Encode(const Sentences& source, size_t tab, mblas::Matrix& context,
+void Encoder::Encode(const Sentences& source, size_t tab, mblas::Tensor& context,
Array<int>& dMapping)
{
size_t maxSentenceLength = GetMaxLength(source, tab);
diff --git a/src/amun/fpga/encoder.h b/src/amun/fpga/encoder.h
index 7919923f..57701796 100644
--- a/src/amun/fpga/encoder.h
+++ b/src/amun/fpga/encoder.h
@@ -17,7 +17,7 @@ class Encoder {
: w_(model)
{}
- void Lookup(const OpenCLInfo &openCLInfo, mblas::Matrix& Row, const Words& words)
+ void Lookup(const OpenCLInfo &openCLInfo, mblas::Tensor& Row, const Words& words)
{
std::vector<uint> knownWords(words.size(), 1);
for (size_t i = 0; i < words.size(); ++i) {
@@ -68,19 +68,19 @@ class Encoder {
mblas::Fill(State_, 0.0f);
}
- void GetNextState(mblas::Matrix& NextState,
- const mblas::Matrix& State,
- const mblas::Matrix& Embd) {
+ void GetNextState(mblas::Tensor& NextState,
+ const mblas::Tensor& State,
+ const mblas::Tensor& Embd) {
gru_.GetNextState(NextState, State, Embd);
}
template <class It>
- void Encode(It it, It end, mblas::Matrix& Context, size_t batchSize, bool invert,
+ void Encode(It it, It end, mblas::Tensor& Context, size_t batchSize, bool invert,
const Array<int>* mapping=nullptr)
{
InitializeState(batchSize);
- mblas::Matrix prevState(State_);
+ mblas::Tensor prevState(State_);
//std::cerr << "State_=" << State_.Debug(1) << std::endl;
//std::cerr << "prevState=" << prevState.Debug(1) << std::endl;
@@ -118,14 +118,14 @@ class Encoder {
// Model matrices
const GRU<Weights> gru_;
- mblas::Matrix State_;
+ mblas::Tensor State_;
};
public:
Encoder(const OpenCLInfo &openCLInfo, const Weights& model);
- void Encode(const Sentences& source, size_t tab, mblas::Matrix& Context,
+ void Encode(const Sentences& source, size_t tab, mblas::Tensor& Context,
Array<int>& dMapping);
protected:
@@ -134,8 +134,8 @@ protected:
RNN<Weights::EncBackwardGRU> backwardRnn_;
// reusing memory
- std::vector<mblas::Matrix> embeddedWords_;
- mblas::Matrix Context;
+ std::vector<mblas::Tensor> embeddedWords_;
+ mblas::Tensor Context;
const OpenCLInfo &openCLInfo_;
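
The templated Encode above is invoked once per direction, forward and with invert set for the backward pass, each time from a zeroed State_ with prevState snapshotted before each GRU step; the two passes together form the bidirectional source context. A control-flow sketch under those assumptions (placeholder types; the real code writes into slices of Context rather than separate vectors):

    #include <cstddef>
    #include <vector>

    // Run the step function left-to-right and right-to-left; a real
    // implementation concatenates fwd[t] and bwd[T-1-t] per position.
    template <class Step, class Vec>
    void BiEncode(const std::vector<Vec>& embedded, Step step,
                  std::vector<Vec>& fwd, std::vector<Vec>& bwd) {
      Vec state{};                                   // fresh initial state
      for (std::size_t t = 0; t < embedded.size(); ++t)
        fwd.push_back(state = step(state, embedded[t]));
      state = Vec{};
      for (std::size_t t = embedded.size(); t-- > 0; )
        bwd.push_back(state = step(state, embedded[t]));
    }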
diff --git a/src/amun/fpga/encoder_decoder.cpp b/src/amun/fpga/encoder_decoder.cpp
index 9c7bc577..1848bfe0 100644
--- a/src/amun/fpga/encoder_decoder.cpp
+++ b/src/amun/fpga/encoder_decoder.cpp
@@ -106,7 +106,7 @@ unsigned EncoderDecoder::GetVocabSize() const
return decoder_->GetVocabSize();
}
-BaseMatrix& EncoderDecoder::GetProbs()
+BaseTensor& EncoderDecoder::GetProbs()
{
return decoder_->GetProbs();
}
diff --git a/src/amun/fpga/encoder_decoder.h b/src/amun/fpga/encoder_decoder.h
index 127f80d4..e5c4f1f6 100644
--- a/src/amun/fpga/encoder_decoder.h
+++ b/src/amun/fpga/encoder_decoder.h
@@ -41,11 +41,11 @@ public:
virtual size_t GetVocabSize() const;
- virtual BaseMatrix& GetProbs();
+ virtual BaseTensor& GetProbs();
protected:
const Weights& model_;
- mblas::Matrix sourceContext_;
+ mblas::Tensor sourceContext_;
std::unique_ptr<Encoder> encoder_;
std::unique_ptr<Decoder> decoder_;
diff --git a/src/amun/fpga/encoder_decoder_loader.cpp b/src/amun/fpga/encoder_decoder_loader.cpp
index 5ff380e4..fa146bbb 100644
--- a/src/amun/fpga/encoder_decoder_loader.cpp
+++ b/src/amun/fpga/encoder_decoder_loader.cpp
@@ -63,10 +63,10 @@ ScorerPtr EncoderDecoderLoader::NewScorer(const God &god, const DeviceInfo &devi
return ScorerPtr(ed);
}
-BestHypsBasePtr EncoderDecoderLoader::GetBestHyps(const God &god, const DeviceInfo &deviceInfo) const
+BaseBestHypsPtr EncoderDecoderLoader::GetBestHyps(const God &god, const DeviceInfo &deviceInfo) const
{
BestHyps *obj = new BestHyps(god, openCLInfo_);
- return BestHypsBasePtr(obj);
+ return BaseBestHypsPtr(obj);
}
diff --git a/src/amun/fpga/encoder_decoder_loader.h b/src/amun/fpga/encoder_decoder_loader.h
index dac88962..d450938c 100644
--- a/src/amun/fpga/encoder_decoder_loader.h
+++ b/src/amun/fpga/encoder_decoder_loader.h
@@ -18,7 +18,7 @@ public:
virtual void Load(const God &god);
virtual ScorerPtr NewScorer(const God &god, const DeviceInfo &deviceInfo) const;
- virtual BestHypsBasePtr GetBestHyps(const God &god, const DeviceInfo &deviceInfo) const;
+ virtual BaseBestHypsPtr GetBestHyps(const God &god, const DeviceInfo &deviceInfo) const;
protected:
diff --git a/src/amun/fpga/encoder_decoder_state.cpp b/src/amun/fpga/encoder_decoder_state.cpp
index 17bde56f..ab49ce4b 100644
--- a/src/amun/fpga/encoder_decoder_state.cpp
+++ b/src/amun/fpga/encoder_decoder_state.cpp
@@ -13,19 +13,19 @@ EncoderDecoderState::EncoderDecoderState(const OpenCLInfo &openCLInfo)
}
-mblas::Matrix& EncoderDecoderState::GetStates() {
+mblas::Tensor& EncoderDecoderState::GetStates() {
return states_;
}
-const mblas::Matrix& EncoderDecoderState::GetStates() const {
+const mblas::Tensor& EncoderDecoderState::GetStates() const {
return states_;
}
-mblas::Matrix& EncoderDecoderState::GetEmbeddings() {
+mblas::Tensor& EncoderDecoderState::GetEmbeddings() {
return embeddings_;
}
-const mblas::Matrix& EncoderDecoderState::GetEmbeddings() const {
+const mblas::Tensor& EncoderDecoderState::GetEmbeddings() const {
return embeddings_;
}
diff --git a/src/amun/fpga/encoder_decoder_state.h b/src/amun/fpga/encoder_decoder_state.h
index 8b569c99..e508c090 100644
--- a/src/amun/fpga/encoder_decoder_state.h
+++ b/src/amun/fpga/encoder_decoder_state.h
@@ -9,16 +9,16 @@ class EncoderDecoderState : public State {
public:
EncoderDecoderState(const OpenCLInfo &openCLInfo);
- mblas::Matrix& GetStates();
- mblas::Matrix& GetEmbeddings();
- const mblas::Matrix& GetStates() const;
- const mblas::Matrix& GetEmbeddings() const;
+ mblas::Tensor& GetStates();
+ mblas::Tensor& GetEmbeddings();
+ const mblas::Tensor& GetStates() const;
+ const mblas::Tensor& GetEmbeddings() const;
virtual std::string Debug(size_t verbosity = 1) const;
protected:
- mblas::Matrix states_;
- mblas::Matrix embeddings_;
+ mblas::Tensor states_;
+ mblas::Tensor embeddings_;
};
diff --git a/src/amun/fpga/gru.h b/src/amun/fpga/gru.h
index 362a3435..84294b19 100644
--- a/src/amun/fpga/gru.h
+++ b/src/amun/fpga/gru.h
@@ -24,9 +24,9 @@ public:
return w_.U_.dim(0);
}
- void GetNextState(mblas::Matrix& NextState,
- const mblas::Matrix& State,
- const mblas::Matrix& Context) const
+ void GetNextState(mblas::Tensor& NextState,
+ const mblas::Tensor& State,
+ const mblas::Tensor& Context) const
{
using namespace mblas;
@@ -92,13 +92,13 @@ protected:
const Weights& w_;
// reused to avoid allocation
- mutable mblas::Matrix RU_;
- mutable mblas::Matrix H_;
- mutable mblas::Matrix R_;
- mutable mblas::Matrix U_;
+ mutable mblas::Tensor RU_;
+ mutable mblas::Tensor H_;
+ mutable mblas::Tensor R_;
+ mutable mblas::Tensor U_;
- mutable mblas::Matrix Temp1_;
- mutable mblas::Matrix Temp2_;
+ mutable mblas::Tensor Temp1_;
+ mutable mblas::Tensor Temp2_;
};
@@ -148,9 +148,9 @@ public:
return w_.U_.dim(0);
}
- void GetNextState(mblas::Matrix& NextState,
- const mblas::Matrix& State,
- const mblas::Matrix& Context) const
+ void GetNextState(mblas::Tensor& NextState,
+ const mblas::Tensor& State,
+ const mblas::Tensor& Context) const
{
using namespace mblas;
@@ -181,10 +181,10 @@ public:
}
- void ElementwiseOps(mblas::Matrix& NextState,
- const mblas::Matrix& State,
- const mblas::Matrix& RUH,
- const mblas::Matrix& Temp) const
+ void ElementwiseOps(mblas::Tensor& NextState,
+ const mblas::Tensor& State,
+ const mblas::Tensor& RUH,
+ const mblas::Tensor& Temp) const
{
const uint rows = State.dim(0) * State.dim(2) * State.dim(3);
const uint cols = State.dim(1);
@@ -202,11 +202,11 @@ protected:
const Weights& w_;
// reused to avoid allocation
- mutable mblas::Matrix WWx_;
- mutable mblas::Matrix UUx_;
+ mutable mblas::Tensor WWx_;
+ mutable mblas::Tensor UUx_;
- mutable mblas::Matrix RUH_;
- mutable mblas::Matrix Temp_;
+ mutable mblas::Tensor RUH_;
+ mutable mblas::Tensor Temp_;
};
diff --git a/src/amun/fpga/matrix.cpp b/src/amun/fpga/matrix.cpp
index d47cb13f..42e6c03c 100644
--- a/src/amun/fpga/matrix.cpp
+++ b/src/amun/fpga/matrix.cpp
@@ -78,7 +78,7 @@ void Matrix::Reshape2D()
std::string Matrix::Debug(size_t verbosity) const
{
std::stringstream strm;
- strm << BaseMatrix::Debug(verbosity) << " " << arr_.Debug(verbosity);
+ strm << BaseTensor::Debug(verbosity) << " " << arr_.Debug(verbosity);
//cerr << "Debug1=" << strm.str() << endl;
return strm.str();
diff --git a/src/amun/fpga/matrix.h b/src/amun/fpga/matrix.h
index 94ee30e4..6af26c19 100644
--- a/src/amun/fpga/matrix.h
+++ b/src/amun/fpga/matrix.h
@@ -1,5 +1,5 @@
#pragma once
-#include "common/base_matrix.h"
+#include "common/base_tensor.h"
#include "types-fpga.h"
#include "array.h"
@@ -7,7 +7,7 @@ namespace amunmt {
namespace FPGA {
namespace mblas {
-class Matrix : public BaseMatrix {
+class Matrix : public BaseTensor {
public:
Matrix(const OpenCLInfo &openCLInfo);
Matrix(const OpenCLInfo &openCLInfo, size_t rows, size_t cols, bool zero = false);
diff --git a/src/amun/fpga/matrix_functions.cpp b/src/amun/fpga/matrix_functions.cpp
index 017bb062..7d373420 100644
--- a/src/amun/fpga/matrix_functions.cpp
+++ b/src/amun/fpga/matrix_functions.cpp
@@ -233,13 +233,13 @@ Matrix& Prod(Matrix& C, const Matrix& A, const Matrix& B,
return C;
}
-void ElementwiseOps(mblas::Matrix& NextState,
- const mblas::Matrix& State,
- const mblas::Matrix& RUH,
- const mblas::Matrix& Temp,
- const mblas::Matrix& B,
- const mblas::Matrix& Bx1,
- const mblas::Matrix& Bx2,
+void ElementwiseOps(mblas::Tensor& NextState,
+ const mblas::Tensor& State,
+ const mblas::Tensor& RUH,
+ const mblas::Tensor& Temp,
+ const mblas::Tensor& B,
+ const mblas::Tensor& Bx1,
+ const mblas::Tensor& Bx2,
const uint &rows,
const uint &cols)
{
@@ -536,7 +536,7 @@ void SetColumn(Matrix& In, int noColumn, float value)
void MaxElement(
Array<float> &d_out,
const Array<int> &d_ind,
- mblas::Matrix &d_in,
+ mblas::Tensor &d_in,
int numBatches,
const Array<int> &batchFirstElementIdxs)
{
@@ -554,7 +554,7 @@ void MaxElement(
void NthElement(
Array<float>& d_out,
Array<unsigned> &d_ind,
- const mblas::Matrix &Probs,
+ const mblas::Tensor &Probs,
const Array<uint> &beamSizes,
size_t maxBatchSize,
const Array<uint> &d_cummulatedBeamSizes,
diff --git a/src/amun/fpga/matrix_functions.h b/src/amun/fpga/matrix_functions.h
index b6684b37..af4a1cc4 100644
--- a/src/amun/fpga/matrix_functions.h
+++ b/src/amun/fpga/matrix_functions.h
@@ -56,13 +56,13 @@ Matrix& Prod(Matrix& C, const Matrix& A, const Matrix& B,
inline void Normalization(Matrix& out, const Matrix& in, const Matrix& alpha, float eps)
{}
-void ElementwiseOps(mblas::Matrix& NextState,
- const mblas::Matrix& State,
- const mblas::Matrix& RUH,
- const mblas::Matrix& Temp,
- const mblas::Matrix& B,
- const mblas::Matrix& Bx1,
- const mblas::Matrix& Bx2,
+void ElementwiseOps(mblas::Tensor& NextState,
+ const mblas::Tensor& State,
+ const mblas::Tensor& RUH,
+ const mblas::Tensor& Temp,
+ const mblas::Tensor& B,
+ const mblas::Tensor& Bx1,
+ const mblas::Tensor& Bx2,
const uint &rows,
const uint &cols);
@@ -106,7 +106,7 @@ void SetColumn(Matrix& In, int noColumn, float value);
void MaxElement(
Array<float> &d_out,
const Array<int> &d_ind,
- mblas::Matrix &d_in,
+ mblas::Tensor &d_in,
int numBatches,
const Array<int> &batchFirstElementIdxs);
//float* d_out, int* d_ind, float* d_in, int numBatches, int* batchFirstElementIdxs
@@ -114,7 +114,7 @@ void MaxElement(
void NthElement(
Array<float>& d_out,
Array<unsigned> &d_ind,
- const mblas::Matrix &Probs,
+ const mblas::Tensor &Probs,
const Array<uint> &beamSizes,
size_t maxBatchSize,
const Array<uint> &d_cummulatedBeamSizes,
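
MaxElement and NthElement operate on the whole batch at once: Probs stacks every live hypothesis row-wise, beamSizes holds each sentence's current beam width, and the prefix sums in d_cummulatedBeamSizes/batchFirstElementIdxs mark where each sentence's rows begin. A CPU reference of the per-batch argmax under that layout (illustrative only):

    #include <cstddef>
    #include <vector>

    // Batch b owns rows [firstRow[b], firstRow[b+1]) of a row-major
    // score array with `cols` columns; emit each batch's best entry.
    void MaxElementRef(const std::vector<float>& scores, int cols,
                       const std::vector<int>& firstRow,
                       std::vector<float>& outVal, std::vector<int>& outIdx) {
      for (std::size_t b = 0; b + 1 < firstRow.size(); ++b) {
        if (firstRow[b] == firstRow[b + 1]) continue;  // batch finished
        int best = firstRow[b] * cols;
        for (int i = best + 1; i < firstRow[b + 1] * cols; ++i)
          if (scores[i] > scores[best]) best = i;
        outVal.push_back(scores[best]);
        outIdx.push_back(best);
      }
    }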
diff --git a/src/amun/fpga/model.h b/src/amun/fpga/model.h
index 8d681291..c0a274db 100644
--- a/src/amun/fpga/model.h
+++ b/src/amun/fpga/model.h
@@ -17,7 +17,7 @@ struct Weights {
//std::cerr << "E_=" << E_.Debug() << std::endl;
}
- const mblas::Matrix E_;
+ const mblas::Tensor E_;
};
struct EncForwardGRU {
@@ -33,15 +33,15 @@ struct Weights {
Gamma_2_(model.GetMatrix(openCLInfo, "encoder_gamma2"))
{ }
- const mblas::Matrix W_;
- const mblas::Matrix B_;
- const mblas::Matrix U_;
- const mblas::Matrix Wx_;
- const mblas::Matrix Bx1_;
- const mblas::Matrix Bx2_;
- const mblas::Matrix Ux_;
- const mblas::Matrix Gamma_1_;
- const mblas::Matrix Gamma_2_;
+ const mblas::Tensor W_;
+ const mblas::Tensor B_;
+ const mblas::Tensor U_;
+ const mblas::Tensor Wx_;
+ const mblas::Tensor Bx1_;
+ const mblas::Tensor Bx2_;
+ const mblas::Tensor Ux_;
+ const mblas::Tensor Gamma_1_;
+ const mblas::Tensor Gamma_2_;
};
struct EncBackwardGRU {
@@ -57,15 +57,15 @@ struct Weights {
Gamma_2_(model.GetMatrix(openCLInfo, "encoder_r_gamma2"))
{}
- const mblas::Matrix W_;
- const mblas::Matrix B_;
- const mblas::Matrix U_;
- const mblas::Matrix Wx_;
- const mblas::Matrix Bx1_;
- const mblas::Matrix Bx2_;
- const mblas::Matrix Ux_;
- const mblas::Matrix Gamma_1_;
- const mblas::Matrix Gamma_2_;
+ const mblas::Tensor W_;
+ const mblas::Tensor B_;
+ const mblas::Tensor U_;
+ const mblas::Tensor Wx_;
+ const mblas::Tensor Bx1_;
+ const mblas::Tensor Bx2_;
+ const mblas::Tensor Ux_;
+ const mblas::Tensor Gamma_1_;
+ const mblas::Tensor Gamma_2_;
};
//////////////////////////////////////////////////////////////////////////////
@@ -74,7 +74,7 @@ struct Weights {
: E_(model.GetMatrix(openCLInfo, "Wemb_dec"))
{}
- const mblas::Matrix E_;
+ const mblas::Tensor E_;
};
struct DecInit {
@@ -84,9 +84,9 @@ struct Weights {
Gamma_(model.GetMatrix(openCLInfo, "ff_state_gamma"))
{}
- const mblas::Matrix Wi_;
- const mblas::Matrix Bi_;
- const mblas::Matrix Gamma_;
+ const mblas::Tensor Wi_;
+ const mblas::Tensor Bi_;
+ const mblas::Tensor Gamma_;
};
struct DecGRU1 {
@@ -102,15 +102,15 @@ struct Weights {
Gamma_2_(model.GetMatrix(openCLInfo, "decoder_cell1_gamma2"))
{}
- const mblas::Matrix W_;
- const mblas::Matrix B_;
- const mblas::Matrix U_;
- const mblas::Matrix Wx_;
- const mblas::Matrix Bx1_;
- const mblas::Matrix Bx2_;
- const mblas::Matrix Ux_;
- const mblas::Matrix Gamma_1_;
- const mblas::Matrix Gamma_2_;
+ const mblas::Tensor W_;
+ const mblas::Tensor B_;
+ const mblas::Tensor U_;
+ const mblas::Tensor Wx_;
+ const mblas::Tensor Bx1_;
+ const mblas::Tensor Bx2_;
+ const mblas::Tensor Ux_;
+ const mblas::Tensor Gamma_1_;
+ const mblas::Tensor Gamma_2_;
};
struct DecGRU2 {
@@ -126,15 +126,15 @@ struct Weights {
Gamma_2_(model.GetMatrix(openCLInfo, "decoder_cell2_gamma2"))
{}
- const mblas::Matrix W_;
- const mblas::Matrix B_;
- const mblas::Matrix U_;
- const mblas::Matrix Wx_;
- const mblas::Matrix Bx2_;
- const mblas::Matrix Bx1_;
- const mblas::Matrix Ux_;
- const mblas::Matrix Gamma_1_;
- const mblas::Matrix Gamma_2_;
+ const mblas::Tensor W_;
+ const mblas::Tensor B_;
+ const mblas::Tensor U_;
+ const mblas::Tensor Wx_;
+ const mblas::Tensor Bx2_;
+ const mblas::Tensor Bx1_;
+ const mblas::Tensor Ux_;
+ const mblas::Tensor Gamma_1_;
+ const mblas::Tensor Gamma_2_;
};
struct DecAlignment {
@@ -148,13 +148,13 @@ struct Weights {
Gamma_2_(model.GetMatrix(openCLInfo, "decoder_att_gamma2"))
{}
- const mblas::Matrix V_;
- const mblas::Matrix W_;
- const mblas::Matrix B_;
- const mblas::Matrix U_;
- const mblas::Matrix C_;
- const mblas::Matrix Gamma_1_;
- const mblas::Matrix Gamma_2_;
+ const mblas::Tensor V_;
+ const mblas::Tensor W_;
+ const mblas::Tensor B_;
+ const mblas::Tensor U_;
+ const mblas::Tensor C_;
+ const mblas::Tensor Gamma_1_;
+ const mblas::Tensor Gamma_2_;
};
struct DecSoftmax {
@@ -172,17 +172,17 @@ struct Weights {
Gamma_2_(model.GetMatrix(openCLInfo, "ff_logit_l1_gamma2"))
{}
- const mblas::Matrix W1_;
- const mblas::Matrix B1_;
- const mblas::Matrix W2_;
- const mblas::Matrix B2_;
- const mblas::Matrix W3_;
- const mblas::Matrix B3_;
- const mblas::Matrix W4_;
- const mblas::Matrix B4_;
- const mblas::Matrix Gamma_0_;
- const mblas::Matrix Gamma_1_;
- const mblas::Matrix Gamma_2_;
+ const mblas::Tensor W1_;
+ const mblas::Tensor B1_;
+ const mblas::Tensor W2_;
+ const mblas::Tensor B2_;
+ const mblas::Tensor W3_;
+ const mblas::Tensor B3_;
+ const mblas::Tensor W4_;
+ const mblas::Tensor B4_;
+ const mblas::Tensor Gamma_0_;
+ const mblas::Tensor Gamma_1_;
+ const mblas::Tensor Gamma_2_;
};
//////////////////////////////////////////////////////////////////////////////
diff --git a/src/amun/fpga/npz_converter.cpp b/src/amun/fpga/npz_converter.cpp
index 7ce9bdd4..16dac0a6 100644
--- a/src/amun/fpga/npz_converter.cpp
+++ b/src/amun/fpga/npz_converter.cpp
@@ -13,13 +13,13 @@ NpzConverter::NpzConverter(const std::string& file)
cerr << "file=" << file << endl;
}
-mblas::Matrix NpzConverter::GetMatrix(
+mblas::Tensor NpzConverter::GetMatrix(
const OpenCLInfo &openCLInfo,
const std::string& key,
bool transpose
) const
{
- mblas::Matrix matrix(openCLInfo);
+ mblas::Tensor matrix(openCLInfo);
//cerr << "key1=" << key << " " << matrix.Debug(1) << endl;
cnpy::npz_t::const_iterator it = model_.find(key);
diff --git a/src/amun/fpga/npz_converter.h b/src/amun/fpga/npz_converter.h
index 6aa95eb1..565204ee 100644
--- a/src/amun/fpga/npz_converter.h
+++ b/src/amun/fpga/npz_converter.h
@@ -43,7 +43,7 @@ public:
model_.destruct();
}
- mblas::Matrix GetMatrix(
+ mblas::Tensor GetMatrix(
const OpenCLInfo &openCLInfo,
const std::string& key,
bool transpose = false) const;
diff --git a/src/amun/fpga/nth_element.cpp b/src/amun/fpga/nth_element.cpp
index 5b2dcf69..e32aa1e8 100644
--- a/src/amun/fpga/nth_element.cpp
+++ b/src/amun/fpga/nth_element.cpp
@@ -17,7 +17,7 @@ NthElement::NthElement(const OpenCLInfo &openCLInfo, size_t maxBeamSize, size_t
}
-void NthElement::getNBestList(const std::vector<uint>& beamSizes, mblas::Matrix& Probs,
+void NthElement::getNBestList(const std::vector<uint>& beamSizes, mblas::Tensor& Probs,
std::vector<float>& outCosts, std::vector<unsigned>& outKeys,
const bool isFirst)
{
diff --git a/src/amun/fpga/nth_element.h b/src/amun/fpga/nth_element.h
index bd3b0615..e926c749 100644
--- a/src/amun/fpga/nth_element.h
+++ b/src/amun/fpga/nth_element.h
@@ -15,7 +15,7 @@ public:
NthElement(const NthElement &copy) = delete;
NthElement(const OpenCLInfo &openCLInfo, size_t maxBeamSize, size_t maxBatchSize);
- void getNBestList(const std::vector<uint>& beamSizes, mblas::Matrix& Probs,
+ void getNBestList(const std::vector<uint>& beamSizes, mblas::Tensor& Probs,
std::vector<float>& outCosts, std::vector<unsigned>& outKeys,
const bool isFirst);
diff --git a/src/amun/gpu/decoder/ape_penalty.h b/src/amun/gpu/decoder/ape_penalty.h
index 54259151..3e969987 100644
--- a/src/amun/gpu/decoder/ape_penalty.h
+++ b/src/amun/gpu/decoder/ape_penalty.h
@@ -8,7 +8,7 @@
#include "common/base_best_hyps.h"
#include "common/loader.h"
-#include "gpu/mblas/matrix.h"
+#include "gpu/mblas/tensor.h"
namespace GPU {
@@ -55,7 +55,7 @@ class ApePenalty : public Scorer {
private:
std::vector<float> costs_;
const SrcTrgMap& srcTrgMap_;
- mblas::Matrix Probs_;
+ mblas::Tensor Probs_;
const Penalties& penalties_;
};
diff --git a/src/amun/gpu/decoder/best_hyps.cu b/src/amun/gpu/decoder/best_hyps.cu
index 4a921a30..9bcac353 100644
--- a/src/amun/gpu/decoder/best_hyps.cu
+++ b/src/amun/gpu/decoder/best_hyps.cu
@@ -6,7 +6,7 @@ namespace amunmt {
namespace GPU {
BestHyps::BestHyps(const God &god)
- : BestHypsBase(god),
+ : BaseBestHyps(god),
keys_(god.Get<unsigned>("beam-size") * god.Get<unsigned>("mini-batch")),
costs_(god.Get<unsigned>("beam-size") * god.Get<unsigned>("mini-batch")),
maxBeamSize_(god.Get<unsigned>("beam-size"))
@@ -17,11 +17,11 @@ BestHyps::BestHyps(const God &god)
}
}
-void BestHyps::DisAllowUNK(mblas::Matrix& Prob) {
+void BestHyps::DisAllowUNK(mblas::Tensor& Prob) {
SetColumn(Prob, UNK_ID, std::numeric_limits<float>::lowest());
}
-void BestHyps::FindBests(const std::vector<unsigned>& beamSizes, mblas::Matrix& Probs,
+void BestHyps::FindBests(const std::vector<unsigned>& beamSizes, mblas::Tensor& Probs,
std::vector<float>& outCosts,
std::vector<unsigned>& outKeys,
const bool isFirst)
@@ -30,7 +30,7 @@ void BestHyps::FindBests(const std::vector<unsigned>& beamSizes, mblas::Matrix&
}
// fast fused softmax and nth_element
-void BestHyps::FindBests(const std::vector<unsigned>& beamSizes, mblas::Matrix& Probs,
+void BestHyps::FindBests(const std::vector<unsigned>& beamSizes, mblas::Tensor& Probs,
mblas::Vector<NthOutBatch> &nBest,
std::vector<float>& outCosts,
std::vector<unsigned>& outKeys,
@@ -45,7 +45,7 @@ std::vector<SoftAlignmentPtr> BestHyps::GetAlignments(const std::vector<ScorerPt
std::vector<SoftAlignmentPtr> alignments;
for (auto& scorer : scorers) {
if (GPU::EncoderDecoder* encdec = dynamic_cast<GPU::EncoderDecoder*>(scorer.get())) {
- const mblas::Matrix &attention = encdec->GetAttention();
+ const mblas::Tensor &attention = encdec->GetAttention();
unsigned attLength = attention.dim(1);
SoftAlignment *softAlignment = new SoftAlignment(attLength);
@@ -76,7 +76,7 @@ void BestHyps::CalcBeam(
using namespace mblas;
- mblas::Matrix& Probs = static_cast<mblas::Matrix&>(scorers[0]->GetProbs());
+ mblas::Tensor& Probs = static_cast<mblas::Tensor&>(scorers[0]->GetProbs());
std::vector<float> vCosts;
for (auto& h : prevHyps) {
@@ -97,7 +97,7 @@ void BestHyps::CalcBeam(
const bool isFirst = (vCosts[0] == 0.0f) ? true : false;
if (god_.UseFusedSoftmax()) {
- const mblas::Matrix& b4 = *static_cast<const mblas::Matrix*>(scorers[0]->GetBias());
+ const mblas::Tensor& b4 = *static_cast<const mblas::Tensor*>(scorers[0]->GetBias());
mblas::Vector<NthOutBatch> &nBest = *static_cast<mblas::Vector<NthOutBatch>*>(scorers[0]->GetNBest());
nBest.newSize(beamSizeSum);
@@ -115,7 +115,7 @@ void BestHyps::CalcBeam(
BroadcastVecColumn(weights_.at(scorers[0]->GetName()) * _1 + _2, Probs, costs_);
for (unsigned i = 1; i < scorers.size(); ++i) {
- mblas::Matrix &currProbs = static_cast<mblas::Matrix&>(scorers[i]->GetProbs());
+ mblas::Tensor &currProbs = static_cast<mblas::Tensor&>(scorers[i]->GetProbs());
Element(_1 + weights_.at(scorers[i]->GetName()) * _2, Probs, currProbs);
}
@@ -132,7 +132,7 @@ void BestHyps::CalcBeam(
breakDowns.push_back(bestCosts);
for (unsigned i = 1; i < scorers.size(); ++i) {
std::vector<float> modelCosts(beamSizeSum);
- mblas::Matrix &currProbs = static_cast<mblas::Matrix&>(scorers[i]->GetProbs());
+ mblas::Tensor &currProbs = static_cast<mblas::Tensor&>(scorers[i]->GetProbs());
nthElement_->getValueByKey(modelCosts, currProbs);
breakDowns.push_back(modelCosts);
@@ -194,7 +194,7 @@ void BestHyps::CalcBeam(
//////////////////////////////////////////////////////////////////////////
void BestHyps::getNBestList(const std::vector<unsigned>& beamSizes,
- mblas::Matrix& Probs,
+ mblas::Tensor& Probs,
mblas::Vector<NthOutBatch> &nBest,
std::vector<float>& outCosts,
std::vector<unsigned>& outKeys,
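
CalcBeam takes two routes. The standard one materializes log-probs: BroadcastVecColumn folds each hypothesis's accumulated cost into its candidate row, Element mixes in the remaining scorers with their weights, and nthElement_ extracts the beam. Under god_.UseFusedSoftmax() the softmax, the b4 bias add, and n-best selection run fused over raw logits, which is why the bias and the NthOutBatch vector are fetched from the scorer here. A reference of the cost fold on the standard path:

    #include <cstddef>
    #include <vector>

    // BroadcastVecColumn(w * _1 + _2, Probs, costs): row r becomes
    // weight * Probs[r][*] + costs[r], folding each live hypothesis's
    // accumulated score into its candidates before n-best selection.
    void FoldCosts(std::vector<float>& probs, std::size_t cols,
                   const std::vector<float>& costs, float weight) {
      for (std::size_t r = 0; r < costs.size(); ++r)
        for (std::size_t c = 0; c < cols; ++c)
          probs[r * cols + c] = weight * probs[r * cols + c] + costs[r];
    }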
diff --git a/src/amun/gpu/decoder/best_hyps.h b/src/amun/gpu/decoder/best_hyps.h
index 47695f36..5fa05476 100644
--- a/src/amun/gpu/decoder/best_hyps.h
+++ b/src/amun/gpu/decoder/best_hyps.h
@@ -8,7 +8,7 @@
#include "common/exception.h"
#include "common/god.h"
#include "common/utils.h"
-#include "gpu/mblas/matrix_functions.h"
+#include "gpu/mblas/tensor_functions.h"
#include "gpu/mblas/nth_element.h"
#include "gpu/mblas/vector.h"
@@ -17,16 +17,16 @@
namespace amunmt {
namespace GPU {
-class BestHyps : public BestHypsBase
+class BestHyps : public BaseBestHyps
{
public:
BestHyps(const BestHyps &copy) = delete;
BestHyps(const God &god);
- void DisAllowUNK(mblas::Matrix& Prob);
+ void DisAllowUNK(mblas::Tensor& Prob);
// standard nth_element
- void FindBests(const std::vector<unsigned>& beamSizes, mblas::Matrix& Probs,
+ void FindBests(const std::vector<unsigned>& beamSizes, mblas::Tensor& Probs,
std::vector<float>& outCosts,
std::vector<unsigned>& outKeys,
const bool isFirst);
@@ -48,14 +48,14 @@ class BestHyps : public BestHypsBase
unsigned maxBeamSize_;
// fast fused softmax and nth_element
- void FindBests(const std::vector<unsigned>& beamSizes, mblas::Matrix& Probs,
+ void FindBests(const std::vector<unsigned>& beamSizes, mblas::Tensor& Probs,
mblas::Vector<NthOutBatch> &nBest,
std::vector<float>& outCosts,
std::vector<unsigned>& outKeys,
const bool isFirst);
void getNBestList(const std::vector<unsigned>& beamSizes,
- mblas::Matrix& Probs,
+ mblas::Tensor& Probs,
mblas::Vector<NthOutBatch> &nBest,
std::vector<float>& outCosts,
std::vector<unsigned>& outKeys,
diff --git a/src/amun/gpu/decoder/encoder_decoder.cu b/src/amun/gpu/decoder/encoder_decoder.cu
index 4ce69c4f..e43202b0 100644
--- a/src/amun/gpu/decoder/encoder_decoder.cu
+++ b/src/amun/gpu/decoder/encoder_decoder.cu
@@ -5,7 +5,7 @@
#include "common/sentences.h"
#include "encoder_decoder.h"
-#include "gpu/mblas/matrix_functions.h"
+#include "gpu/mblas/tensor_functions.h"
#include "gpu/dl4mt/dl4mt.h"
#include "gpu/decoder/encoder_decoder_state.h"
#include "gpu/decoder/best_hyps.h"
@@ -29,7 +29,7 @@ EncoderDecoder::EncoderDecoder(
encoder_(new Encoder(model_, config)),
decoder_(new Decoder(god, model_, config)),
indices_(god.Get<unsigned>("beam-size")),
- SourceContext_(new mblas::Matrix())
+ SourceContext_(new mblas::Tensor())
{
BEGIN_TIMER("EncoderDecoder");
}
@@ -144,11 +144,11 @@ void EncoderDecoder::AssembleBeamState(const State& in,
//PAUSE_TIMER("AssembleBeamState");
}
-void EncoderDecoder::GetAttention(mblas::Matrix& Attention) {
+void EncoderDecoder::GetAttention(mblas::Tensor& Attention) {
decoder_->GetAttention(Attention);
}
-BaseMatrix& EncoderDecoder::GetProbs() {
+BaseTensor& EncoderDecoder::GetProbs() {
return decoder_->GetProbs();
}
@@ -157,12 +157,12 @@ void *EncoderDecoder::GetNBest()
return &decoder_->GetNBest();
}
-const BaseMatrix *EncoderDecoder::GetBias() const
+const BaseTensor *EncoderDecoder::GetBias() const
{
return decoder_->GetBias();
}
-mblas::Matrix& EncoderDecoder::GetAttention() {
+mblas::Tensor& EncoderDecoder::GetAttention() {
return decoder_->GetAttention();
}
diff --git a/src/amun/gpu/decoder/encoder_decoder.h b/src/amun/gpu/decoder/encoder_decoder.h
index 7c9ccac7..d145559e 100644
--- a/src/amun/gpu/decoder/encoder_decoder.h
+++ b/src/amun/gpu/decoder/encoder_decoder.h
@@ -7,7 +7,7 @@
#include "common/base_best_hyps.h"
#include "common/threadpool.h"
#include "gpu/types-gpu.h"
-#include "gpu/mblas/matrix.h"
+#include "gpu/mblas/tensor.h"
#include "gpu/mblas/handles.h"
#include "gpu/mblas/vector.h"
@@ -46,13 +46,13 @@ class EncoderDecoder : public Scorer {
const Beam& beam,
State& out);
- void GetAttention(mblas::Matrix& Attention);
+ void GetAttention(mblas::Tensor& Attention);
- mblas::Matrix& GetAttention();
- virtual BaseMatrix& GetProbs();
+ mblas::Tensor& GetAttention();
+ virtual BaseTensor& GetProbs();
virtual void *GetNBest();
- virtual const BaseMatrix *GetBias() const;
+ virtual const BaseTensor *GetBias() const;
unsigned GetVocabSize() const;
@@ -67,7 +67,7 @@ class EncoderDecoder : public Scorer {
mblas::Vector<unsigned> sentenceLengths_;
// set in Encoder::GetContext() to length (maxSentenceLength * batchSize). 1 if it's a word, 0 otherwise
- std::unique_ptr<mblas::Matrix> SourceContext_;
+ std::unique_ptr<mblas::Tensor> SourceContext_;
EncoderDecoder(const EncoderDecoder&) = delete;
diff --git a/src/amun/gpu/decoder/encoder_decoder_loader.cu b/src/amun/gpu/decoder/encoder_decoder_loader.cu
index f6496d53..8fc0c1ec 100644
--- a/src/amun/gpu/decoder/encoder_decoder_loader.cu
+++ b/src/amun/gpu/decoder/encoder_decoder_loader.cu
@@ -60,8 +60,8 @@ ScorerPtr EncoderDecoderLoader::NewScorer(const God &god, const DeviceInfo &devi
tab, *weights_[d]));
}
-BestHypsBasePtr EncoderDecoderLoader::GetBestHyps(const God &god, const DeviceInfo &deviceInfo) const {
- BestHypsBasePtr obj(new GPU::BestHyps(god));
+BaseBestHypsPtr EncoderDecoderLoader::GetBestHyps(const God &god, const DeviceInfo &deviceInfo) const {
+ BaseBestHypsPtr obj(new GPU::BestHyps(god));
//std::thread::id this_id = std::this_thread::get_id();
//std::cerr << "deviceInfo=" << deviceInfo << " thread " << this_id << " sleeping...\n";
diff --git a/src/amun/gpu/decoder/encoder_decoder_loader.h b/src/amun/gpu/decoder/encoder_decoder_loader.h
index cad4887b..4377bdd0 100644
--- a/src/amun/gpu/decoder/encoder_decoder_loader.h
+++ b/src/amun/gpu/decoder/encoder_decoder_loader.h
@@ -17,7 +17,7 @@ class EncoderDecoderLoader : public Loader {
virtual void Load(const God &god);
virtual ScorerPtr NewScorer(const God &god, const DeviceInfo &deviceInfo) const;
- virtual BestHypsBasePtr GetBestHyps(const God &god, const DeviceInfo &deviceInfo) const;
+ virtual BaseBestHypsPtr GetBestHyps(const God &god, const DeviceInfo &deviceInfo) const;
private:
std::vector<std::unique_ptr<Weights>> weights_; // MUST be indexed by gpu id. eg. weights_[2] is for gpu2
diff --git a/src/amun/gpu/decoder/encoder_decoder_state.cu b/src/amun/gpu/decoder/encoder_decoder_state.cu
index 1c8be9b3..3fb0351b 100644
--- a/src/amun/gpu/decoder/encoder_decoder_state.cu
+++ b/src/amun/gpu/decoder/encoder_decoder_state.cu
@@ -21,7 +21,7 @@ CellState& EncoderDecoderState::GetStates() {
return states_;
}
-mblas::Matrix& EncoderDecoderState::GetEmbeddings() {
+mblas::Tensor& EncoderDecoderState::GetEmbeddings() {
return embeddings_;
}
@@ -29,7 +29,7 @@ const CellState& EncoderDecoderState::GetStates() const {
return states_;
}
-const mblas::Matrix& EncoderDecoderState::GetEmbeddings() const {
+const mblas::Tensor& EncoderDecoderState::GetEmbeddings() const {
return embeddings_;
}
diff --git a/src/amun/gpu/decoder/encoder_decoder_state.h b/src/amun/gpu/decoder/encoder_decoder_state.h
index 9399b850..713abc68 100644
--- a/src/amun/gpu/decoder/encoder_decoder_state.h
+++ b/src/amun/gpu/decoder/encoder_decoder_state.h
@@ -16,13 +16,13 @@ class EncoderDecoderState : public State {
virtual std::string Debug(unsigned verbosity = 1) const;
CellState& GetStates();
- mblas::Matrix& GetEmbeddings();
+ mblas::Tensor& GetEmbeddings();
const CellState& GetStates() const;
- const mblas::Matrix& GetEmbeddings() const;
+ const mblas::Tensor& GetEmbeddings() const;
private:
CellState states_;
- mblas::Matrix embeddings_;
+ mblas::Tensor embeddings_;
};
}
diff --git a/src/amun/gpu/decoder/language_model.h b/src/amun/gpu/decoder/language_model.h
index 728eb2f2..7fc9f33c 100644
--- a/src/amun/gpu/decoder/language_model.h
+++ b/src/amun/gpu/decoder/language_model.h
@@ -4,7 +4,7 @@
#include "types.h"
#include "scorer.h"
-#include "matrix.h"
+#include "tensor.h"
#include "dl4mt.h"
#include "threadpool.h"
#include "kenlm.h"
diff --git a/src/amun/gpu/dl4mt/cell.h b/src/amun/gpu/dl4mt/cell.h
index d058874d..7b4ac955 100644
--- a/src/amun/gpu/dl4mt/cell.h
+++ b/src/amun/gpu/dl4mt/cell.h
@@ -1,6 +1,6 @@
#pragma once
-#include "gpu/mblas/matrix_functions.h"
-#include "gpu/mblas/matrix_wrapper.h"
+#include "gpu/mblas/tensor_functions.h"
+#include "gpu/mblas/tensor_wrapper.h"
#include "gpu/mblas/handles.h"
#include "cellstate.h"
@@ -18,7 +18,7 @@ class Cell {
public:
virtual void GetNextState(CellState& NextState,
const CellState& State,
- const mblas::Matrix& Context) const = 0;
+ const mblas::Tensor& Context) const = 0;
virtual CellLength GetStateLength() const = 0;
};
diff --git a/src/amun/gpu/dl4mt/cellstate.h b/src/amun/gpu/dl4mt/cellstate.h
index d22b0e8d..3a316a55 100644
--- a/src/amun/gpu/dl4mt/cellstate.h
+++ b/src/amun/gpu/dl4mt/cellstate.h
@@ -1,6 +1,6 @@
#pragma once
-#include "gpu/mblas/matrix_functions.h"
-#include "gpu/mblas/matrix_wrapper.h"
+#include "gpu/mblas/tensor_functions.h"
+#include "gpu/mblas/tensor_wrapper.h"
#include "gpu/mblas/handles.h"
namespace amunmt {
@@ -8,15 +8,15 @@ namespace GPU {
struct CellState {
CellState(){
- output = std::unique_ptr<mblas::Matrix>(new mblas::Matrix());
- cell = std::unique_ptr<mblas::Matrix>(new mblas::Matrix());
+ output = std::unique_ptr<mblas::Tensor>(new mblas::Tensor());
+ cell = std::unique_ptr<mblas::Tensor>(new mblas::Tensor());
};
- CellState(std::unique_ptr<mblas::Matrix> cell, std::unique_ptr<mblas::Matrix> output):
+ CellState(std::unique_ptr<mblas::Tensor> cell, std::unique_ptr<mblas::Tensor> output):
cell(std::move(cell)), output(std::move(output)) {}
- std::unique_ptr<mblas::Matrix> output;
- std::unique_ptr<mblas::Matrix> cell;
+ std::unique_ptr<mblas::Tensor> output;
+ std::unique_ptr<mblas::Tensor> cell;
};
}
}
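
CellState packages two tensors behind the single state type the Cell interface in cell.h expects: a GRU only ever touches output, while an LSTM updates cell and output together. Keeping them as unique_ptr members leaves the state cheap to move when beam states are reassembled. A minimal analogue with stand-in types:

    #include <memory>
    #include <vector>

    using Tensor = std::vector<float>;  // stand-in for mblas::Tensor

    // Same shape as GPU::CellState: both members always allocated; a
    // GRU reads/writes only `output`, an LSTM maintains both.
    struct CellState {
      std::unique_ptr<Tensor> output{new Tensor()};
      std::unique_ptr<Tensor> cell{new Tensor()};
    };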
diff --git a/src/amun/gpu/dl4mt/decoder.h b/src/amun/gpu/dl4mt/decoder.h
index d9eca43b..9f61aff4 100644
--- a/src/amun/gpu/dl4mt/decoder.h
+++ b/src/amun/gpu/dl4mt/decoder.h
@@ -3,7 +3,7 @@
#include <yaml-cpp/yaml.h>
#include "gpu/mblas/vector.h"
-#include "gpu/mblas/matrix_functions.h"
+#include "gpu/mblas/tensor_functions.h"
#include "model.h"
#include "gru.h"
#include "lstm.h"
@@ -24,7 +24,7 @@ class Decoder {
: w_(model)
{}
- void Lookup(mblas::Matrix& Rows, const std::vector<unsigned>& ids) {
+ void Lookup(mblas::Tensor& Rows, const std::vector<unsigned>& ids) {
using namespace mblas;
std::vector<unsigned> tids = ids;
for(auto&& id : tids)
@@ -65,7 +65,7 @@ class Decoder {
{}
void InitializeState(CellState& State,
- const mblas::Matrix& SourceContext,
+ const mblas::Tensor& SourceContext,
const unsigned batchSize,
const mblas::Vector<unsigned> &sentenceLengths)
{
@@ -104,7 +104,7 @@ class Decoder {
void GetNextState(CellState& NextState,
const CellState& State,
- const mblas::Matrix& Context) {
+ const mblas::Tensor& Context) {
gru_->GetNextState(NextState, State, Context);
}
@@ -112,7 +112,7 @@ class Decoder {
const Weights& w_;
std::unique_ptr<Cell> gru_;
- mblas::Matrix Temp2_;
+ mblas::Tensor Temp2_;
RNNHidden(const RNNHidden&) = delete;
};
@@ -124,7 +124,7 @@ class Decoder {
void GetNextState(CellState& NextState,
const CellState& State,
- const mblas::Matrix& Context) {
+ const mblas::Tensor& Context) {
gru_->GetNextState(NextState, State, Context);
}
@@ -142,7 +142,7 @@ class Decoder {
, dBatchMapping_(god.Get<unsigned>("mini-batch") * god.Get<unsigned>("beam-size"), 0)
{}
- void Init(const mblas::Matrix& SourceContext) {
+ void Init(const mblas::Tensor& SourceContext) {
using namespace mblas;
Prod(/*h_[0],*/ SCU_, SourceContext, *w_.U_);
@@ -168,9 +168,9 @@ class Decoder {
return ret;
}
- void GetAlignedSourceContext(mblas::Matrix& AlignedSourceContext,
+ void GetAlignedSourceContext(mblas::Tensor& AlignedSourceContext,
const CellState& HiddenState,
- const mblas::Matrix& SourceContext,
+ const mblas::Tensor& SourceContext,
const std::vector<unsigned>& h_sentenceLengths,
const mblas::Vector<unsigned> &sentenceLengths,
const std::vector<unsigned>& beamSizes)
@@ -245,11 +245,11 @@ class Decoder {
PAUSE_TIMER("GetAlignedSourceContext");
}
- void GetAttention(mblas::Matrix& Attention) {
+ void GetAttention(mblas::Tensor& Attention) {
mblas::Copy(Attention, A_);
}
- mblas::Matrix& GetAttention() {
+ mblas::Tensor& GetAttention() {
return A_;
}
@@ -258,13 +258,13 @@ class Decoder {
mblas::Vector<unsigned> dBatchMapping_;
- mblas::Matrix SCU_;
- mblas::Matrix Temp1_;
- mblas::Matrix Temp2_;
- mblas::Matrix A_;
+ mblas::Tensor SCU_;
+ mblas::Tensor Temp1_;
+ mblas::Tensor Temp2_;
+ mblas::Tensor A_;
- mblas::Matrix Ones_;
- mblas::Matrix Sums_;
+ mblas::Tensor Ones_;
+ mblas::Tensor Sums_;
Alignment(const Alignment&) = delete;
};
@@ -290,11 +290,11 @@ class Decoder {
}
}
- void GetProbs(mblas::Matrix& Probs,
- std::shared_ptr<mblas::Matrix> &b4,
+ void GetProbs(mblas::Tensor& Probs,
+ std::shared_ptr<mblas::Tensor> &b4,
const CellState& State,
- const mblas::Matrix& Embedding,
- const mblas::Matrix& AlignedSourceContext,
+ const mblas::Tensor& Embedding,
+ const mblas::Tensor& AlignedSourceContext,
bool useFusedSoftmax)
{
using namespace mblas;
@@ -339,7 +339,7 @@ class Decoder {
Element(Tanh(_1 + _2 + _3), T1_, T2_, T3_);
//PAUSE_TIMER("GetProbs.Element");
- std::shared_ptr<mblas::Matrix> w4;
+ std::shared_ptr<mblas::Tensor> w4;
if(!filtered_) {
w4 = w_.W4_;
b4 = w_.B4_;
@@ -348,6 +348,8 @@ class Decoder {
b4.reset(&FilteredB4_);
}
+ BEGIN_TIMER("OutputLayer");
+
BEGIN_TIMER("GetProbs.Prod4");
Prod(Probs, T1_, *w4);
PAUSE_TIMER("GetProbs.Prod4");
@@ -361,6 +363,8 @@ class Decoder {
mblas::LogSoftmax(Probs);
PAUSE_TIMER("GetProbs.LogSoftMax");
}
+
+ PAUSE_TIMER("OutputLayer");
}
void Filter(const std::vector<unsigned>& ids) {
@@ -379,15 +383,15 @@ class Decoder {
const Weights& w_;
bool filtered_;
- mblas::Matrix FilteredW4_;
- mblas::Matrix FilteredB4_;
+ mblas::Tensor FilteredW4_;
+ mblas::Tensor FilteredB4_;
- mblas::Matrix T1_;
- mblas::Matrix T2_;
- mblas::Matrix T3_;
+ mblas::Tensor T1_;
+ mblas::Tensor T2_;
+ mblas::Tensor T3_;
- mblas::Matrix TempW4;
- mblas::Matrix TempB4;
+ mblas::Tensor TempW4;
+ mblas::Tensor TempB4;
Softmax(const Softmax&) = delete;
};
@@ -403,8 +407,8 @@ class Decoder {
void Decode(CellState& NextState,
const CellState& State,
- const mblas::Matrix& Embeddings,
- const mblas::Matrix& SourceContext,
+ const mblas::Tensor& Embeddings,
+ const mblas::Tensor& SourceContext,
const std::vector<unsigned>& h_sentenceLengths,
const mblas::Vector<unsigned> &sentenceLengths,
const std::vector<unsigned>& beamSizes,
@@ -443,12 +447,12 @@ class Decoder {
//PAUSE_TIMER("Decode");
}
- mblas::Matrix& GetProbs() {
+ mblas::Tensor& GetProbs() {
return Probs_;
}
void EmptyState(CellState& State,
- const mblas::Matrix& SourceContext,
+ const mblas::Tensor& SourceContext,
unsigned batchSize,
const mblas::Vector<unsigned> &sentenceLengths)
{
@@ -456,12 +460,12 @@ class Decoder {
alignment_.Init(SourceContext);
}
- void EmptyEmbedding(mblas::Matrix& Embedding, unsigned batchSize = 1) {
+ void EmptyEmbedding(mblas::Tensor& Embedding, unsigned batchSize = 1) {
Embedding.NewSize(batchSize, embeddings_.GetCols());
mblas::Fill(Embedding, 0);
}
- void Lookup(mblas::Matrix& Embedding,
+ void Lookup(mblas::Tensor& Embedding,
const std::vector<unsigned>& w) {
embeddings_.Lookup(Embedding, w);
}
@@ -470,7 +474,7 @@ class Decoder {
softmax_.Filter(ids);
}
- void GetAttention(mblas::Matrix& Attention) {
+ void GetAttention(mblas::Tensor& Attention) {
alignment_.GetAttention(Attention);
}
@@ -478,7 +482,7 @@ class Decoder {
return embeddings_.GetRows();
}
- mblas::Matrix& GetAttention() {
+ mblas::Tensor& GetAttention() {
return alignment_.GetAttention();
}
@@ -486,7 +490,7 @@ class Decoder {
return nBest_;
}
- const mblas::Matrix *GetBias() const {
+ const mblas::Tensor *GetBias() const {
return b4_.get();
}
@@ -494,13 +498,13 @@ class Decoder {
void GetHiddenState(CellState& HiddenState,
const CellState& PrevState,
- const mblas::Matrix& Embedding) {
+ const mblas::Tensor& Embedding) {
rnn1_.GetNextState(HiddenState, PrevState, Embedding);
}
- void GetAlignedSourceContext(mblas::Matrix& AlignedSourceContext,
+ void GetAlignedSourceContext(mblas::Tensor& AlignedSourceContext,
const CellState& HiddenState,
- const mblas::Matrix& SourceContext,
+ const mblas::Tensor& SourceContext,
const std::vector<unsigned>& h_sentenceLengths,
const mblas::Vector<unsigned> &sentenceLengths,
const std::vector<unsigned>& beamSizes)
@@ -515,14 +519,14 @@ class Decoder {
void GetNextState(CellState& State,
const CellState& HiddenState,
- const mblas::Matrix& AlignedSourceContext) {
+ const mblas::Tensor& AlignedSourceContext) {
rnn2_.GetNextState(State, HiddenState, AlignedSourceContext);
}
void GetProbs(const CellState& State,
- const mblas::Matrix& Embedding,
- const mblas::Matrix& AlignedSourceContext,
+ const mblas::Tensor& Embedding,
+ const mblas::Tensor& AlignedSourceContext,
bool useFusedSoftmax)
{
softmax_.GetProbs(Probs_, b4_, State, Embedding, AlignedSourceContext, useFusedSoftmax);
@@ -559,8 +563,8 @@ class Decoder {
private:
CellState HiddenState_;
- mblas::Matrix AlignedSourceContext_;
- mblas::Matrix Probs_;
+ mblas::Tensor AlignedSourceContext_;
+ mblas::Tensor Probs_;
Embeddings<Weights::DecEmbeddings> embeddings_;
RNNHidden<Weights::DecInit> rnn1_;
@@ -569,7 +573,7 @@ class Decoder {
Softmax<Weights::DecSoftmax> softmax_;
mblas::Vector<NthOutBatch> nBest_;
- std::shared_ptr<mblas::Matrix> b4_;
+ std::shared_ptr<mblas::Tensor> b4_;
Decoder(const Decoder&) = delete;
};
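
Read together, the private helpers spell out the conditional-GRU decode step: first RNN cell, then attention, then second RNN cell, then the output softmax. The body of Decode is elided in the hunks above, so the sketch below reconstructs only the call order implied by the private interface, with free functions standing in for rnn1_, alignment_, rnn2_ and softmax_:

    struct Tensor {};                          // stand-in for mblas::Tensor
    struct CellState { Tensor output, cell; }; // simplified; see cellstate.h

    void GetHiddenState(CellState&, const CellState&, const Tensor&) {}
    void GetAlignedSourceContext(Tensor&, const CellState&, const Tensor&) {}
    void GetNextState(CellState&, const CellState&, const Tensor&) {}
    void GetProbs(const CellState&, const Tensor&, const Tensor&) {}

    // One decode step in the order the private interface implies.
    void DecodeStep(CellState& nextState, const CellState& state,
                    const Tensor& embeddings, const Tensor& sourceContext) {
      CellState hidden;
      Tensor alignedContext;
      GetHiddenState(hidden, state, embeddings);                       // rnn1_
      GetAlignedSourceContext(alignedContext, hidden, sourceContext);  // attention
      GetNextState(nextState, hidden, alignedContext);                 // rnn2_
      GetProbs(nextState, embeddings, alignedContext);                 // softmax_
    }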
diff --git a/src/amun/gpu/dl4mt/encoder.cu b/src/amun/gpu/dl4mt/encoder.cu
index b3f14c81..03380992 100644
--- a/src/amun/gpu/dl4mt/encoder.cu
+++ b/src/amun/gpu/dl4mt/encoder.cu
@@ -64,7 +64,7 @@ std::vector<std::vector<FactWord>> GetBatchInput(const Sentences& source, unsign
void Encoder::Encode(const Sentences& source,
unsigned tab,
- mblas::Matrix& context,
+ mblas::Tensor& context,
std::vector<unsigned> &h_sentenceLengths,
mblas::Vector<unsigned> &sentenceLengths)
{
diff --git a/src/amun/gpu/dl4mt/encoder.h b/src/amun/gpu/dl4mt/encoder.h
index 557f64f6..c29b5556 100644
--- a/src/amun/gpu/dl4mt/encoder.h
+++ b/src/amun/gpu/dl4mt/encoder.h
@@ -2,7 +2,7 @@
#include <yaml-cpp/yaml.h>
-#include "gpu/mblas/matrix_functions.h"
+#include "gpu/mblas/tensor_functions.h"
#include "model.h"
#include "gru.h"
#include "common/sentence.h"
@@ -28,7 +28,7 @@ class Encoder {
: w_(model)
{}
- void Lookup(mblas::Matrix& Row, const std::vector<std::vector<Word>>& words) {
+ void Lookup(mblas::Tensor& Row, const std::vector<std::vector<Word>>& words) {
std::vector<std::vector<unsigned>> knownWords(w_.Es_.size(),
std::vector<unsigned>(words.size(), 1));
unsigned factorCount = w_.Es_.size();
@@ -36,7 +36,7 @@ class Encoder {
const std::vector<Word>& factors = words[i];
for (unsigned factorIdx = 0; factorIdx < factors.size(); ++factorIdx) {
const Word& factor = factors[factorIdx];
- const std::shared_ptr<mblas::Matrix>& Emb = w_.Es_.at(factorIdx);
+ const std::shared_ptr<mblas::Tensor>& Emb = w_.Es_.at(factorIdx);
if (factor < Emb->dim(0)) {
knownWords[factorIdx][i] = factor;
@@ -48,14 +48,14 @@ class Encoder {
unsigned wordCount = words.size() / factorCount;
//Row.NewSize(0, wordCount);
- /* std::vector<std::shared_ptr<mblas::Matrix>>::iterator eit = w_.Es_.begin(); */
+ /* std::vector<std::shared_ptr<mblas::Tensor>>::iterator eit = w_.Es_.begin(); */
/* std::vector<HostVector<unsigned>>::iterator wit = knownWords.begin(); */
for (unsigned i = 0; i < knownWords.size(); i++) {
const std::vector<unsigned>& factorWords = knownWords.at(i);
mblas::Vector<unsigned> dKnownWords(factorWords);
- const std::shared_ptr<mblas::Matrix>& Emb = w_.Es_.at(i);
- mblas::Matrix factorRow;
+ const std::shared_ptr<mblas::Tensor>& Emb = w_.Es_.at(i);
+ mblas::Tensor factorRow;
factorRow.NewSize(wordCount, Emb->dim(1));
mblas::Assemble(factorRow, *Emb, dKnownWords);
mblas::Transpose(factorRow);
@@ -103,19 +103,19 @@ class Encoder {
void GetNextState(CellState& NextState,
const CellState& State,
- const mblas::Matrix& Embd) {
+ const mblas::Tensor& Embd) {
gru_->GetNextState(NextState, State, Embd);
}
template <class It>
- void Encode(It it, It end, mblas::Matrix& Context,
+ void Encode(It it, It end, mblas::Tensor& Context,
unsigned batchSize, bool invert,
const mblas::Vector<unsigned> *sentenceLengths=nullptr)
{
InitializeState(batchSize);
- CellState prevState(std::unique_ptr<mblas::Matrix>(new mblas::Matrix(*(State_.cell))),
- std::unique_ptr<mblas::Matrix>(new mblas::Matrix(*(State_.output))));
+ CellState prevState(std::unique_ptr<mblas::Tensor>(new mblas::Tensor(*(State_.cell))),
+ std::unique_ptr<mblas::Tensor>(new mblas::Tensor(*(State_.output))));
unsigned n = std::distance(it, end);
unsigned i = 0;
@@ -166,7 +166,7 @@ class Encoder {
void Encode(const Sentences& words,
unsigned tab,
- mblas::Matrix& context,
+ mblas::Tensor& context,
std::vector<unsigned> &h_sentenceLengths,
mblas::Vector<unsigned> &sentenceLengths);
@@ -180,7 +180,7 @@ class Encoder {
RNN backwardRnn_;
// reusing memory
- std::vector<mblas::Matrix> embeddedWords_;
+ std::vector<mblas::Tensor> embeddedWords_;
Encoder(const Encoder&) = delete;
};
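
The encoder runs forwardRnn_ over the embedded words left to right and backwardRnn_ right to left (the invert flag on the templated Encode), and the per-position concatenation of the two passes becomes SourceContext. A conceptual CPU sketch of that concatenation (the layout is an assumption; the real code pastes rows into halves of the GPU tensor):

    #include <cstddef>
    #include <vector>

    // fwd[t] and bwd[t] are the forward/backward hidden states at position t.
    std::vector<std::vector<float>> ConcatBiStates(
        const std::vector<std::vector<float>>& fwd,
        const std::vector<std::vector<float>>& bwd) {
      std::vector<std::vector<float>> context(fwd.size());
      for (std::size_t t = 0; t < fwd.size(); ++t) {
        context[t] = fwd[t];  // first half: forward state
        context[t].insert(context[t].end(), bwd[t].begin(), bwd[t].end());
      }
      return context;
    }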
diff --git a/src/amun/gpu/dl4mt/gru.cu b/src/amun/gpu/dl4mt/gru.cu
index a3b4d7ae..4b371e1d 100644
--- a/src/amun/gpu/dl4mt/gru.cu
+++ b/src/amun/gpu/dl4mt/gru.cu
@@ -5,13 +5,13 @@ using namespace std;
namespace amunmt {
namespace GPU {
-__global__ void gElementwiseOps(mblas::MatrixWrapper<float> outWrap,
- const mblas::MatrixWrapper<float> stateWrap,
- const mblas::MatrixWrapper<float> ruhWrap,
- const mblas::MatrixWrapper<float> tempWrap,
- const mblas::MatrixWrapper<float> bWrap,
- const mblas::MatrixWrapper<float> bx1Wrap,
- const mblas::MatrixWrapper<float> bx2Wrap)
+__global__ void gElementwiseOps(mblas::TensorWrapper<float> outWrap,
+ const mblas::TensorWrapper<float> stateWrap,
+ const mblas::TensorWrapper<float> ruhWrap,
+ const mblas::TensorWrapper<float> tempWrap,
+ const mblas::TensorWrapper<float> bWrap,
+ const mblas::TensorWrapper<float> bx1Wrap,
+ const mblas::TensorWrapper<float> bx2Wrap)
{
const unsigned rows = stateWrap.dim(0);
const unsigned cols = stateWrap.dim(1);
diff --git a/src/amun/gpu/dl4mt/gru.h b/src/amun/gpu/dl4mt/gru.h
index fd9e80dc..8b82d81a 100644
--- a/src/amun/gpu/dl4mt/gru.h
+++ b/src/amun/gpu/dl4mt/gru.h
@@ -1,7 +1,7 @@
#pragma once
#include <boost/timer/timer.hpp>
-#include "gpu/mblas/matrix_functions.h"
-#include "gpu/mblas/matrix_wrapper.h"
+#include "gpu/mblas/tensor_functions.h"
+#include "gpu/mblas/tensor_wrapper.h"
#include "gpu/mblas/handles.h"
#include "gpu/dl4mt/cell.h"
#include "cellstate.h"
@@ -17,7 +17,7 @@ class SlowGRU: public Cell {
virtual void GetNextState(CellState& NextState,
const CellState& State,
- const mblas::Matrix& Context) const {
+ const mblas::Tensor& Context) const {
using namespace mblas;
//std::cerr << std::endl;
@@ -90,25 +90,25 @@ class SlowGRU: public Cell {
const Weights& w_;
// reused to avoid allocation
- mutable mblas::Matrix RU_;
- mutable mblas::Matrix R_;
- mutable mblas::Matrix U_;
- mutable mblas::Matrix H_;
- mutable mblas::Matrix Temp1_;
- mutable mblas::Matrix Temp2_;
+ mutable mblas::Tensor RU_;
+ mutable mblas::Tensor R_;
+ mutable mblas::Tensor U_;
+ mutable mblas::Tensor H_;
+ mutable mblas::Tensor Temp1_;
+ mutable mblas::Tensor Temp2_;
SlowGRU(const SlowGRU&) = delete;
};
///////////////////////////////////////////////////////////////////////////////////////////////
-__global__ void gElementwiseOps(mblas::MatrixWrapper<float> outWrap,
- const mblas::MatrixWrapper<float> stateWrap,
- const mblas::MatrixWrapper<float> ruhWrap,
- const mblas::MatrixWrapper<float> tempWrap,
- const mblas::MatrixWrapper<float> bWrap,
- const mblas::MatrixWrapper<float> bx1Wrap,
- const mblas::MatrixWrapper<float> bx2Wrap);
+__global__ void gElementwiseOps(mblas::TensorWrapper<float> outWrap,
+ const mblas::TensorWrapper<float> stateWrap,
+ const mblas::TensorWrapper<float> ruhWrap,
+ const mblas::TensorWrapper<float> tempWrap,
+ const mblas::TensorWrapper<float> bWrap,
+ const mblas::TensorWrapper<float> bx1Wrap,
+ const mblas::TensorWrapper<float> bx2Wrap);
template <class Weights>
class FastGRU: public Cell {
@@ -129,7 +129,7 @@ class FastGRU: public Cell {
//std::cerr << "w_.W_=" << w_.W_.Debug(1) << std::endl;
//std::cerr << "1WWx_=" << WWx_.Debug(1) << std::endl;
- Matrix WxT;
+ Tensor WxT;
Transpose(WxT, *w_.Wx_);
//std::cerr << "w_.Wx_=" << w_.Wx_.Debug(1) << std::endl;
//std::cerr << "WxT=" << WxT.Debug(1) << std::endl;
@@ -141,7 +141,7 @@ class FastGRU: public Cell {
//std::cerr << "3WWx_=" << WWx_.Debug(1) << std::endl;
Transpose(UUx_, *w_.U_);
- Matrix UxT;
+ Tensor UxT;
Transpose(UxT, *w_.Ux_);
Concat(UUx_, UxT);
Transpose(UUx_);
@@ -151,7 +151,7 @@ class FastGRU: public Cell {
virtual void GetNextState(CellState& NextState,
const CellState& State,
- const mblas::Matrix& Context) const {
+ const mblas::Tensor& Context) const {
using namespace mblas;
//std::cerr << std::endl;
@@ -183,10 +183,10 @@ class FastGRU: public Cell {
}
- void ElementwiseOps(mblas::Matrix& NextState,
- const mblas::Matrix& State,
- const mblas::Matrix& RUH,
- const mblas::Matrix& Temp) const
+ void ElementwiseOps(mblas::Tensor& NextState,
+ const mblas::Tensor& State,
+ const mblas::Tensor& RUH,
+ const mblas::Tensor& Temp) const
{
//BEGIN_TIMER("ElementwiseOps");
@@ -200,13 +200,13 @@ class FastGRU: public Cell {
NextState.NewSize(State.dim(0), State.dim(1), 1, 1);
//std::cerr << "NextState=" << NextState.Debug() << std::endl;
- mblas::MatrixWrapper<float> nextWrap(NextState);
- const mblas::MatrixWrapper<float> stateWrap(State);
- const mblas::MatrixWrapper<float> ruhWrap(RUH);
- const mblas::MatrixWrapper<float> tempWrap(Temp);
- const mblas::MatrixWrapper<float> bWrap(*w_.B_);
- const mblas::MatrixWrapper<float> bx1Wrap(*w_.Bx1_);
- const mblas::MatrixWrapper<float> bx2Wrap(*w_.Bx2_);
+ mblas::TensorWrapper<float> nextWrap(NextState);
+ const mblas::TensorWrapper<float> stateWrap(State);
+ const mblas::TensorWrapper<float> ruhWrap(RUH);
+ const mblas::TensorWrapper<float> tempWrap(Temp);
+ const mblas::TensorWrapper<float> bWrap(*w_.B_);
+ const mblas::TensorWrapper<float> bx1Wrap(*w_.Bx1_);
+ const mblas::TensorWrapper<float> bx2Wrap(*w_.Bx2_);
/*
std::cerr << "nextWrap=" << nextWrap.Debug() << std::endl;
@@ -243,11 +243,11 @@ class FastGRU: public Cell {
const Weights& w_;
// reused to avoid allocation
- mutable mblas::Matrix WWx_;
- mutable mblas::Matrix UUx_;
+ mutable mblas::Tensor WWx_;
+ mutable mblas::Tensor UUx_;
- mutable mblas::Matrix RUH_;
- mutable mblas::Matrix Temp_;
+ mutable mblas::Tensor RUH_;
+ mutable mblas::Tensor Temp_;
FastGRU(const FastGRU&) = delete;
};
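
The gElementwiseOps kernel declared above fuses the whole GRU gate computation into one pass over the state. Below is a CPU reference of the per-element update, assuming the usual dl4mt layout in which ruh and temp hold the reset, update and candidate blocks side by side (3*cols per row), b holds the reset/update biases and bx1/bx2 the candidate biases:

    #include <cmath>
    #include <cstddef>
    #include <vector>

    // Sketch for intuition only; the CUDA kernel does the same math in place.
    void GruElementwise(std::vector<float>& out, const std::vector<float>& state,
                        const std::vector<float>& ruh, const std::vector<float>& temp,
                        const std::vector<float>& b, const std::vector<float>& bx1,
                        const std::vector<float>& bx2, std::size_t cols) {
      auto sigma = [](float x) { return 1.0f / (1.0f + std::exp(-x)); };
      out.resize(state.size());
      for (std::size_t i = 0; i < state.size(); ++i) {
        std::size_t col = i % cols, base = (i / cols) * 3 * cols;
        float r = sigma(ruh[base + col] + temp[base + col] + b[col]);  // reset gate
        float u = sigma(ruh[base + cols + col] + temp[base + cols + col]
                        + b[cols + col]);                              // update gate
        float cand = std::tanh(ruh[base + 2 * cols + col] + bx1[col]
                               + r * (temp[base + 2 * cols + col] + bx2[col]));
        out[i] = (1.0f - u) * cand + u * state[i];  // mix candidate and old state
      }
    }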
diff --git a/src/amun/gpu/dl4mt/lstm.h b/src/amun/gpu/dl4mt/lstm.h
index b236e01a..2f343db6 100644
--- a/src/amun/gpu/dl4mt/lstm.h
+++ b/src/amun/gpu/dl4mt/lstm.h
@@ -1,7 +1,7 @@
#pragma once
#include <boost/timer/timer.hpp>
-#include "gpu/mblas/matrix_functions.h"
-#include "gpu/mblas/matrix_wrapper.h"
+#include "gpu/mblas/tensor_functions.h"
+#include "gpu/mblas/tensor_wrapper.h"
#include "gpu/mblas/handles.h"
#include "gpu/dl4mt/cell.h"
#include "cellstate.h"
@@ -17,7 +17,7 @@ class SlowLSTM: public Cell {
virtual void GetNextState(CellState& NextState,
const CellState& State,
- const mblas::Matrix& Context) const {
+ const mblas::Tensor& Context) const {
using namespace mblas;
/* HANDLE_ERROR( cudaStreamSynchronize(mblas::CudaStreamHandler::GetStream())); */
@@ -66,13 +66,13 @@ class SlowLSTM: public Cell {
const Weights& w_;
// reused to avoid allocation
- mutable mblas::Matrix FIO_;
- mutable mblas::Matrix F_;
- mutable mblas::Matrix I_;
- mutable mblas::Matrix O_;
- mutable mblas::Matrix H_;
- mutable mblas::Matrix Temp1_;
- mutable mblas::Matrix Temp2_;
+ mutable mblas::Tensor FIO_;
+ mutable mblas::Tensor F_;
+ mutable mblas::Tensor I_;
+ mutable mblas::Tensor O_;
+ mutable mblas::Tensor H_;
+ mutable mblas::Tensor Temp1_;
+ mutable mblas::Tensor Temp2_;
SlowLSTM(const SlowLSTM&) = delete;
};
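
For contrast with the GRU above: the SlowLSTM scratch tensors map onto the four LSTM pre-activations (F_ forget, I_ input, O_ output, H_ candidate), and the update below is the reason CellState carries a cell tensor alongside output. A textbook per-element sketch, not the exact kernel:

    #include <cmath>

    void LstmElementwise(float& newH, float& newC, float prevC,
                         float f, float i, float o, float cand) {
      auto sigma = [](float x) { return 1.0f / (1.0f + std::exp(-x)); };
      newC = sigma(f) * prevC + sigma(i) * std::tanh(cand);  // becomes CellState::cell
      newH = sigma(o) * std::tanh(newC);                     // becomes CellState::output
    }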
diff --git a/src/amun/gpu/dl4mt/model.cu b/src/amun/gpu/dl4mt/model.cu
index 9386f704..9942d93c 100644
--- a/src/amun/gpu/dl4mt/model.cu
+++ b/src/amun/gpu/dl4mt/model.cu
@@ -11,7 +11,7 @@ namespace GPU {
for(int i=1; true; i++) {
std::string factorKey = "Wemb" + std::to_string(i);
- std::shared_ptr<mblas::Matrix> factorEmb = model.get(factorKey, false);
+ std::shared_ptr<mblas::Tensor> factorEmb = model.get(factorKey, false);
if (factorEmb->size() <= 0){
break;
}
@@ -26,7 +26,7 @@ Weights::EncForwardGRU::EncForwardGRU(const NpzConverter& model)
U_(model.get("encoder_U", true)),
Wx_(model.get("encoder_Wx", true)),
Bx1_(model.get("encoder_bx", true, true)),
- Bx2_(new mblas::Matrix(Bx1_->dim(0), Bx1_->dim(1), Bx1_->dim(2), Bx1_->dim(3), true)),
+ Bx2_(new mblas::Tensor(Bx1_->dim(0), Bx1_->dim(1), Bx1_->dim(2), Bx1_->dim(3), true)),
Ux_(model.get("encoder_Ux", true)),
Gamma_1_(model.get("encoder_gamma1", false)),
Gamma_2_(model.get("encoder_gamma2", false))
@@ -63,7 +63,7 @@ Weights::EncBackwardGRU::EncBackwardGRU(const NpzConverter& model)
U_(model.get("encoder_r_U", true)),
Wx_(model.get("encoder_r_Wx", true)),
Bx1_(model.get("encoder_r_bx", true, true)),
- Bx2_(new mblas::Matrix( Bx1_->dim(0), Bx1_->dim(1), Bx1_->dim(2), Bx1_->dim(3), true)),
+ Bx2_(new mblas::Tensor( Bx1_->dim(0), Bx1_->dim(1), Bx1_->dim(2), Bx1_->dim(3), true)),
Ux_(model.get("encoder_r_Ux", true)),
Gamma_1_(model.get("encoder_r_gamma1", false)),
Gamma_2_(model.get("encoder_r_gamma2", false))
@@ -89,7 +89,7 @@ Weights::DecGRU1::DecGRU1(const NpzConverter& model)
U_(model.get("decoder_U", true)),
Wx_(model.get("decoder_Wx", true)),
Bx1_(model.get("decoder_bx", true, true)),
- Bx2_(new mblas::Matrix(Bx1_->dim(0), Bx1_->dim(1), Bx1_->dim(2), Bx1_->dim(3), true)),
+ Bx2_(new mblas::Tensor(Bx1_->dim(0), Bx1_->dim(1), Bx1_->dim(2), Bx1_->dim(3), true)),
Ux_(model.get("decoder_Ux", true)),
Gamma_1_(model.get("decoder_cell1_gamma1", false)),
Gamma_2_(model.get("decoder_cell1_gamma2", false))
@@ -102,7 +102,7 @@ Weights::DecGRU2::DecGRU2(const NpzConverter& model)
U_(model.get("decoder_U_nl", true)),
Wx_(model.get("decoder_Wcx", true)),
Bx2_(model.get("decoder_bx_nl", true, true)),
- Bx1_(new mblas::Matrix(Bx2_->dim(0), Bx2_->dim(1), Bx2_->dim(2), Bx2_->dim(3), true)),
+ Bx1_(new mblas::Tensor(Bx2_->dim(0), Bx2_->dim(1), Bx2_->dim(2), Bx2_->dim(3), true)),
Ux_(model.get("decoder_Ux_nl", true)),
Gamma_1_(model.get("decoder_cell2_gamma1", false)),
Gamma_2_(model.get("decoder_cell2_gamma2", false))
diff --git a/src/amun/gpu/dl4mt/model.h b/src/amun/gpu/dl4mt/model.h
index f95c4424..0829d233 100644
--- a/src/amun/gpu/dl4mt/model.h
+++ b/src/amun/gpu/dl4mt/model.h
@@ -4,7 +4,7 @@
#include <string>
#include <yaml-cpp/yaml.h>
-#include "gpu/mblas/matrix.h"
+#include "gpu/mblas/tensor.h"
#include "gpu/npz_converter.h"
namespace amunmt {
@@ -19,7 +19,7 @@ struct Weights {
// Embedding matrices for word factors. The first factor is the word
// surface form. The rest are optional.
- std::vector<std::shared_ptr<mblas::Matrix>> Es_;
+ std::vector<std::shared_ptr<mblas::Tensor>> Es_;
};
////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -27,15 +27,15 @@ struct Weights {
EncForwardGRU(const EncForwardGRU&) = delete;
EncForwardGRU(const NpzConverter& model);
- const std::shared_ptr<mblas::Matrix> W_;
- const std::shared_ptr<mblas::Matrix> B_;
- const std::shared_ptr<mblas::Matrix> U_;
- const std::shared_ptr<mblas::Matrix> Wx_;
- const std::shared_ptr<mblas::Matrix> Bx1_;
- const std::shared_ptr<mblas::Matrix> Bx2_;
- const std::shared_ptr<mblas::Matrix> Ux_;
- const std::shared_ptr<mblas::Matrix> Gamma_1_;
- const std::shared_ptr<mblas::Matrix> Gamma_2_;
+ const std::shared_ptr<mblas::Tensor> W_;
+ const std::shared_ptr<mblas::Tensor> B_;
+ const std::shared_ptr<mblas::Tensor> U_;
+ const std::shared_ptr<mblas::Tensor> Wx_;
+ const std::shared_ptr<mblas::Tensor> Bx1_;
+ const std::shared_ptr<mblas::Tensor> Bx2_;
+ const std::shared_ptr<mblas::Tensor> Ux_;
+ const std::shared_ptr<mblas::Tensor> Gamma_1_;
+ const std::shared_ptr<mblas::Tensor> Gamma_2_;
};
////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -43,15 +43,15 @@ struct Weights {
EncBackwardGRU(const EncBackwardGRU&) = delete;
EncBackwardGRU(const NpzConverter& model);
- const std::shared_ptr<mblas::Matrix> W_;
- const std::shared_ptr<mblas::Matrix> B_;
- const std::shared_ptr<mblas::Matrix> U_;
- const std::shared_ptr<mblas::Matrix> Wx_;
- const std::shared_ptr<mblas::Matrix> Bx1_;
- const std::shared_ptr<mblas::Matrix> Bx2_;
- const std::shared_ptr<mblas::Matrix> Ux_;
- const std::shared_ptr<mblas::Matrix> Gamma_1_;
- const std::shared_ptr<mblas::Matrix> Gamma_2_;
+ const std::shared_ptr<mblas::Tensor> W_;
+ const std::shared_ptr<mblas::Tensor> B_;
+ const std::shared_ptr<mblas::Tensor> U_;
+ const std::shared_ptr<mblas::Tensor> Wx_;
+ const std::shared_ptr<mblas::Tensor> Bx1_;
+ const std::shared_ptr<mblas::Tensor> Bx2_;
+ const std::shared_ptr<mblas::Tensor> Ux_;
+ const std::shared_ptr<mblas::Tensor> Gamma_1_;
+ const std::shared_ptr<mblas::Tensor> Gamma_2_;
};
////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -59,14 +59,14 @@ struct Weights {
EncForwardLSTM(const EncForwardLSTM&) = delete;
EncForwardLSTM(const NpzConverter& model);
- const std::shared_ptr<mblas::Matrix> W_;
- const std::shared_ptr<mblas::Matrix> B_;
- const std::shared_ptr<mblas::Matrix> U_;
- const std::shared_ptr<mblas::Matrix> Wx_;
- const std::shared_ptr<mblas::Matrix> Bx_;
- const std::shared_ptr<mblas::Matrix> Ux_;
- const std::shared_ptr<mblas::Matrix> Gamma_1_;
- const std::shared_ptr<mblas::Matrix> Gamma_2_;
+ const std::shared_ptr<mblas::Tensor> W_;
+ const std::shared_ptr<mblas::Tensor> B_;
+ const std::shared_ptr<mblas::Tensor> U_;
+ const std::shared_ptr<mblas::Tensor> Wx_;
+ const std::shared_ptr<mblas::Tensor> Bx_;
+ const std::shared_ptr<mblas::Tensor> Ux_;
+ const std::shared_ptr<mblas::Tensor> Gamma_1_;
+ const std::shared_ptr<mblas::Tensor> Gamma_2_;
};
////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -74,14 +74,14 @@ struct Weights {
EncBackwardLSTM(const EncBackwardLSTM&) = delete;
EncBackwardLSTM(const NpzConverter& model);
- const std::shared_ptr<mblas::Matrix> W_;
- const std::shared_ptr<mblas::Matrix> B_;
- const std::shared_ptr<mblas::Matrix> U_;
- const std::shared_ptr<mblas::Matrix> Wx_;
- const std::shared_ptr<mblas::Matrix> Bx_;
- const std::shared_ptr<mblas::Matrix> Ux_;
- const std::shared_ptr<mblas::Matrix> Gamma_1_;
- const std::shared_ptr<mblas::Matrix> Gamma_2_;
+ const std::shared_ptr<mblas::Tensor> W_;
+ const std::shared_ptr<mblas::Tensor> B_;
+ const std::shared_ptr<mblas::Tensor> U_;
+ const std::shared_ptr<mblas::Tensor> Wx_;
+ const std::shared_ptr<mblas::Tensor> Bx_;
+ const std::shared_ptr<mblas::Tensor> Ux_;
+ const std::shared_ptr<mblas::Tensor> Gamma_1_;
+ const std::shared_ptr<mblas::Tensor> Gamma_2_;
};
////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -90,7 +90,7 @@ struct Weights {
DecEmbeddings(const DecEmbeddings&) = delete;
DecEmbeddings(const NpzConverter& model);
- const std::shared_ptr<mblas::Matrix> E_;
+ const std::shared_ptr<mblas::Tensor> E_;
};
////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -99,9 +99,9 @@ struct Weights {
DecInit(const DecInit&) = delete;
DecInit(const NpzConverter& model);
- const std::shared_ptr<mblas::Matrix> Wi_;
- const std::shared_ptr<mblas::Matrix> Bi_;
- const std::shared_ptr<mblas::Matrix> Gamma_;
+ const std::shared_ptr<mblas::Tensor> Wi_;
+ const std::shared_ptr<mblas::Tensor> Bi_;
+ const std::shared_ptr<mblas::Tensor> Gamma_;
};
////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -110,15 +110,15 @@ struct Weights {
DecGRU1(const DecGRU1&) = delete;
DecGRU1(const NpzConverter& model);
- const std::shared_ptr<mblas::Matrix> W_;
- const std::shared_ptr<mblas::Matrix> B_;
- const std::shared_ptr<mblas::Matrix> U_;
- const std::shared_ptr<mblas::Matrix> Wx_;
- const std::shared_ptr<mblas::Matrix> Bx1_;
- const std::shared_ptr<mblas::Matrix> Bx2_;
- const std::shared_ptr<mblas::Matrix> Ux_;
- const std::shared_ptr<mblas::Matrix> Gamma_1_;
- const std::shared_ptr<mblas::Matrix> Gamma_2_;
+ const std::shared_ptr<mblas::Tensor> W_;
+ const std::shared_ptr<mblas::Tensor> B_;
+ const std::shared_ptr<mblas::Tensor> U_;
+ const std::shared_ptr<mblas::Tensor> Wx_;
+ const std::shared_ptr<mblas::Tensor> Bx1_;
+ const std::shared_ptr<mblas::Tensor> Bx2_;
+ const std::shared_ptr<mblas::Tensor> Ux_;
+ const std::shared_ptr<mblas::Tensor> Gamma_1_;
+ const std::shared_ptr<mblas::Tensor> Gamma_2_;
};
////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -127,15 +127,15 @@ struct Weights {
DecGRU2(const DecGRU2&) = delete;
DecGRU2(const NpzConverter& model);
- const std::shared_ptr<mblas::Matrix> W_;
- const std::shared_ptr<mblas::Matrix> B_;
- const std::shared_ptr<mblas::Matrix> U_;
- const std::shared_ptr<mblas::Matrix> Wx_;
- const std::shared_ptr<mblas::Matrix> Bx2_;
- const std::shared_ptr<mblas::Matrix> Bx1_;
- const std::shared_ptr<mblas::Matrix> Ux_;
- const std::shared_ptr<mblas::Matrix> Gamma_1_;
- const std::shared_ptr<mblas::Matrix> Gamma_2_;
+ const std::shared_ptr<mblas::Tensor> W_;
+ const std::shared_ptr<mblas::Tensor> B_;
+ const std::shared_ptr<mblas::Tensor> U_;
+ const std::shared_ptr<mblas::Tensor> Wx_;
+ const std::shared_ptr<mblas::Tensor> Bx2_;
+ const std::shared_ptr<mblas::Tensor> Bx1_;
+ const std::shared_ptr<mblas::Tensor> Ux_;
+ const std::shared_ptr<mblas::Tensor> Gamma_1_;
+ const std::shared_ptr<mblas::Tensor> Gamma_2_;
};
////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -144,14 +144,14 @@ struct Weights {
DecLSTM1(const DecLSTM1&) = delete;
DecLSTM1(const NpzConverter& model);
- const std::shared_ptr<mblas::Matrix> W_;
- const std::shared_ptr<mblas::Matrix> B_;
- const std::shared_ptr<mblas::Matrix> U_;
- const std::shared_ptr<mblas::Matrix> Wx_;
- const std::shared_ptr<mblas::Matrix> Bx_;
- const std::shared_ptr<mblas::Matrix> Ux_;
- const std::shared_ptr<mblas::Matrix> Gamma_1_;
- const std::shared_ptr<mblas::Matrix> Gamma_2_;
+ const std::shared_ptr<mblas::Tensor> W_;
+ const std::shared_ptr<mblas::Tensor> B_;
+ const std::shared_ptr<mblas::Tensor> U_;
+ const std::shared_ptr<mblas::Tensor> Wx_;
+ const std::shared_ptr<mblas::Tensor> Bx_;
+ const std::shared_ptr<mblas::Tensor> Ux_;
+ const std::shared_ptr<mblas::Tensor> Gamma_1_;
+ const std::shared_ptr<mblas::Tensor> Gamma_2_;
};
////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -160,14 +160,14 @@ struct Weights {
DecLSTM2(const DecLSTM2&) = delete;
DecLSTM2(const NpzConverter& model);
- const std::shared_ptr<mblas::Matrix> W_;
- const std::shared_ptr<mblas::Matrix> B_;
- const std::shared_ptr<mblas::Matrix> U_;
- const std::shared_ptr<mblas::Matrix> Wx_;
- const std::shared_ptr<mblas::Matrix> Bx_;
- const std::shared_ptr<mblas::Matrix> Ux_;
- const std::shared_ptr<mblas::Matrix> Gamma_1_;
- const std::shared_ptr<mblas::Matrix> Gamma_2_;
+ const std::shared_ptr<mblas::Tensor> W_;
+ const std::shared_ptr<mblas::Tensor> B_;
+ const std::shared_ptr<mblas::Tensor> U_;
+ const std::shared_ptr<mblas::Tensor> Wx_;
+ const std::shared_ptr<mblas::Tensor> Bx_;
+ const std::shared_ptr<mblas::Tensor> Ux_;
+ const std::shared_ptr<mblas::Tensor> Gamma_1_;
+ const std::shared_ptr<mblas::Tensor> Gamma_2_;
};
////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -184,10 +184,10 @@ struct Weights {
Um_(model.get(p(prefix, "Um"), true)),
Bmu_(model.get(p(prefix, "bmu"), true, true))
{}
- const std::shared_ptr<mblas::Matrix> Wm_;
- const std::shared_ptr<mblas::Matrix> Bm_;
- const std::shared_ptr<mblas::Matrix> Um_;
- const std::shared_ptr<mblas::Matrix> Bmu_;
+ const std::shared_ptr<mblas::Tensor> Wm_;
+ const std::shared_ptr<mblas::Tensor> Bm_;
+ const std::shared_ptr<mblas::Tensor> Um_;
+ const std::shared_ptr<mblas::Tensor> Bmu_;
private:
std::string p(std::string prefix, std::string sufix){
return prefix + "_" + sufix;
@@ -200,13 +200,13 @@ struct Weights {
DecAlignment(const DecAlignment&) = delete;
DecAlignment(const NpzConverter& model);
- const std::shared_ptr<mblas::Matrix> V_;
- const std::shared_ptr<mblas::Matrix> W_;
- const std::shared_ptr<mblas::Matrix> B_;
- const std::shared_ptr<mblas::Matrix> U_;
- const std::shared_ptr<mblas::Matrix> C_;
- const std::shared_ptr<mblas::Matrix> Gamma_1_;
- const std::shared_ptr<mblas::Matrix> Gamma_2_;
+ const std::shared_ptr<mblas::Tensor> V_;
+ const std::shared_ptr<mblas::Tensor> W_;
+ const std::shared_ptr<mblas::Tensor> B_;
+ const std::shared_ptr<mblas::Tensor> U_;
+ const std::shared_ptr<mblas::Tensor> C_;
+ const std::shared_ptr<mblas::Tensor> Gamma_1_;
+ const std::shared_ptr<mblas::Tensor> Gamma_2_;
};
////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -215,17 +215,17 @@ struct Weights {
DecSoftmax(const DecSoftmax&) = delete;
DecSoftmax(const NpzConverter& model);
- const std::shared_ptr<mblas::Matrix> W1_;
- const std::shared_ptr<mblas::Matrix> B1_;
- const std::shared_ptr<mblas::Matrix> W2_;
- const std::shared_ptr<mblas::Matrix> B2_;
- const std::shared_ptr<mblas::Matrix> W3_;
- const std::shared_ptr<mblas::Matrix> B3_;
- const std::shared_ptr<mblas::Matrix> W4_;
- const std::shared_ptr<mblas::Matrix> B4_;
- const std::shared_ptr<mblas::Matrix> Gamma_0_;
- const std::shared_ptr<mblas::Matrix> Gamma_1_;
- const std::shared_ptr<mblas::Matrix> Gamma_2_;
+ const std::shared_ptr<mblas::Tensor> W1_;
+ const std::shared_ptr<mblas::Tensor> B1_;
+ const std::shared_ptr<mblas::Tensor> W2_;
+ const std::shared_ptr<mblas::Tensor> B2_;
+ const std::shared_ptr<mblas::Tensor> W3_;
+ const std::shared_ptr<mblas::Tensor> B3_;
+ const std::shared_ptr<mblas::Tensor> W4_;
+ const std::shared_ptr<mblas::Tensor> B4_;
+ const std::shared_ptr<mblas::Tensor> Gamma_0_;
+ const std::shared_ptr<mblas::Tensor> Gamma_1_;
+ const std::shared_ptr<mblas::Tensor> Gamma_2_;
};
////////////////////////////////////////////////////////////////////////////////////////////////////
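
The DecSoftmax weights parameterise the deep output layer that Softmax::GetProbs in decoder.h evaluates. Roughly, in the notation of those hunks (shapes left implicit, optional Gamma_* layer normalisation omitted):

    T1    = State * W1 + B1
    T2    = Embedding * W2 + B2
    T3    = AlignedSourceContext * W3 + B3
    t     = tanh(T1 + T2 + T3)            // the Element(Tanh(_1 + _2 + _3), ...) call
    Probs = log_softmax(t * W4 + B4)

When vocabulary filtering is active, FilteredW4_ and FilteredB4_ substitute for W4_ and B4_; with useFusedSoftmax the final log-softmax is instead fused with n-best extraction in LogSoftmaxAndNBest.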
diff --git a/src/amun/gpu/dl4mt/multiplicative.h b/src/amun/gpu/dl4mt/multiplicative.h
index e3265cd0..644413d9 100644
--- a/src/amun/gpu/dl4mt/multiplicative.h
+++ b/src/amun/gpu/dl4mt/multiplicative.h
@@ -1,7 +1,7 @@
#pragma once
#include <boost/timer/timer.hpp>
-#include "gpu/mblas/matrix_functions.h"
-#include "gpu/mblas/matrix_wrapper.h"
+#include "gpu/mblas/tensor_functions.h"
+#include "gpu/mblas/tensor_wrapper.h"
#include "gpu/mblas/handles.h"
#include "gpu/dl4mt/cell.h"
#include "cellstate.h"
@@ -19,7 +19,7 @@ class Multiplicative: public Cell {
{}
virtual void GetNextState(CellState& NextState,
const CellState& State,
- const mblas::Matrix& Context) const {
+ const mblas::Tensor& Context) const {
using namespace mblas;
// TODO: the weight matrix naming is probably inconsistent
/* HANDLE_ERROR( cudaStreamSynchronize(mblas::CudaStreamHandler::GetStream())); */
@@ -39,7 +39,7 @@ class Multiplicative: public Cell {
private:
CellType<InnerWeights> innerCell_;
const Weights::MultWeights<InnerWeights>& w_;
- mutable mblas::Matrix x_mult_;
+ mutable mblas::Tensor x_mult_;
mutable CellState tempState_;
};
}
diff --git a/src/amun/gpu/mblas/nth_element.cu b/src/amun/gpu/mblas/nth_element.cu
index 6df073c6..2f979169 100644
--- a/src/amun/gpu/mblas/nth_element.cu
+++ b/src/amun/gpu/mblas/nth_element.cu
@@ -1,9 +1,9 @@
#include <iostream>
#include "common/utils.h"
-#include "matrix_wrapper.h"
+#include "tensor_wrapper.h"
#include "vector_wrapper.h"
#include "nth_element.h"
-#include "matrix_functions.h"
+#include "tensor_functions.h"
using namespace std;
@@ -29,7 +29,7 @@ NthElement::~NthElement()
//cerr << "FOO2" << endl;
}
-void NthElement::getNBestList(const std::vector<unsigned>& beamSizes, mblas::Matrix& Probs,
+void NthElement::getNBestList(const std::vector<unsigned>& beamSizes, mblas::Tensor& Probs,
std::vector<float>& outCosts, std::vector<unsigned>& outKeys,
const bool isFirst) {
/*
@@ -75,7 +75,7 @@ void NthElement::getNBestList(const std::vector<unsigned>& beamSizes, mblas::Mat
//cerr << "outKeys=" << Debug(outKeys, 2) << endl;
}
-void NthElement::getNBestList(mblas::Matrix &probs,
+void NthElement::getNBestList(mblas::Tensor &probs,
const std::vector<unsigned>& batchFirstElementIdxs,
const std::vector<unsigned>& cummulatedBeamSizes)
{
@@ -100,7 +100,7 @@ void NthElement::getNBestList(mblas::Matrix &probs,
cudaMemcpyHostToDevice);
mblas::VectorWrapper<NthOut> outWrap(d_out);
- mblas::MatrixWrapper<float> probsWrap(probs);
+ mblas::TensorWrapper<float> probsWrap(probs);
mblas::VectorWrapper<unsigned> batchPositionWrap(d_batchPosition);
mblas::VectorWrapper<NthOut> resWrap(d_res);
mblas::VectorWrapper<unsigned> cumBeamSizesWrap(d_cumBeamSizes);
@@ -153,7 +153,7 @@ void NthElement::GetPairs(unsigned number,
}
}
-void NthElement::getValueByKey(std::vector<float>& out, const mblas::Matrix &d_in) const
+void NthElement::getValueByKey(std::vector<float>& out, const mblas::Tensor &d_in) const
{
// need a model with multiple scorers to test this method
assert(false);
@@ -161,7 +161,7 @@ void NthElement::getValueByKey(std::vector<float>& out, const mblas::Matrix &d_i
out.resize(d_breakdown.size());
//mblas::VectorWrapper<float> breakdownWrap(d_breakdown);
- //const mblas::MatrixWrapper<float> inWrap(d_in);
+ //const mblas::TensorWrapper<float> inWrap(d_in);
//gGetValueByKey<<<1, lastN_, 0, stream_>>>
// (breakdownWrap, inWrap, h_res_idx, lastN_);
/*
diff --git a/src/amun/gpu/mblas/nth_element.h b/src/amun/gpu/mblas/nth_element.h
index 9bce6b24..5cc9a9d8 100644
--- a/src/amun/gpu/mblas/nth_element.h
+++ b/src/amun/gpu/mblas/nth_element.h
@@ -4,7 +4,7 @@
#include <algorithm>
#include <cuda.h>
-#include "gpu/mblas/matrix.h"
+#include "gpu/mblas/tensor.h"
#include "gpu/mblas/vector.h"
#include "nth_element_kernels.h"
@@ -21,7 +21,7 @@ class NthElement {
// standard nth_element
void getNBestList(const std::vector<unsigned>& beamSizes,
- mblas::Matrix& Probs,
+ mblas::Tensor& Probs,
std::vector<float>& outCosts,
std::vector<unsigned>& outKeys,
const bool isFirst=false);
@@ -30,7 +30,7 @@ class NthElement {
std::vector<unsigned>& outKeys,
std::vector<float>& outValues);
- void getValueByKey(std::vector<float>& out, const mblas::Matrix &d_in) const;
+ void getValueByKey(std::vector<float>& out, const mblas::Tensor &d_in) const;
private:
const unsigned BLOCK_SIZE = 512;
@@ -46,7 +46,7 @@ class NthElement {
unsigned maxBeamSize_, maxBatchSize_;
- void getNBestList(mblas::Matrix &probs,
+ void getNBestList(mblas::Tensor &probs,
const std::vector<unsigned>& batchFirstElementIdxs,
const std::vector<unsigned>& cummulatedBeamSizes);
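
On the CPU this selection would just be a partial sort per batch; the kernels exist so the n-best never leaves the GPU. A reference sketch for a single batch, assuming (consistently with how outKeys is consumed) that keys are flat indices into the probability tensor:

    #include <algorithm>
    #include <numeric>
    #include <vector>

    // The beamSize highest scores in probs, as (flat key, cost) pairs.
    // Assumes beamSize <= probs.size().
    void NBestOneBatch(const std::vector<float>& probs, unsigned beamSize,
                       std::vector<unsigned>& outKeys, std::vector<float>& outCosts) {
      std::vector<unsigned> keys(probs.size());
      std::iota(keys.begin(), keys.end(), 0u);
      std::partial_sort(keys.begin(), keys.begin() + beamSize, keys.end(),
                        [&](unsigned a, unsigned b) { return probs[a] > probs[b]; });
      outKeys.assign(keys.begin(), keys.begin() + beamSize);
      outCosts.clear();
      for (unsigned k : outKeys) outCosts.push_back(probs[k]);
    }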
diff --git a/src/amun/gpu/mblas/nth_element_kernels.cu b/src/amun/gpu/mblas/nth_element_kernels.cu
index f7707f71..5c1ae460 100644
--- a/src/amun/gpu/mblas/nth_element_kernels.cu
+++ b/src/amun/gpu/mblas/nth_element_kernels.cu
@@ -20,7 +20,7 @@ void UnrollMaxArgLoop(unsigned n, unsigned max, unsigned tid, float *sdata, unsi
}
__global__ void gMaxElement(mblas::VectorWrapper<NthOut> out,
- const mblas::MatrixWrapper<float> probsWrap,
+ const mblas::TensorWrapper<float> probsWrap,
const mblas::VectorWrapper<unsigned> batchPositionWrap,
unsigned numBatches) {
extern __shared__ float sdata[];
@@ -98,7 +98,7 @@ __global__ void gMaxElement(mblas::VectorWrapper<NthOut> out,
}
__global__ void gMaxElementUpdate(mblas::VectorWrapper<NthOut> out,
- mblas::MatrixWrapper<float> probsWrap,
+ mblas::TensorWrapper<float> probsWrap,
mblas::VectorWrapper<NthOut> resWrap,
const mblas::VectorWrapper<unsigned> batchPositionWrap,
const mblas::VectorWrapper<unsigned> cumBeamSizesWrap,
@@ -253,8 +253,8 @@ __global__ void gMaxElementUpdate(mblas::VectorWrapper<NthOut> out,
}
}
-__global__ void gGetValueByKey(mblas::MatrixWrapper<float> out,
- const mblas::MatrixWrapper<float> in,
+__global__ void gGetValueByKey(mblas::TensorWrapper<float> out,
+ const mblas::TensorWrapper<float> in,
unsigned* indices, unsigned n)
{
unsigned tid = threadIdx.x + blockDim.x * blockIdx.x;
diff --git a/src/amun/gpu/mblas/nth_element_kernels.h b/src/amun/gpu/mblas/nth_element_kernels.h
index aeefcdd7..1be357e0 100644
--- a/src/amun/gpu/mblas/nth_element_kernels.h
+++ b/src/amun/gpu/mblas/nth_element_kernels.h
@@ -1,6 +1,6 @@
#pragma once
-#include "matrix_wrapper.h"
+#include "tensor_wrapper.h"
#include "vector_wrapper.h"
namespace amunmt {
@@ -47,7 +47,7 @@ struct NthOutBatch
__device__ __host__
NthOutBatch(const float& rhs)
{
- // only to be used to init variable in matrix.h gSum
+ // only to be used to init variable in tensor.h gSum
assert(rhs == 0.0f);
ind = rhs;
score = rhs;
@@ -111,19 +111,19 @@ inline std::ostream& operator<<(std::ostream &out, const NthOutBatch &obj)
/////////////////////////////////////////////////////////////////////////////////////////
__global__ void gMaxElement(mblas::VectorWrapper<NthOut> out,
- const mblas::MatrixWrapper<float> probsWrap,
+ const mblas::TensorWrapper<float> probsWrap,
const mblas::VectorWrapper<unsigned> batchPositionWrap,
unsigned numBatches);
__global__ void gMaxElementUpdate(mblas::VectorWrapper<NthOut> out,
- mblas::MatrixWrapper<float> probsWrap,
+ mblas::TensorWrapper<float> probsWrap,
mblas::VectorWrapper<NthOut> resWrap,
const mblas::VectorWrapper<unsigned> batchPositionWrap,
const mblas::VectorWrapper<unsigned> cumBeamSizesWrap,
unsigned numBlocks);
-__global__ void gGetValueByKey(mblas::MatrixWrapper<float> out,
- const mblas::MatrixWrapper<float> in,
+__global__ void gGetValueByKey(mblas::TensorWrapper<float> out,
+ const mblas::TensorWrapper<float> in,
unsigned* indices, unsigned n);
}
diff --git a/src/amun/gpu/mblas/matrix.cu b/src/amun/gpu/mblas/tensor.cu
index 98946daa..303dce1c 100644
--- a/src/amun/gpu/mblas/matrix.cu
+++ b/src/amun/gpu/mblas/tensor.cu
@@ -1,4 +1,4 @@
-#include "matrix.h"
+#include "tensor.h"
using namespace std;
diff --git a/src/amun/gpu/mblas/matrix.h b/src/amun/gpu/mblas/tensor.h
index a79f6c47..231f9aff 100644
--- a/src/amun/gpu/mblas/matrix.h
+++ b/src/amun/gpu/mblas/tensor.h
@@ -6,7 +6,7 @@
#include <thrust/functional.h>
#include "common/exception.h"
-#include "common/base_matrix.h"
+#include "common/base_tensor.h"
#include "gpu/types-gpu.h"
#include "handles.h"
#include "vector.h"
@@ -52,11 +52,11 @@ T Sum(const T *data, unsigned count)
///////////////////////////////////////////////////////////////////////////////////////////////////
template <typename T>
-class TMatrix : public BaseMatrix {
+class TTensor : public BaseTensor {
public:
typedef T value_type;
- TMatrix()
+ TTensor()
{
dim_[0] = 0;
dim_[1] = 0;
@@ -64,7 +64,7 @@ class TMatrix : public BaseMatrix {
dim_[3] = 0;
}
- TMatrix(unsigned rows, unsigned cols, unsigned c, unsigned d, bool zero = false)
+ TTensor(unsigned rows, unsigned cols, unsigned c, unsigned d, bool zero = false)
{
dim_[0] = rows;
dim_[1] = cols;
@@ -79,13 +79,13 @@ class TMatrix : public BaseMatrix {
}
}
- TMatrix(TMatrix&& m)
- : TMatrix()
+ TTensor(TTensor&& m)
+ : TTensor()
{
swap(m);
}
- TMatrix(const TMatrix& m)
+ TTensor(const TTensor& m)
: vec_(m.vec_)
{
dim_[0] = m.dim_[0];
@@ -94,7 +94,7 @@ class TMatrix : public BaseMatrix {
dim_[3] = m.dim_[3];
}
- ~TMatrix()
+ ~TTensor()
{
}
@@ -131,7 +131,7 @@ class TMatrix : public BaseMatrix {
virtual std::string Debug(unsigned verbosity = 1) const
{
std::stringstream strm;
- strm << BaseMatrix::Debug(verbosity) << " ";
+ strm << BaseTensor::Debug(verbosity) << " ";
strm << vec_.data() << " "
<< vec_.size() << " "
<< vec_.maxSize() << " "
@@ -170,7 +170,7 @@ class TMatrix : public BaseMatrix {
return vec_.data();
}
- void swap(TMatrix &other)
+ void swap(TTensor &other)
{
std::swap(dim_, other.dim_);
vec_.swap(other.vec_);
@@ -181,8 +181,7 @@ class TMatrix : public BaseMatrix {
Vector<T> vec_;
};
-typedef TMatrix<float> Matrix;
-typedef TMatrix<unsigned> IMatrix;
+typedef TTensor<float> Tensor;
} // namespace mblas
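
Note the typedefs: Matrix becomes Tensor, while the old IMatrix alias for TMatrix<unsigned> is dropped rather than renamed. A minimal usage sketch of the surviving alias; the four-dimension constructor, NewSize, dim and Debug all appear in the hunks above:

    #include <iostream>
    #include "gpu/mblas/tensor.h"

    namespace amunmt { namespace GPU {

    void TensorExample(unsigned rows, unsigned cols) {
      mblas::Tensor t(rows, cols, 1, 1, /*zero=*/true);  // trailing dims collapsed to 1
      t.NewSize(cols, rows);                             // reshape, reusing the buffer when it fits
      std::cerr << t.Debug(1) << " rows=" << t.dim(0) << std::endl;
    }

    } }  // namespace amunmt::GPU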
diff --git a/src/amun/gpu/mblas/matrix_functions.cu b/src/amun/gpu/mblas/tensor_functions.cu
index 5a30c0c2..aada422c 100644
--- a/src/amun/gpu/mblas/matrix_functions.cu
+++ b/src/amun/gpu/mblas/tensor_functions.cu
@@ -1,4 +1,4 @@
-#include "gpu/mblas/matrix_functions.h"
+#include "gpu/mblas/tensor_functions.h"
#include "gpu/mblas/handles.h"
using namespace std;
@@ -11,13 +11,13 @@ thread_local CudaStreamHandler CudaStreamHandler::instance_;
thread_local CublasHandler CublasHandler::instance_;
-Matrix& Swap(Matrix& Out, Matrix& In) {
+Tensor& Swap(Tensor& Out, Tensor& In) {
Out.swap(In);
return Out;
}
-__global__ void gMean(MatrixWrapper<float> out,
- const MatrixWrapper<float> in,
+__global__ void gMean(TensorWrapper<float> out,
+ const TensorWrapper<float> in,
const VectorWrapper<unsigned> sentenceLengths)
{
// out = batches * states
@@ -51,8 +51,8 @@ __global__ void gMean(MatrixWrapper<float> out,
}
}
-void Mean(Matrix& Out,
- const Matrix& In,
+void Mean(Tensor& Out,
+ const Tensor& In,
const mblas::Vector<unsigned> &sentenceLengths)
{
assert(Out.dim(2) == 1);
@@ -65,8 +65,8 @@ void Mean(Matrix& Out,
unsigned stateLength = Out.dim(1);
unsigned sentenceLength = (In.dim(0) * In.dim(2) * In.dim(3)) / batchNum;
- MatrixWrapper<float> outWrap(Out);
- MatrixWrapper<float> inWrap(In);
+ TensorWrapper<float> outWrap(Out);
+ TensorWrapper<float> inWrap(In);
//cerr << "outWrap=" << outWrap.Debug() << endl;
VectorWrapper<unsigned> sentenceLengthsWrap(sentenceLengths);
@@ -81,9 +81,9 @@ void Mean(Matrix& Out,
}
-__global__ void gWeightedMean(MatrixWrapper<float> out,
- const MatrixWrapper<float> weights,
- const MatrixWrapper<float> in,
+__global__ void gWeightedMean(TensorWrapper<float> out,
+ const TensorWrapper<float> weights,
+ const TensorWrapper<float> in,
const VectorWrapper<unsigned> mapping
)
{
@@ -107,16 +107,16 @@ __global__ void gWeightedMean(MatrixWrapper<float> out,
}
}
-void WeightedMean(Matrix& Out,const Matrix& Weights, const Matrix& In, const mblas::Vector<unsigned>& mapping)
+void WeightedMean(Tensor& Out,const Tensor& Weights, const Tensor& In, const mblas::Vector<unsigned>& mapping)
{
int numHypos = Weights.dim(0);
int states = In.dim(1);
Out.NewSize(numHypos, states);
- MatrixWrapper<float> outWrap(Out);
- MatrixWrapper<float> weightsWrap(Weights);
- MatrixWrapper<float> inWrap(In);
+ TensorWrapper<float> outWrap(Out);
+ TensorWrapper<float> weightsWrap(Weights);
+ TensorWrapper<float> inWrap(In);
VectorWrapper<unsigned> mappingWrap(mapping);
unsigned size = Out.size();
@@ -140,7 +140,7 @@ void WeightedMean(Matrix& Out,const Matrix& Weights, const Matrix& In, const mbl
*/
}
-Matrix& Transpose(Matrix& Out, const Matrix& In) {
+Tensor& Transpose(Tensor& Out, const Tensor& In) {
unsigned m = In.dim(0);
unsigned n = In.dim(1);
@@ -155,14 +155,14 @@ Matrix& Transpose(Matrix& Out, const Matrix& In) {
return Out;
}
-Matrix& Transpose(Matrix& Out) {
- thread_local Matrix Temp;
+Tensor& Transpose(Tensor& Out) {
+ thread_local Tensor Temp;
Transpose(Temp, Out);
Swap(Out, Temp);
return Out;
}
-Matrix& Concat(Matrix& Out, const Matrix& In) {
+Tensor& Concat(Tensor& Out, const Tensor& In) {
unsigned oldSize = Out.size();
Out.Resize(Out.dim(0) + In.dim(0), Out.dim(1));
@@ -171,7 +171,7 @@ Matrix& Concat(Matrix& Out, const Matrix& In) {
return Out;
}
-Matrix& Copy(Matrix& Out, const Matrix& In) {
+Tensor& Copy(Tensor& Out, const Tensor& In) {
Out.NewSize(In.dim(0), In.dim(1), In.dim(2), In.dim(3));
mblas::copy(In.data(), In.size(), Out.data(), cudaMemcpyDeviceToDevice);
@@ -179,8 +179,8 @@ Matrix& Copy(Matrix& Out, const Matrix& In) {
return Out;
}
-__global__ void gPasteRows(MatrixWrapper<float> out,
- const MatrixWrapper<float> in,
+__global__ void gPasteRows(TensorWrapper<float> out,
+ const TensorWrapper<float> in,
int rowNo, int colNo)
{
int inRows = in.dim(0);
@@ -198,10 +198,10 @@ __global__ void gPasteRows(MatrixWrapper<float> out,
}
}
-void PasteRows(Matrix& Out, const Matrix& In, const unsigned rowNo, unsigned colNo)
+void PasteRows(Tensor& Out, const Tensor& In, const unsigned rowNo, unsigned colNo)
{
- MatrixWrapper<float> outWrap(Out);
- MatrixWrapper<float> inWrap(In);
+ TensorWrapper<float> outWrap(Out);
+ TensorWrapper<float> inWrap(In);
unsigned size = In.size();
unsigned nThreads = std::min((unsigned) MAX_THREADS, (unsigned)size);
@@ -213,8 +213,8 @@ void PasteRows(Matrix& Out, const Matrix& In, const unsigned rowNo, unsigned col
}
-Matrix& PasteRow(Matrix& Out,
- const Matrix& In,
+Tensor& PasteRow(Tensor& Out,
+ const Tensor& In,
const unsigned r, const unsigned c)
{
unsigned start = r * Out.dim(1) + c;
@@ -224,8 +224,8 @@ Matrix& PasteRow(Matrix& Out,
return Out;
}
-Matrix& CopyRow(Matrix& Out,
- const Matrix& In,
+Tensor& CopyRow(Tensor& Out,
+ const Tensor& In,
const unsigned r, const unsigned c) {
unsigned length = In.dim(1) - c;
Out.NewSize(1, length);
@@ -238,8 +238,8 @@ Matrix& CopyRow(Matrix& Out,
return Out;
}
-__global__ void gCopyRows(MatrixWrapper<float> out,
- const MatrixWrapper<float> in,
+__global__ void gCopyRows(TensorWrapper<float> out,
+ const TensorWrapper<float> in,
const VectorWrapper<unsigned> indicesWrap)
{
int id = threadIdx.x + blockIdx.x * blockDim.x;
@@ -256,8 +256,8 @@ __global__ void gCopyRows(MatrixWrapper<float> out,
}
}
-Matrix& CopyRows(Matrix& Out,
- const Matrix& In,
+Tensor& CopyRows(Tensor& Out,
+ const Tensor& In,
const mblas::Vector<unsigned>& indices)
{
assert(In.dim(1) == Out.dim(1));
@@ -279,8 +279,8 @@ Matrix& CopyRows(Matrix& Out,
unsigned numPairs = indices.size();
- MatrixWrapper<float> outWrap(Out);
- const MatrixWrapper<float> inWrap(In);
+ TensorWrapper<float> outWrap(Out);
+ const TensorWrapper<float> inWrap(In);
const VectorWrapper<unsigned> indicesWrap(indices);
//cerr << "size=" << size << endl;
@@ -295,8 +295,8 @@ Matrix& CopyRows(Matrix& Out,
}
-Matrix& Assemble(Matrix& Out,
- const Matrix& In,
+Tensor& Assemble(Tensor& Out,
+ const Tensor& In,
const mblas::Vector<unsigned>& indices) {
Out.NewSize(indices.size(), In.dim(1));
//cerr << "Assemble=" << Out.Debug() << " " << In.Debug() << indices.size() << endl;
@@ -305,8 +305,8 @@ Matrix& Assemble(Matrix& Out,
return Out;
}
-__global__ void gSlice(MatrixWrapper<float> out,
- const MatrixWrapper<float> in,
+__global__ void gSlice(TensorWrapper<float> out,
+ const TensorWrapper<float> in,
unsigned n, unsigned dim)
{
unsigned row = blockIdx.x;
@@ -323,8 +323,8 @@ __global__ void gSlice(MatrixWrapper<float> out,
}
-Matrix& Slice(Matrix& Out,
- const Matrix& In,
+Tensor& Slice(Tensor& Out,
+ const Tensor& In,
unsigned n, unsigned dim)
{
assert(In.dim(2) == 1);
@@ -332,8 +332,8 @@ Matrix& Slice(Matrix& Out,
Out.NewSize(In.dim(0), dim);
- MatrixWrapper<float> outWrap(Out);
- const MatrixWrapper<float> inWrap(In);
+ TensorWrapper<float> outWrap(Out);
+ const TensorWrapper<float> inWrap(In);
/*
cerr << "outWrap=" << outWrap.Debug() << endl;
@@ -353,13 +353,13 @@ Matrix& Slice(Matrix& Out,
return Out;
}
-Matrix& Prod(cublasHandle_t handle, Matrix& C, const Matrix& A, const Matrix& B, bool transB)
+Tensor& Prod(cublasHandle_t handle, Tensor& C, const Tensor& A, const Tensor& B, bool transB)
{
BEGIN_TIMER("Prod");
assert((A.dim(2) == 1 && A.dim(3) == 1) || (B.dim(2) == 1 && B.dim(3) == 1));
- Matrix::value_type alpha = 1.0;
- Matrix::value_type beta = 0.0;
+ Tensor::value_type alpha = 1.0;
+ Tensor::value_type beta = 0.0;
unsigned m = A.dim(0) * A.dim(2) * A.dim(3);
unsigned k = A.dim(1);
@@ -402,6 +402,7 @@ Matrix& Prod(cublasHandle_t handle, Matrix& C, const Matrix& A, const Matrix& B,
cerr << "B=" << B.Debug(0) << endl;
cerr << "transB=" << transB << endl;
cerr << m << " " << n << " " << k << endl;
+ cerr << lda << " " << ldb << " " << ldc << endl;
cerr << endl;
*/
bool transA = false;
@@ -419,20 +420,20 @@ Matrix& Prod(cublasHandle_t handle, Matrix& C, const Matrix& A, const Matrix& B,
return C;
}
-Matrix& Prod(Matrix& C, const Matrix& A, const Matrix& B,
+Tensor& Prod(Tensor& C, const Tensor& A, const Tensor& B,
bool transB) {
//std::cerr << "1C=" << C.Debug() << std::endl;
//std::cerr << "1A=" << A.Debug() << std::endl;
//std::cerr << "1B=" << B.Debug() << std::endl;
- Matrix &ret = Prod(CublasHandler::GetHandle(), C, A, B, transB);
+ Tensor &ret = Prod(CublasHandler::GetHandle(), C, A, B, transB);
//std::cerr << "2C=" << C.Debug() << std::endl;
return ret;
}
-__global__ void gSoftMax(MatrixWrapper<float> out,
+__global__ void gSoftMax(TensorWrapper<float> out,
const VectorWrapper<unsigned> batchIdsWrap,
const VectorWrapper<unsigned> sentenceLengthsWrap,
unsigned shareSize)
@@ -517,14 +518,14 @@ __global__ void gSoftMax(MatrixWrapper<float> out,
}
}
-Matrix& Softmax(Matrix& Out,
+Tensor& Softmax(Tensor& Out,
const mblas::Vector<unsigned>& batchIds,
const mblas::Vector<unsigned> &sentenceLengths,
unsigned batchSize)
{
unsigned maxLength = Out.dim(1);
- MatrixWrapper<float> outWrap(Out);
+ TensorWrapper<float> outWrap(Out);
const VectorWrapper<unsigned> batchIdsWrap(batchIds);
const VectorWrapper<unsigned> sentenceLengthsWrap(sentenceLengths);
@@ -539,7 +540,7 @@ Matrix& Softmax(Matrix& Out,
return Out;
}
-__global__ void gLogSoftMax(MatrixWrapper<float> out, unsigned shareSize)
+__global__ void gLogSoftMax(TensorWrapper<float> out, unsigned shareSize)
{
extern __shared__ float _share[];
@@ -619,9 +620,9 @@ __global__ void gLogSoftMax(MatrixWrapper<float> out, unsigned shareSize)
}
-Matrix& LogSoftmax(Matrix& Out)
+Tensor& LogSoftmax(Tensor& Out)
{
- MatrixWrapper<float> outWrap(Out);
+ TensorWrapper<float> outWrap(Out);
int blocks = std::min(MAX_BLOCKS, (int)Out.dim(0));
int threads = std::min(MAX_THREADS, (int)Out.dim(1));
@@ -634,7 +635,7 @@ Matrix& LogSoftmax(Matrix& Out)
return Out;
}
-__global__ void gSetColumn(MatrixWrapper<float> in, int noColumn, float value) {
+__global__ void gSetColumn(TensorWrapper<float> in, int noColumn, float value) {
int n_rows = in.dim(0);
int rowNumber = threadIdx.x + blockDim.x * blockIdx.x;
@@ -644,33 +645,33 @@ __global__ void gSetColumn(MatrixWrapper<float> in, int noColumn, float value) {
}
}
-void SetColumn(Matrix& In, int noColumn, float value) {
+void SetColumn(Tensor& In, int noColumn, float value) {
int nRows = In.dim(0);
int nBlocks = nRows / MAX_THREADS + ((nRows % MAX_THREADS == 0) ? 0 : 1);
int nThreads = std::min(MAX_THREADS, nRows);
- MatrixWrapper<float> inWrap(In);
+ TensorWrapper<float> inWrap(In);
gSetColumn<<<nBlocks, nThreads, 0, mblas::CudaStreamHandler::GetStream()>>>
(inWrap, noColumn, value);
HANDLE_ERROR(cudaGetLastError());
}
-__global__ void gFill(MatrixWrapper<float> in, float val) {
+__global__ void gFill(TensorWrapper<float> in, float val) {
int index = threadIdx.x + blockDim.x * blockIdx.x;
if (index < in.size()) {
in[index] = val;
}
}
-void Fill(Matrix& In, float value) {
+void Fill(Tensor& In, float value) {
unsigned size = In.size();
if (value) {
int nThreads = std::min(MAX_THREADS, (int)size);
int nBlocks = (size / nThreads) + ((size % nThreads == 0) ? 0 : 1);
- MatrixWrapper<float> inWrap(In);
+ TensorWrapper<float> inWrap(In);
gFill<<<nBlocks, nThreads, 0, CudaStreamHandler::GetStream()>>>
(inWrap, value);
@@ -683,7 +684,7 @@ void Fill(Matrix& In, float value) {
}
__global__
-void gMapMatrix(MatrixWrapper<float> in,
+void gMapMatrix(TensorWrapper<float> in,
const VectorWrapper<unsigned> sentenceLengthsWrap,
int i)
{
@@ -698,7 +699,7 @@ void gMapMatrix(MatrixWrapper<float> in,
}
}
-void MapMatrix(Matrix& state,
+void MapMatrix(Tensor& state,
const mblas::Vector<unsigned> &sentenceLengths,
unsigned i)
{
@@ -711,7 +712,7 @@ void MapMatrix(Matrix& state,
int numThreads = std::min((int)state.size(), MAX_THREADS);
int numBlocks = (state.size() / numThreads) + ((state.size() % numThreads == 0) ? 0 : 1);
- MatrixWrapper<float> stateWrap(state);
+ TensorWrapper<float> stateWrap(state);
VectorWrapper<unsigned> sentenceLengthsWrap(sentenceLengths);
gMapMatrix<<<numBlocks, numThreads, 0, CudaStreamHandler::GetStream()>>>
@@ -737,10 +738,10 @@ __device__ unsigned getIndex(const dim3 &dim, const dim3 &val)
}
-__global__ void gLNormalization(MatrixWrapper<float> out,
- const MatrixWrapper<float> in,
- const MatrixWrapper<float> alphaWrap,
- const MatrixWrapper<float> betaWrap,
+__global__ void gLNormalization(TensorWrapper<float> out,
+ const TensorWrapper<float> in,
+ const TensorWrapper<float> alphaWrap,
+ const TensorWrapper<float> betaWrap,
float eps=0.00001)
{
extern __shared__ float _share[];
@@ -814,10 +815,10 @@ __global__ void gLNormalization(MatrixWrapper<float> out,
}
-void Normalization(Matrix &out,
- const Matrix &in,
- const Matrix &alpha,
- const Matrix *beta,
+void Normalization(Tensor &out,
+ const Tensor &in,
+ const Tensor &alpha,
+ const Tensor *beta,
float eps)
{
assert(in.dim(0) < MAX_BLOCKS);
@@ -830,10 +831,10 @@ void Normalization(Matrix &out,
dim3 numBlocks(in.dim(0), in.dim(2), in.dim(3));
int shared = numThreads * sizeof(float) * 2;
- MatrixWrapper<float> outWrap(out);
- const MatrixWrapper<float> inWrap(in);
- const MatrixWrapper<float> alphaWrap(alpha);
- MatrixWrapper<float> *betaWrap = beta ? new MatrixWrapper<float>(*beta) : new MatrixWrapper<float>();
+ TensorWrapper<float> outWrap(out);
+ const TensorWrapper<float> inWrap(in);
+ const TensorWrapper<float> alphaWrap(alpha);
+ TensorWrapper<float> *betaWrap = beta ? new TensorWrapper<float>(*beta) : new TensorWrapper<float>();
gLNormalization<<<numBlocks, numThreads, shared, CudaStreamHandler::GetStream()>>>
(outWrap, inWrap, alphaWrap, *betaWrap, eps);
@@ -853,13 +854,13 @@ void Normalization(Matrix &out,
delete betaWrap;
}
-void Normalization(Matrix& out, const Matrix& in, const Matrix& alpha, const Matrix& beta,
+void Normalization(Tensor& out, const Tensor& in, const Tensor& alpha, const Tensor& beta,
float eps)
{
Normalization(out, in, alpha, &beta, eps);
}
-void Normalization(Matrix& out, const Matrix& in, const Matrix& alpha, float eps)
+void Normalization(Tensor& out, const Tensor& in, const Tensor& alpha, float eps)
{
Normalization(out, in, alpha, nullptr, eps);
}
@@ -927,7 +928,7 @@ void gBeamSizeInit(VectorWrapper<unsigned> hypo2BeamSizeWrap,
}
__device__
-float GetMaxScore(const MatrixWrapper<NthOutBatch> &nBestMatrix)
+float GetMaxScore(const TensorWrapper<NthOutBatch> &nBestMatrix)
{
float ret = LOWEST_FLOAT;
for (unsigned i = 0; i < nBestMatrix.dim(1); ++i) {
@@ -1017,8 +1018,8 @@ void MergeElement(float &minScore,
__device__
void NBestAndMax(VectorWrapper<NthOutBatch> &nBestCandidatesWrap,
float &topScore,
- const MatrixWrapper<float> &in,
- const MatrixWrapper<float> &b4Wrap,
+ const TensorWrapper<float> &in,
+ const TensorWrapper<float> &b4Wrap,
unsigned hypoInd,
unsigned maxBeamSize,
bool forbidUNK,
@@ -1028,10 +1029,10 @@ void NBestAndMax(VectorWrapper<NthOutBatch> &nBestCandidatesWrap,
extern __shared__ char _sharePtr[];
// placeholder for shared mem in subsequent function SumAndLogSoftMax
- //MatrixWrapper<float> maxMatrix((float*)_sharePtr, blockDim.x, 1, 1, 1);
+ //TensorWrapper<float> maxMatrix((float*)_sharePtr, blockDim.x, 1, 1, 1);
void *ptrOffset = _sharePtr + sizeof(float) * blockDim.x;
- MatrixWrapper<NthOutBatch> nBestMatrix((NthOutBatch*)ptrOffset, blockDim.x, maxBeamSize, 1, 1);
+ TensorWrapper<NthOutBatch> nBestMatrix((NthOutBatch*)ptrOffset, blockDim.x, maxBeamSize, 1, 1);
VectorWrapper<NthOutBatch> row = nBestMatrix.Row(threadIdx.x);
unsigned vocabSize = in.dim(1);
@@ -1106,8 +1107,8 @@ void NBestAndMax(VectorWrapper<NthOutBatch> &nBestCandidatesWrap,
///////////////////////////////////////////////////////////////////////////////////////////////////////
__device__
void SumAndLogSoftMax(VectorWrapper<NthOutBatch> &nBestCandidatesWrap,
- const MatrixWrapper<float> &in,
- const MatrixWrapper<float> &b4Wrap,
+ const TensorWrapper<float> &in,
+ const TensorWrapper<float> &b4Wrap,
unsigned hypoInd,
unsigned maxBeamSize,
float topScore,
@@ -1159,8 +1160,8 @@ void SumAndLogSoftMax(VectorWrapper<NthOutBatch> &nBestCandidatesWrap,
///////////////////////////////////////////////////////////////////////////////////////////////////////
__global__ void gLogSoftMax(VectorWrapper<NthOutBatch> nBestCandidatesWrap,
- const MatrixWrapper<float> in,
- const MatrixWrapper<float> b4Wrap,
+ const TensorWrapper<float> in,
+ const TensorWrapper<float> b4Wrap,
unsigned maxBeamSize,
bool forbidUNK,
const VectorWrapper<unsigned> hypo2BeamSizeWrap,
@@ -1204,7 +1205,7 @@ __global__ void gLogSoftMax(VectorWrapper<NthOutBatch> nBestCandidatesWrap,
///////////////////////////////////////////////////////////////////////////////////////////////////////
__global__ void gNBestPerBatch(VectorWrapper<NthOutBatch> nBestWrap,
VectorWrapper<NthOutBatch> nBestCandidatesWrap,
- const MatrixWrapper<float> in,
+ const TensorWrapper<float> in,
const VectorWrapper<float> costsWrap,
unsigned maxBeamSize,
bool forbidUNK,
@@ -1299,8 +1300,8 @@ __global__ void gNBestPerBatch(VectorWrapper<NthOutBatch> nBestWrap,
///////////////////////////////////////////////////////////////////////////////////////////////////////
void LogSoftmaxAndNBest(mblas::Vector<NthOutBatch> &nBest,
- const Matrix& in,
- const Matrix& b4,
+ const Tensor& in,
+ const Tensor& b4,
const mblas::Vector<float> &costs,
bool forbidUNK,
unsigned maxBeamSize,
@@ -1352,8 +1353,8 @@ void LogSoftmaxAndNBest(mblas::Vector<NthOutBatch> &nBest,
cerr << endl;
*/
- MatrixWrapper<float> inWrap(in);
- MatrixWrapper<float> b4Wrap(b4);
+ TensorWrapper<float> inWrap(in);
+ TensorWrapper<float> b4Wrap(b4);
VectorWrapper<unsigned> hypo2BeamSizeWrap(hypo2BeamSize);
VectorWrapper<unsigned> hypo2CandidateWrap(hypo2Candidate);
VectorWrapper<unsigned> batch2HypoWrap(batch2Hypo);
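// The host-side pattern this function and its neighbours follow: build
// non-owning TensorWrapper/VectorWrapper views, then pass them to the kernel
// by value. A minimal sketch with a hypothetical kernel, written as if inside
// namespace amunmt::GPU::mblas; operator[] on the wrapper is assumed from the
// gElement kernels in tensor_functions.h:
__global__ void gScaleSketch(TensorWrapper<float> out, float s) {
  unsigned i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < out.size()) out[i] = s * out[i];
}

void ScaleSketch(Tensor &t, float s) {
  TensorWrapper<float> wrap(t);  // view of t's device buffer, no copy
  unsigned threads = std::min((unsigned)MAX_THREADS, (unsigned)t.size());
  unsigned blocks  = t.size() / threads + ((t.size() % threads == 0) ? 0 : 1);
  gScaleSketch<<<blocks, threads, 0, CudaStreamHandler::GetStream()>>>(wrap, s);
}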
diff --git a/src/amun/gpu/mblas/matrix_functions.h b/src/amun/gpu/mblas/tensor_functions.h
index b7d7a0e5..720f811b 100644
--- a/src/amun/gpu/mblas/matrix_functions.h
+++ b/src/amun/gpu/mblas/tensor_functions.h
@@ -10,8 +10,8 @@
#include <iostream>
#include "gpu/mblas/thrust_functions.h"
-#include "gpu/mblas/matrix.h"
-#include "gpu/mblas/matrix_wrapper.h"
+#include "gpu/mblas/tensor.h"
+#include "gpu/mblas/tensor_wrapper.h"
#include "gpu/mblas/handles.h"
#include "gpu/mblas/nth_element_kernels.h"
#include "gpu/mblas/vector_wrapper.h"
@@ -73,66 +73,66 @@ void copy(const T *in, unsigned count, T *out, cudaMemcpyKind kind) {
HANDLE_ERROR( cudaMemcpyAsync(out, in, count * sizeof(T), kind, CudaStreamHandler::GetStream()) );
}
-void Fill(Matrix& In, float value=0.0f);
+void Fill(Tensor& In, float value=0.0f);
-Matrix& Swap(Matrix& Out, Matrix& In);
+Tensor& Swap(Tensor& Out, Tensor& In);
-void Mean(Matrix& Out,
- const Matrix& In,
+void Mean(Tensor& Out,
+ const Tensor& In,
const mblas::Vector<unsigned> &sentenceLengths);
-void WeightedMean(Matrix& Out,const Matrix& Weights, const Matrix& In, const mblas::Vector<unsigned>& mapping);
+void WeightedMean(Tensor& Out,const Tensor& Weights, const Tensor& In, const mblas::Vector<unsigned>& mapping);
-Matrix& Transpose(Matrix& Out, const Matrix& In);
+Tensor& Transpose(Tensor& Out, const Tensor& In);
-Matrix& Transpose(Matrix& Out);
+Tensor& Transpose(Tensor& Out);
-Matrix& Copy(Matrix& Out, const Matrix& In);
+Tensor& Copy(Tensor& Out, const Tensor& In);
-Matrix& PasteRow(Matrix& Out,
- const Matrix& In,
+Tensor& PasteRow(Tensor& Out,
+ const Tensor& In,
const unsigned r = 0,
const unsigned c = 0);
-void PasteRows(Matrix& Out, const Matrix& In, const unsigned rowNo, unsigned colNo=0);
+void PasteRows(Tensor& Out, const Tensor& In, const unsigned rowNo, unsigned colNo=0);
-Matrix& CopyRow(Matrix& Out,
- const Matrix& In,
+Tensor& CopyRow(Tensor& Out,
+ const Tensor& In,
const unsigned r = 0,
const unsigned c = 0);
-Matrix& Concat(Matrix& Out, const Matrix& In);
+Tensor& Concat(Tensor& Out, const Tensor& In);
-void MapMatrix(Matrix& state,
+void MapMatrix(Tensor& state,
const mblas::Vector<unsigned> &sentenceLengths,
unsigned i);
-Matrix& CopyRows(Matrix& Out,
- const Matrix& In,
+Tensor& CopyRows(Tensor& Out,
+ const Tensor& In,
const mblas::Vector<unsigned>& indices);
-Matrix& Assemble(Matrix& Out,
- const Matrix& In,
+Tensor& Assemble(Tensor& Out,
+ const Tensor& In,
const mblas::Vector<unsigned>& indices);
-Matrix& Slice(Matrix& Out,
- const Matrix& In,
+Tensor& Slice(Tensor& Out,
+ const Tensor& In,
unsigned n, unsigned dim);
-Matrix& Prod(Matrix& C, const Matrix& A, const Matrix& B,
+Tensor& Prod(Tensor& C, const Tensor& A, const Tensor& B,
bool transB = false);
-Matrix& Softmax(Matrix& Out,
+Tensor& Softmax(Tensor& Out,
const mblas::Vector<unsigned>& batchIds,
const mblas::Vector<unsigned> &sentenceLengths,
unsigned batchSize);
-Matrix& LogSoftmax(Matrix& Out);
+Tensor& LogSoftmax(Tensor& Out);
template <class Functor>
__global__ void gBroadcast(Functor functor,
- MatrixWrapper<float> outWrap,
- const MatrixWrapper<float> in1Wrap,
- const MatrixWrapper<float> in2Wrap,
+ TensorWrapper<float> outWrap,
+ const TensorWrapper<float> in1Wrap,
+ const TensorWrapper<float> in2Wrap,
const VectorWrapper<unsigned> batchMappingWrap)
{
int id = threadIdx.x + blockIdx.x * blockDim.x;
@@ -168,10 +168,10 @@ __global__ void gBroadcast(Functor functor,
}
template <class Functor>
-Matrix& Broadcast(Functor functor,
- Matrix& out,
- const Matrix& in1,
- const Matrix& in2,
+Tensor& Broadcast(Functor functor,
+ Tensor& out,
+ const Tensor& in1,
+ const Tensor& in2,
const mblas::Vector<unsigned>& batchMapping,
unsigned srcSize)
{
@@ -183,9 +183,9 @@ Matrix& Broadcast(Functor functor,
out.NewSize(srcSize, cols, sumOfBeamSizes);
- MatrixWrapper<float> outWrap(out);
- const MatrixWrapper<float> in1Wrap(in1);
- const MatrixWrapper<float> in2Wrap(in2);
+ TensorWrapper<float> outWrap(out);
+ const TensorWrapper<float> in1Wrap(in1);
+ const TensorWrapper<float> in2Wrap(in2);
const VectorWrapper<unsigned> batchMappingWrap(batchMapping);
unsigned size = out.size();
@@ -215,7 +215,7 @@ Matrix& Broadcast(Functor functor,
template <class Functor>
__global__ void gBroadcastVecColumn(Functor functor,
- MatrixWrapper<float> outWrap,
+ TensorWrapper<float> outWrap,
const VectorWrapper<float> inWrap) {
extern __shared__ float sdataOrig[];
@@ -240,12 +240,12 @@ __global__ void gBroadcastVecColumn(Functor functor,
}
template <class Functor>
-Matrix& BroadcastVecColumn(Functor functor, Matrix& Out, const mblas::Vector<float>& In)
+Tensor& BroadcastVecColumn(Functor functor, Tensor& Out, const mblas::Vector<float>& In)
{
unsigned rows = Out.dim(0);
unsigned cols = Out.dim(1);
- MatrixWrapper<float> outWrap(Out);
+ TensorWrapper<float> outWrap(Out);
const VectorWrapper<float> inWrap(In);
int threads = std::min(MAX_THREADS, (int)cols);
@@ -260,8 +260,8 @@ Matrix& BroadcastVecColumn(Functor functor, Matrix& Out, const mblas::Vector<flo
template <class Functor>
__global__ void gBroadcastVec(Functor functor,
- MatrixWrapper<float> outWrap,
- const MatrixWrapper<float> inWrap)
+ TensorWrapper<float> outWrap,
+ const TensorWrapper<float> inWrap)
{
unsigned cols = outWrap.dim(1);
@@ -282,15 +282,15 @@ __global__ void gBroadcastVec(Functor functor,
}
template <class Functor>
-Matrix& BroadcastVec(Functor functor, Matrix& Out, const Matrix& In)
+Tensor& BroadcastVec(Functor functor, Tensor& Out, const Tensor& In)
{
//std::cerr << "Out=" << Out.Debug() << std::endl;
//std::cerr << "In=" << In.Debug() << std::endl;
unsigned cols = Out.dim(1);
- MatrixWrapper<float> outWrap(Out);
- const MatrixWrapper<float> inWrap(In);
+ TensorWrapper<float> outWrap(Out);
+ const TensorWrapper<float> inWrap(In);
int threads = std::min(MAX_THREADS, (int)cols);
int blocks = cols / threads + ((cols % threads == 0) ? 0 : 1);
@@ -305,7 +305,7 @@ Matrix& BroadcastVec(Functor functor, Matrix& Out, const Matrix& In)
template <class Functor>
__global__ void gElement(Functor functor,
- MatrixWrapper<float> outWrap)
+ TensorWrapper<float> outWrap)
{
unsigned ind = blockIdx.x * blockDim.x + threadIdx.x;
if (ind < outWrap.size()) {
@@ -314,15 +314,15 @@ __global__ void gElement(Functor functor,
}
template <class Functor>
-Matrix& Element(Functor functor,
- Matrix& Out)
+Tensor& Element(Functor functor,
+ Tensor& Out)
{
unsigned size = Out.size();
unsigned threads = std::min((unsigned) MAX_THREADS, (unsigned)size);
unsigned blocks = size / threads + ((size % threads == 0) ? 0 : 1);
const cudaStream_t& stream = CudaStreamHandler::GetStream();
- MatrixWrapper<float> outWrap(Out);
+ TensorWrapper<float> outWrap(Out);
gElement<<<blocks, threads, 0, stream>>>
(functor, outWrap);
@@ -333,8 +333,8 @@ Matrix& Element(Functor functor,
template <class Functor>
__global__ void gElement(Functor functor,
- MatrixWrapper<float> outWrap,
- const MatrixWrapper<float> inWrap)
+ TensorWrapper<float> outWrap,
+ const TensorWrapper<float> inWrap)
{
unsigned ind = blockIdx.x * blockDim.x + threadIdx.x;
if (ind < outWrap.size()) {
@@ -343,8 +343,8 @@ __global__ void gElement(Functor functor,
}
template <class Functor>
-Matrix& Element(Functor functor,
- Matrix& Out, const Matrix& In)
+Tensor& Element(Functor functor,
+ Tensor& Out, const Tensor& In)
{
assert(Out.size() == In.size());
@@ -353,8 +353,8 @@ Matrix& Element(Functor functor,
unsigned blocks = size / threads + ((size % threads == 0) ? 0 : 1);
const cudaStream_t& stream = CudaStreamHandler::GetStream();
- MatrixWrapper<float> outWrap(Out);
- const MatrixWrapper<float> inWrap(In);
+ TensorWrapper<float> outWrap(Out);
+ const TensorWrapper<float> inWrap(In);
gElement<<<blocks, threads, 0, stream>>>
(functor, outWrap, inWrap);
@@ -365,9 +365,9 @@ Matrix& Element(Functor functor,
template <class Functor>
__global__ void gElement(Functor functor,
- MatrixWrapper<float> outWrap,
- const MatrixWrapper<float> in1Wrap,
- const MatrixWrapper<float> in2Wrap)
+ TensorWrapper<float> outWrap,
+ const TensorWrapper<float> in1Wrap,
+ const TensorWrapper<float> in2Wrap)
{
unsigned ind = blockIdx.x * blockDim.x + threadIdx.x;
if (ind < outWrap.size()) {
@@ -376,8 +376,8 @@ __global__ void gElement(Functor functor,
}
template <class Functor>
-Matrix& Element(Functor functor,
- Matrix& Out, const Matrix& In1, const Matrix& In2)
+Tensor& Element(Functor functor,
+ Tensor& Out, const Tensor& In1, const Tensor& In2)
{
//std::cerr << "Out=" << Out.Debug() << std::endl;
//std::cerr << "In1=" << In1.Debug() << std::endl;
@@ -395,9 +395,9 @@ Matrix& Element(Functor functor,
//std::cerr << "Element3=" << In1.Debug(0) << std::endl;
//std::cerr << "Element3=" << In2.Debug(0) << std::endl;
//std::cerr << std::endl;
- MatrixWrapper<float> outWrap(Out);
- const MatrixWrapper<float> in1Wrap(In1);
- const MatrixWrapper<float> in2Wrap(In2);
+ TensorWrapper<float> outWrap(Out);
+ const TensorWrapper<float> in1Wrap(In1);
+ const TensorWrapper<float> in2Wrap(In2);
//std::cerr << "outWrap=" << outWrap.Debug() << std::endl;
gElement<<<blocks, threads, 0, stream>>>
@@ -411,16 +411,16 @@ Matrix& Element(Functor functor,
return Out;
}
-void SetColumn(Matrix& In, int noColumn, float value);
+void SetColumn(Tensor& In, int noColumn, float value);
-void Normalization(Matrix& out, const Matrix& in, const Matrix& alpha, const Matrix& beta,
+void Normalization(Tensor& out, const Tensor& in, const Tensor& alpha, const Tensor& beta,
float eps);
-void Normalization(Matrix& out, const Matrix& in, const Matrix& alpha, float eps);
+void Normalization(Tensor& out, const Tensor& in, const Tensor& alpha, float eps);
void LogSoftmaxAndNBest(mblas::Vector<NthOutBatch> &nBest,
- const Matrix& in,
- const Matrix& b4,
+ const Tensor& in,
+ const Tensor& b4,
const mblas::Vector<float> &costs,
bool forbidUNK,
unsigned maxBeamSize,
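// Caller-side view of the renamed templates. Element applies a device functor
// elementwise; the two-tensor form presumably combines Out[i] with In[i], as
// its size assert suggests. The functor here is a hypothetical stand-in for
// the ones in gpu/mblas/thrust_functions.h:
struct AxpbySketch {
  float a, b;
  __device__ float operator()(float x, float y) const { return a * x + b * y; }
};

void BlendSketch(Tensor &out, const Tensor &in) {
  // out[i] = 0.9*out[i] + 0.1*in[i], under the assumption above
  Element(AxpbySketch{0.9f, 0.1f}, out, in);
}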
diff --git a/src/amun/gpu/mblas/matrix_wrapper.h b/src/amun/gpu/mblas/tensor_wrapper.h
index 4e50a9aa..4dff4bc7 100644
--- a/src/amun/gpu/mblas/matrix_wrapper.h
+++ b/src/amun/gpu/mblas/tensor_wrapper.h
@@ -1,5 +1,5 @@
#pragma once
-#include "matrix.h"
+#include "tensor.h"
#include "vector_wrapper.h"
namespace amunmt {
@@ -7,10 +7,10 @@ namespace GPU {
namespace mblas {
template <typename T>
-class MatrixWrapper
+class TensorWrapper
{
public:
- MatrixWrapper()
+ TensorWrapper()
{
dim_[0] = 0;
dim_[1] = 0;
@@ -22,7 +22,7 @@ public:
dataConst_ = nullptr;
}
- MatrixWrapper(const TMatrix<T> &matrix)
+ TensorWrapper(const TTensor<T> &matrix)
{
dim_[0] = matrix.dim(0);
dim_[1] = matrix.dim(1);
@@ -34,7 +34,7 @@ public:
dataConst_ = matrix.data();
}
- MatrixWrapper(TMatrix<T> &matrix)
+ TensorWrapper(TTensor<T> &matrix)
{
dim_[0] = matrix.dim(0);
dim_[1] = matrix.dim(1);
@@ -46,7 +46,7 @@ public:
dataConst_ = data_;
}
- MatrixWrapper(unsigned a, unsigned b, unsigned c, unsigned d)
+ TensorWrapper(unsigned a, unsigned b, unsigned c, unsigned d)
{ // test constructor
dim_[0] = a;
dim_[1] = b;
@@ -59,7 +59,7 @@ public:
}
__device__
- MatrixWrapper(T *ptr, unsigned a, unsigned b, unsigned c, unsigned d)
+ TensorWrapper(T *ptr, unsigned a, unsigned b, unsigned c, unsigned d)
{
dim_[0] = a;
dim_[1] = b;
@@ -309,7 +309,7 @@ protected:
inline void testidToMatrixInd()
{
- MatrixWrapper<float> matrix(2, 4, 3, 5);
+ TensorWrapper<float> matrix(2, 4, 3, 5);
std::cerr << "matrix=" << matrix.Debug() << std::endl;
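// The test constructor above builds a 2x4x3x5 wrapper for testidToMatrixInd.
// One way a linear id decomposes into (a,b,c,d), assuming a plain row-major
// layout over dim(0)..dim(3); the real wrapper may order its strides differently:
void Id2IndicesSketch(unsigned id, const unsigned dim[4], unsigned out[4]) {
  out[3] = id % dim[3]; id /= dim[3];
  out[2] = id % dim[2]; id /= dim[2];
  out[1] = id % dim[1]; id /= dim[1];
  out[0] = id;  // caller guarantees id < dim[0]*dim[1]*dim[2]*dim[3]
}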
diff --git a/src/amun/gpu/mblas/vector_wrapper.h b/src/amun/gpu/mblas/vector_wrapper.h
index 98ccfb85..15cce201 100644
--- a/src/amun/gpu/mblas/vector_wrapper.h
+++ b/src/amun/gpu/mblas/vector_wrapper.h
@@ -1,6 +1,6 @@
#pragma once
#include <sstream>
-#include "matrix.h"
+#include "tensor.h"
#include "gpu/mblas/vector.h"
namespace amunmt {
diff --git a/src/amun/gpu/npz_converter.cu b/src/amun/gpu/npz_converter.cu
index 9791206a..c808ad32 100644
--- a/src/amun/gpu/npz_converter.cu
+++ b/src/amun/gpu/npz_converter.cu
@@ -1,6 +1,6 @@
#include "npz_converter.h"
#include "common/exception.h"
-#include "mblas/matrix_functions.h"
+#include "mblas/tensor_functions.h"
using namespace std;
@@ -33,17 +33,17 @@ T Debug(const T *data, unsigned size)
return sum;
}
-std::shared_ptr<mblas::Matrix> NpzConverter::get(const std::string& key, bool mandatory, bool transpose) const
+std::shared_ptr<mblas::Tensor> NpzConverter::get(const std::string& key, bool mandatory, bool transpose) const
{
//mblas::TestMemCpy();
- std::shared_ptr<mblas::Matrix> ret;
+ std::shared_ptr<mblas::Tensor> ret;
auto it = model_.find(key);
if(it != model_.end()) {
NpyMatrixWrapper np(it->second);
unsigned size = np.size();
- mblas::Matrix *matrix = new mblas::Matrix(np.size1(), np.size2(), 1, 1);
+ mblas::Tensor *matrix = new mblas::Tensor(np.size1(), np.size2(), 1, 1);
mblas::copy(np.data(), size, matrix->data(), cudaMemcpyHostToDevice);
if (transpose) {
@@ -53,12 +53,12 @@ std::shared_ptr<mblas::Matrix> NpzConverter::get(const std::string& key, bool ma
ret.reset(matrix);
}
else if (mandatory) {
- std::cerr << "Error: Matrix not found:" << key << std::endl;
+ std::cerr << "Error: Tensor not found:" << key << std::endl;
//amunmt_UTIL_THROW2(strm.str()); // << key << std::endl
abort();
}
else {
- mblas::Matrix *matrix = new mblas::Matrix();
+ mblas::Tensor *matrix = new mblas::Tensor();
ret.reset(matrix);
}
@@ -66,14 +66,14 @@ std::shared_ptr<mblas::Matrix> NpzConverter::get(const std::string& key, bool ma
return ret;
}
-std::shared_ptr<mblas::Matrix> NpzConverter::getFirstOfMany(const std::vector<std::pair<std::string, bool>> keys, bool mandatory) const
+std::shared_ptr<mblas::Tensor> NpzConverter::getFirstOfMany(const std::vector<std::pair<std::string, bool>> keys, bool mandatory) const
{
- std::shared_ptr<mblas::Matrix> ret;
+ std::shared_ptr<mblas::Tensor> ret;
for (auto key : keys) {
auto it = model_.find(key.first);
if(it != model_.end()) {
NpyMatrixWrapper np(it->second);
- mblas::Matrix *matrix = new mblas::Matrix(np.size1(), np.size2(), 1, 1);
+ mblas::Tensor *matrix = new mblas::Tensor(np.size1(), np.size2(), 1, 1);
mblas::copy(np.data(), np.size(), matrix->data(), cudaMemcpyHostToDevice);
if (key.second) {
@@ -85,7 +85,7 @@ std::shared_ptr<mblas::Matrix> NpzConverter::getFirstOfMany(const std::vector<st
}
if (mandatory) {
- std::cerr << "Error: Matrix not found:" << keys[0].first << std::endl;
+ std::cerr << "Error: Tensor not found:" << keys[0].first << std::endl;
//amunmt_UTIL_THROW2(strm.str()); // << key << std::endl
abort();
}
diff --git a/src/amun/gpu/npz_converter.h b/src/amun/gpu/npz_converter.h
index 204a1f4b..074c66a1 100644
--- a/src/amun/gpu/npz_converter.h
+++ b/src/amun/gpu/npz_converter.h
@@ -1,7 +1,7 @@
#pragma once
#include "cnpy/cnpy.h"
-#include "mblas/matrix_functions.h"
+#include "mblas/tensor_functions.h"
namespace amunmt {
namespace GPU {
@@ -46,8 +46,8 @@ class NpzConverter {
void Destruct();
- std::shared_ptr<mblas::Matrix> get(const std::string& key, bool mandatory, bool transpose = false) const;
- std::shared_ptr<mblas::Matrix> getFirstOfMany(const std::vector<std::pair<std::string, bool>> keys, bool mandatory) const;
+ std::shared_ptr<mblas::Tensor> get(const std::string& key, bool mandatory, bool transpose = false) const;
+ std::shared_ptr<mblas::Tensor> getFirstOfMany(const std::vector<std::pair<std::string, bool>> keys, bool mandatory) const;
private:
cnpy::npz_t model_;
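// Loader-side view of the two accessors, using only the signatures above.
// Key names are illustrative, not actual model keys; the bool in each
// getFirstOfMany pair appears to request a transpose on load:
std::shared_ptr<mblas::Tensor> LoadSketch(const NpzConverter &npz) {
  auto W = npz.get("W_example", /*mandatory=*/true);                      // as stored
  auto U = npz.get("U_example", /*mandatory=*/false, /*transpose=*/true); // transposed
  auto b = npz.getFirstOfMany({{"b_new", false}, {"b_old", true}},
                              /*mandatory=*/true);                        // first key found wins
  return W;
}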
diff --git a/src/marian b/src/marian
-Subproject commit 8b00026868ce0e3cb6107aa4a866c59447aa427
+Subproject commit 3deb9015a8729f4453f0f06b27385e764cce196