Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/marian.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Hieu Hoang <hihoan@microsoft.com> 2021-03-23 04:22:45 +0300
committer: Hieu Hoang <hihoan@microsoft.com> 2021-03-23 04:22:45 +0300
commit: 64707fa484c79fe68fdf5022b269b99d70bee5fd (patch)
tree: 8cc8257e313b26b64964ef1afa6eccf66adce7d5
parent: 415769fb2f953c5abf88f2d3498f2a46ea3607d7 (diff)
Revert "start lsh shortlist"
This reverts commit 415769fb2f953c5abf88f2d3498f2a46ea3607d7.
-rw-r--r--  src/data/shortlist.cpp  16
-rw-r--r--  src/data/shortlist.h  8
-rw-r--r--  src/translator/translator.h  5
3 files changed, 3 insertions, 26 deletions
diff --git a/src/data/shortlist.cpp b/src/data/shortlist.cpp
index 2d4a5edc..6f551262 100644
--- a/src/data/shortlist.cpp
+++ b/src/data/shortlist.cpp
@@ -133,30 +133,16 @@ Ptr<Shortlist> QuicksandShortlistGenerator::generate(Ptr<data::CorpusBatch> batc
return New<Shortlist>(indices);
}
-LSHlistGenerator::LSHlistGenerator(int k, int nbits) {
-
-}
-
-Ptr<Shortlist> LSHlistGenerator::generate(Ptr<data::CorpusBatch> batch) const {
-
-}
-
-
Ptr<ShortlistGenerator> createShortlistGenerator(Ptr<Options> options,
Ptr<const Vocab> srcVocab,
Ptr<const Vocab> trgVocab,
size_t srcIdx,
size_t trgIdx,
- const std::vector<int> &lshOpts,
bool shared) {
- std::cerr << "lshOpts=" << lshOpts.size() << std::endl;
std::vector<std::string> vals = options->get<std::vector<std::string>>("shortlist");
ABORT_IF(vals.empty(), "No path to shortlist given");
std::string fname = vals[0];
- if (lshOpts.size() == 2) {
- return New<LSHlistGenerator>(lshOpts[0], lshOpts[1]);
- }
- else if(filesystem::Path(fname).extension().string() == ".bin") {
+ if(filesystem::Path(fname).extension().string() == ".bin") {
return New<QuicksandShortlistGenerator>(options, srcVocab, trgVocab, srcIdx, trgIdx, shared);
} else {
return New<LexicalShortlistGenerator>(options, srcVocab, trgVocab, srcIdx, trgIdx, shared);
diff --git a/src/data/shortlist.h b/src/data/shortlist.h
index be04e518..ab6a087b 100644
--- a/src/data/shortlist.h
+++ b/src/data/shortlist.h
@@ -328,13 +328,6 @@ public:
virtual Ptr<Shortlist> generate(Ptr<data::CorpusBatch> batch) const override;
};
-class LSHlistGenerator : public ShortlistGenerator {
-private:
-
-public:
- LSHlistGenerator(int k, int nbits);
-};
-
/*
Shortlist factory to create correct type of shortlist. Currently assumes everything is a text shortlist
unless the extension is *.bin for which the Microsoft legacy binary shortlist is used.
@@ -344,7 +337,6 @@ Ptr<ShortlistGenerator> createShortlistGenerator(Ptr<Options> options,
Ptr<const Vocab> trgVocab,
size_t srcIdx = 0,
size_t trgIdx = 1,
- const std::vector<int> &lshOpts,
bool shared = false);
} // namespace data
diff --git a/src/translator/translator.h b/src/translator/translator.h
index edc4a4fa..fe01065b 100644
--- a/src/translator/translator.h
+++ b/src/translator/translator.h
@@ -62,9 +62,8 @@ public:
trgVocab_->load(vocabs.back());
auto srcVocab = corpus_->getVocabs()[0];
- std::vector<int> lshOpts = options_->get<std::vector<int>>("output-approx-knn");
- if(lshOpts.size() == 2 || options_->hasAndNotEmpty("shortlist"))
- shortlistGenerator_ = data::createShortlistGenerator(options_, srcVocab, trgVocab_, 0, 1, lshOpts, vocabs.front() == vocabs.back());
+ if(options_->hasAndNotEmpty("shortlist"))
+ shortlistGenerator_ = data::createShortlistGenerator(options_, srcVocab, trgVocab_, 0, 1, vocabs.front() == vocabs.back());
auto devices = Config::getDevices(options_);
numDevices_ = devices.size();