Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/moses
diff options
context:
space:
mode:
authornicolabertoldi <nicolabertoldi@1f5c12ca-751b-0410-a591-d2e778427230>2011-10-03 20:02:02 +0400
committernicolabertoldi <nicolabertoldi@1f5c12ca-751b-0410-a591-d2e778427230>2011-10-03 20:02:02 +0400
commit47e452a076b3112561bb68127aee7a68467ed1ed (patch)
tree26bbc06932bdc22465ebddbee587634f89932053 /moses
parent1ea3acde3d4dfe170a9deb8390ac4ad8a8f8f61e (diff)
made LM interface compliant with IRSTLM 5.70.02; fixed a bug related to word encoding
git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@4292 1f5c12ca-751b-0410-a591-d2e778427230
Diffstat (limited to 'moses')
-rw-r--r--moses/src/LanguageModelIRST.cpp62
-rw-r--r--moses/src/LanguageModelIRST.h5
2 files changed, 10 insertions, 57 deletions
diff --git a/moses/src/LanguageModelIRST.cpp b/moses/src/LanguageModelIRST.cpp
index 3684241f8..9ac8c6617 100644
--- a/moses/src/LanguageModelIRST.cpp
+++ b/moses/src/LanguageModelIRST.cpp
@@ -25,10 +25,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include <fstream>
#include "dictionary.h"
#include "n_gram.h"
-#include "lmtable.h"
-#include "lmmacro.h"
-#include "lmclass.h"
-
+#include "lmContainer.h"
#include "LanguageModelIRST.h"
#include "TypeDef.h"
@@ -72,56 +69,16 @@ bool LanguageModelIRST::Load(const std::string &filePath,
m_nGramOrder = nGramOrder;
m_filePath = filePath;
- //checking the language model type
- int lmtype = getLanguageModelType(m_filePath);
- std::cerr << "IRSTLM Language Model Type of " << filePath << " is " << lmtype << std::endl;
-
- if (lmtype == _IRSTLM_LMMACRO) {
- // case lmmacro: LM is of type lmmacro, create an object of lmmacro
-
- m_lmtb = new lmmacro();
- d=((lmmacro *)m_lmtb)->getDict();
-
- ((lmmacro*) m_lmtb)->load(m_filePath);
- } else if (lmtype == _IRSTLM_LMCLASS) {
- // case lmclass: LM is of type lmclass, create an object of lmclass
-
- m_lmtb = new lmclass();
- d=((lmclass *)m_lmtb)->getDict();
-
- ((lmclass*) m_lmtb)->load(m_filePath);
- } else if (lmtype == _IRSTLM_LMTABLE) {
- // case (standard) lmmacro: LM is of type lmtable: create an object of lmtable
- std::cerr << "Loading LM file (no MAP)\n";
- m_lmtb = (lmtable *)new lmtable();
- d=((lmtable *)m_lmtb)->getDict();
-
- // Load the (possibly binary) model
- // Open the input file (possibly gzipped)
- InputFileStream inp(m_filePath);
-
-#ifdef WIN32
- m_lmtb->load(inp); //don't use memory map
-#else
- if (m_filePath.compare(m_filePath.size()-3,3,".mm")==0)
- m_lmtb->load(inp,m_filePath.c_str(),NULL,1);
- else
- m_lmtb->load(inp);
-#endif
-
- } else {
- std::cerr << "This language model type is unknown!" << std::endl;
- exit(1);
- }
- if ((lmtype == _IRSTLM_LMMACRO) || ((lmtype == _IRSTLM_LMCLASS))){
- m_lmtb->getDict()->incflag(1);
- }
+ m_lmtb = m_lmtb->CreateLanguageModel(m_filePath);
+ m_lmtb->setMaxLoadedLevel(1000);
+ m_lmtb->load(m_filePath);
+ d=m_lmtb->getDict();
+ d->incflag(1);
m_lmtb_size=m_lmtb->maxlevel();
// LM can be ok, just outputs warnings
-
// Mauro: in the original, the following two instructions are wrongly switched:
m_unknownId = d->oovcode(); // at the level of micro tags
m_empty = -1; // code for an empty position
@@ -131,8 +88,7 @@ bool LanguageModelIRST::Load(const std::string &filePath,
VERBOSE(1, "IRST: m_unknownId=" << m_unknownId << std::endl);
//install caches to save time (only if PS_CACHE_ENABLE is defined through compilation flags)
- int ml = ((lmtable *)m_lmtb)->maxlevel();
- m_lmtb->init_caches(ml>2?ml-1:2);
+ m_lmtb->init_caches(m_lmtb_size>2?m_lmtb_size-1:2);
if (m_lmtb_dub > 0) m_lmtb->setlogOOVpenalty(m_lmtb_dub);
@@ -145,6 +101,7 @@ void LanguageModelIRST::CreateFactors(FactorCollection &factorCollection)
// code copied & paste from SRI LM class. should do template function
std::map<size_t, int> lmIdMap;
size_t maxFactorId = 0; // to create lookup vector later on
+ m_empty = -1; // code for an empty position
dict_entry *entry;
dictionary_iter iter(d); // at the level of micro tags
@@ -170,15 +127,12 @@ void LanguageModelIRST::CreateFactors(FactorCollection &factorCollection)
// add to lookup vector in object
m_lmIdLookup.resize(maxFactorId+1);
-
fill(m_lmIdLookup.begin(), m_lmIdLookup.end(), m_empty);
map<size_t, int>::iterator iterMap;
for (iterMap = lmIdMap.begin() ; iterMap != lmIdMap.end() ; ++iterMap) {
m_lmIdLookup[iterMap->first] = iterMap->second;
}
-
-
}
int LanguageModelIRST::GetLmID( const std::string &str ) const
diff --git a/moses/src/LanguageModelIRST.h b/moses/src/LanguageModelIRST.h
index 111dd8e8d..58c3036da 100644
--- a/moses/src/LanguageModelIRST.h
+++ b/moses/src/LanguageModelIRST.h
@@ -29,8 +29,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "Util.h"
#include "LanguageModelSingleFactor.h"
-class lmtable; // irst lm table
-class lmmacro; // irst lm for macro tags
+class lmContainer; // irst lm container for any lm type
class ngram;
class dictionary;
@@ -45,7 +44,7 @@ class LanguageModelIRST : public LanguageModelPointerState
{
protected:
mutable std::vector<int> m_lmIdLookup;
- lmtable* m_lmtb;
+ lmContainer* m_lmtb;
int m_unknownId; //code of OOV
int m_empty; //code of an empty position