Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Hieu Hoang <hieu@hoang.co.uk> 2014-06-06 22:23:43 +0400
committer Hieu Hoang <hieu@hoang.co.uk> 2014-06-06 22:23:43 +0400
commit 9e0ebc16ee23a4989db2278e89eddf506a88eea1 (patch)
tree 4f89b163d1b2e94b80acd0f693c65f8a476bf10e /OnDiskPt
parent 091ce3f016258506cc8b45291592550bf11f5045 (diff)
sparse features in on-disk-pt
Diffstat (limited to 'OnDiskPt')
-rw-r--r-- OnDiskPt/Main.cpp 15
-rw-r--r-- OnDiskPt/TargetPhrase.h 11
2 files changed, 17 insertions, 9 deletions
diff --git a/OnDiskPt/Main.cpp b/OnDiskPt/Main.cpp
index fdfc561be..e6d24a308 100644
--- a/OnDiskPt/Main.cpp
+++ b/OnDiskPt/Main.cpp
@@ -109,7 +109,7 @@ bool Flush(const OnDiskPt::SourcePhrase *prevSourcePhrase, const OnDiskPt::Sourc
OnDiskPt::PhrasePtr Tokenize(SourcePhrase &sourcePhrase, TargetPhrase &targetPhrase, char *line, OnDiskWrapper &onDiskWrapper, int numScores, vector<float> &misc)
{
- stringstream property;
+ stringstream sparseFeatures, property;
size_t scoreInd = 0;
@@ -152,13 +152,15 @@ OnDiskPt::PhrasePtr Tokenize(SourcePhrase &sourcePhrase, TargetPhrase &targetPhr
break;
}
case 4: {
- break;
+ // store only the 3rd one (rule count)
+ float val = Moses::Scan<float>(tok);
+ misc[0] = val;
+ break;
}
case 5: {
- // store only the 3rd one (rule count)
- float val = Moses::Scan<float>(tok);
- misc[0] = val;
- break;
+ // sparse features
+ sparseFeatures << tok << " ";
+ break;
}
case 6: {
property << tok << " ";
@@ -175,6 +177,7 @@ OnDiskPt::PhrasePtr Tokenize(SourcePhrase &sourcePhrase, TargetPhrase &targetPhr
} // while (tok != NULL)
assert(scoreInd == numScores);
+ targetPhrase.SetSparseFeatures(Moses::Trim(sparseFeatures.str()));
targetPhrase.SetProperty(Moses::Trim(property.str()));
targetPhrase.SortAlign();
return out;
diff --git a/OnDiskPt/TargetPhrase.h b/OnDiskPt/TargetPhrase.h
index 283e7815e..d8ca77ffa 100644
--- a/OnDiskPt/TargetPhrase.h
+++ b/OnDiskPt/TargetPhrase.h
@@ -50,7 +50,7 @@ class TargetPhrase: public Phrase
protected:
AlignType m_align;
PhrasePtr m_sourcePhrase;
- std::string m_property;
+ std::string m_property, m_sparseFeatures;
std::vector<float> m_scores;
UINT64 m_filePos;
@@ -111,9 +111,14 @@ public:
virtual void DebugPrint(std::ostream &out, const Vocab &vocab) const;
- void SetProperty(const std::string prop)
+ void SetProperty(const std::string &value)
{
- m_property = prop;
+ m_property = value;
+ }
+
+ void SetSparseFeatures(const std::string &value)
+ {
+ m_sparseFeatures = value;
}
};