Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
Diffstat (limited to 'OnDiskPt')
-rw-r--r-- OnDiskPt/Main.cpp 29
-rw-r--r-- OnDiskPt/OnDiskWrapper.cpp 2
-rw-r--r-- OnDiskPt/TargetPhrase.cpp 37
-rw-r--r-- OnDiskPt/TargetPhrase.h 5
4 files changed, 51 insertions, 22 deletions
diff --git a/OnDiskPt/Main.cpp b/OnDiskPt/Main.cpp
index f2d75ed05..fdfc561be 100644
--- a/OnDiskPt/Main.cpp
+++ b/OnDiskPt/Main.cpp
@@ -109,6 +109,8 @@ bool Flush(const OnDiskPt::SourcePhrase *prevSourcePhrase, const OnDiskPt::Sourc
OnDiskPt::PhrasePtr Tokenize(SourcePhrase &sourcePhrase, TargetPhrase &targetPhrase, char *line, OnDiskWrapper &onDiskWrapper, int numScores, vector<float> &misc)
{
+ stringstream property;
+
size_t scoreInd = 0;
// MAIN LOOP
@@ -118,6 +120,7 @@ OnDiskPt::PhrasePtr Tokenize(SourcePhrase &sourcePhrase, TargetPhrase &targetPhr
2 = scores
3 = align
4 = count
+ 7 = properties
*/
char *tok = strtok (line," ");
OnDiskPt::PhrasePtr out(new Phrase());
@@ -148,29 +151,18 @@ OnDiskPt::PhrasePtr Tokenize(SourcePhrase &sourcePhrase, TargetPhrase &targetPhr
targetPhrase.CreateAlignFromString(tok);
break;
}
- case 4:
- ++stage;
+ case 4: {
break;
- /* case 5: {
- // count info. Only store the 2nd one
- float val = Moses::Scan<float>(tok);
- misc[0] = val;
- ++stage;
- break;
- }*/
+ }
case 5: {
- // count info. Only store the 2nd one
- //float val = Moses::Scan<float>(tok);
- //misc[0] = val;
- ++stage;
+ // store only the 3rd one (rule count)
+ float val = Moses::Scan<float>(tok);
+ misc[0] = val;
break;
}
case 6: {
- // store only the 3rd one (rule count)
- float val = Moses::Scan<float>(tok);
- misc[0] = val;
- ++stage;
- break;
+ property << tok << " ";
+ break;
}
default:
cerr << "ERROR in line " << line << endl;
@@ -183,6 +175,7 @@ OnDiskPt::PhrasePtr Tokenize(SourcePhrase &sourcePhrase, TargetPhrase &targetPhr
} // while (tok != NULL)
assert(scoreInd == numScores);
+ targetPhrase.SetProperty(Moses::Trim(property.str()));
targetPhrase.SortAlign();
return out;
} // Tokenize()
diff --git a/OnDiskPt/OnDiskWrapper.cpp b/OnDiskPt/OnDiskWrapper.cpp
index 0120802ac..4593d3f47 100644
--- a/OnDiskPt/OnDiskWrapper.cpp
+++ b/OnDiskPt/OnDiskWrapper.cpp
@@ -31,7 +31,7 @@ using namespace std;
namespace OnDiskPt
{
-int OnDiskWrapper::VERSION_NUM = 5;
+int OnDiskWrapper::VERSION_NUM = 6;
OnDiskWrapper::OnDiskWrapper()
{
diff --git a/OnDiskPt/TargetPhrase.cpp b/OnDiskPt/TargetPhrase.cpp
index cb821a557..ce750d2e2 100644
--- a/OnDiskPt/TargetPhrase.cpp
+++ b/OnDiskPt/TargetPhrase.cpp
@@ -162,10 +162,13 @@ char *TargetPhrase::WriteOtherInfoToMemory(OnDiskWrapper &onDiskWrapper, size_t
// allocate mem
size_t numScores = onDiskWrapper.GetNumScores()
,numAlign = GetAlign().size();
+ size_t propSize = m_property.size();
- size_t memNeeded = sizeof(UINT64); // file pos (phrase id)
- memNeeded += sizeof(UINT64) + 2 * sizeof(UINT64) * numAlign; // align
- memNeeded += sizeof(float) * numScores; // scores
+ size_t memNeeded = sizeof(UINT64) // file pos (phrase id)
+ + sizeof(UINT64) + 2 * sizeof(UINT64) * numAlign // align
+ + sizeof(float) * numScores // scores
+ + sizeof(UINT64) // size of property string
+ + propSize; // actual property string
char *mem = (char*) malloc(memNeeded);
//memset(mem, 0, memNeeded);
@@ -183,6 +186,16 @@ char *TargetPhrase::WriteOtherInfoToMemory(OnDiskWrapper &onDiskWrapper, size_t
// scores
memUsed += WriteScoresToMemory(mem + memUsed);
+ // property string
+ char *currPtr = (char*)mem + memUsed;
+ UINT64 *memTmp = (UINT64*) currPtr;
+ memTmp[0] = propSize;
+ memUsed += sizeof(UINT64);
+
+ const char *propChar = m_property.c_str();
+ memcpy(mem + memUsed, propChar, propSize);
+ memUsed += propSize;
+
//DebugMem(mem, memNeeded);
assert(memNeeded == memUsed);
return mem;
@@ -281,6 +294,9 @@ Moses::TargetPhrase *TargetPhrase::ConvertToMoses(const std::vector<Moses::Facto
ret->GetScoreBreakdown().Assign(&phraseDict, m_scores);
ret->Evaluate(mosesSP, phraseDict.GetFeaturesToApply());
+ // property
+ ret->SetProperties(m_property);
+
return ret;
}
@@ -299,6 +315,21 @@ UINT64 TargetPhrase::ReadOtherInfoFromFile(UINT64 filePos, std::fstream &fileTPC
memUsed += ReadScoresFromFile(fileTPColl);
assert((memUsed + filePos) == (UINT64)fileTPColl.tellg());
+ // properties
+ UINT64 propSize;
+ fileTPColl.read((char*) &propSize, sizeof(UINT64));
+ memUsed += sizeof(UINT64);
+
+ if (propSize) {
+ char *mem = (char*) malloc(propSize + 1);
+ mem[propSize] = '\0';
+ fileTPColl.read(mem, propSize);
+ m_property = string(mem);
+ free(mem);
+
+ memUsed += propSize;
+ }
+
return memUsed;
}
diff --git a/OnDiskPt/TargetPhrase.h b/OnDiskPt/TargetPhrase.h
index 5b8a30296..283e7815e 100644
--- a/OnDiskPt/TargetPhrase.h
+++ b/OnDiskPt/TargetPhrase.h
@@ -50,6 +50,7 @@ class TargetPhrase: public Phrase
protected:
AlignType m_align;
PhrasePtr m_sourcePhrase;
+ std::string m_property;
std::vector<float> m_scores;
UINT64 m_filePos;
@@ -110,6 +111,10 @@ public:
virtual void DebugPrint(std::ostream &out, const Vocab &vocab) const;
+ void SetProperty(const std::string prop)
+ {
+ m_property = prop;
+ }
};
}