diff options
Diffstat (limited to 'OnDiskPt')
-rw-r--r-- | OnDiskPt/Main.cpp | 29 | ||||
-rw-r--r-- | OnDiskPt/OnDiskWrapper.cpp | 2 | ||||
-rw-r--r-- | OnDiskPt/TargetPhrase.cpp | 37 | ||||
-rw-r--r-- | OnDiskPt/TargetPhrase.h | 5 |
4 files changed, 51 insertions, 22 deletions
diff --git a/OnDiskPt/Main.cpp b/OnDiskPt/Main.cpp index f2d75ed05..fdfc561be 100644 --- a/OnDiskPt/Main.cpp +++ b/OnDiskPt/Main.cpp @@ -109,6 +109,8 @@ bool Flush(const OnDiskPt::SourcePhrase *prevSourcePhrase, const OnDiskPt::Sourc OnDiskPt::PhrasePtr Tokenize(SourcePhrase &sourcePhrase, TargetPhrase &targetPhrase, char *line, OnDiskWrapper &onDiskWrapper, int numScores, vector<float> &misc) { + stringstream property; + size_t scoreInd = 0; // MAIN LOOP @@ -118,6 +120,7 @@ OnDiskPt::PhrasePtr Tokenize(SourcePhrase &sourcePhrase, TargetPhrase &targetPhr 2 = scores 3 = align 4 = count + 7 = properties */ char *tok = strtok (line," "); OnDiskPt::PhrasePtr out(new Phrase()); @@ -148,29 +151,18 @@ OnDiskPt::PhrasePtr Tokenize(SourcePhrase &sourcePhrase, TargetPhrase &targetPhr targetPhrase.CreateAlignFromString(tok); break; } - case 4: - ++stage; + case 4: { break; - /* case 5: { - // count info. Only store the 2nd one - float val = Moses::Scan<float>(tok); - misc[0] = val; - ++stage; - break; - }*/ + } case 5: { - // count info. Only store the 2nd one - //float val = Moses::Scan<float>(tok); - //misc[0] = val; - ++stage; + // store only the 3rd one (rule count) + float val = Moses::Scan<float>(tok); + misc[0] = val; break; } case 6: { - // store only the 3rd one (rule count) - float val = Moses::Scan<float>(tok); - misc[0] = val; - ++stage; - break; + property << tok << " "; + break; } default: cerr << "ERROR in line " << line << endl; @@ -183,6 +175,7 @@ OnDiskPt::PhrasePtr Tokenize(SourcePhrase &sourcePhrase, TargetPhrase &targetPhr } // while (tok != NULL) assert(scoreInd == numScores); + targetPhrase.SetProperty(Moses::Trim(property.str())); targetPhrase.SortAlign(); return out; } // Tokenize() diff --git a/OnDiskPt/OnDiskWrapper.cpp b/OnDiskPt/OnDiskWrapper.cpp index 0120802ac..4593d3f47 100644 --- a/OnDiskPt/OnDiskWrapper.cpp +++ b/OnDiskPt/OnDiskWrapper.cpp @@ -31,7 +31,7 @@ using namespace std; namespace OnDiskPt { -int OnDiskWrapper::VERSION_NUM = 5; +int OnDiskWrapper::VERSION_NUM = 6; OnDiskWrapper::OnDiskWrapper() { diff --git a/OnDiskPt/TargetPhrase.cpp b/OnDiskPt/TargetPhrase.cpp index cb821a557..ce750d2e2 100644 --- a/OnDiskPt/TargetPhrase.cpp +++ b/OnDiskPt/TargetPhrase.cpp @@ -162,10 +162,13 @@ char *TargetPhrase::WriteOtherInfoToMemory(OnDiskWrapper &onDiskWrapper, size_t // allocate mem size_t numScores = onDiskWrapper.GetNumScores() ,numAlign = GetAlign().size(); + size_t propSize = m_property.size(); - size_t memNeeded = sizeof(UINT64); // file pos (phrase id) - memNeeded += sizeof(UINT64) + 2 * sizeof(UINT64) * numAlign; // align - memNeeded += sizeof(float) * numScores; // scores + size_t memNeeded = sizeof(UINT64) // file pos (phrase id) + + sizeof(UINT64) + 2 * sizeof(UINT64) * numAlign // align + + sizeof(float) * numScores // scores + + sizeof(UINT64) // size of property string + + propSize; // actual property string char *mem = (char*) malloc(memNeeded); //memset(mem, 0, memNeeded); @@ -183,6 +186,16 @@ char *TargetPhrase::WriteOtherInfoToMemory(OnDiskWrapper &onDiskWrapper, size_t // scores memUsed += WriteScoresToMemory(mem + memUsed); + // property string + char *currPtr = (char*)mem + memUsed; + UINT64 *memTmp = (UINT64*) currPtr; + memTmp[0] = propSize; + memUsed += sizeof(UINT64); + + const char *propChar = m_property.c_str(); + memcpy(mem + memUsed, propChar, propSize); + memUsed += propSize; + //DebugMem(mem, memNeeded); assert(memNeeded == memUsed); return mem; @@ -281,6 +294,9 @@ Moses::TargetPhrase *TargetPhrase::ConvertToMoses(const std::vector<Moses::Facto ret->GetScoreBreakdown().Assign(&phraseDict, m_scores); ret->Evaluate(mosesSP, phraseDict.GetFeaturesToApply()); + // property + ret->SetProperties(m_property); + return ret; } @@ -299,6 +315,21 @@ UINT64 TargetPhrase::ReadOtherInfoFromFile(UINT64 filePos, std::fstream &fileTPC memUsed += ReadScoresFromFile(fileTPColl); assert((memUsed + filePos) == (UINT64)fileTPColl.tellg()); + // properties + UINT64 propSize; + fileTPColl.read((char*) &propSize, sizeof(UINT64)); + memUsed += sizeof(UINT64); + + if (propSize) { + char *mem = (char*) malloc(propSize + 1); + mem[propSize] = '\0'; + fileTPColl.read(mem, propSize); + m_property = string(mem); + free(mem); + + memUsed += propSize; + } + return memUsed; } diff --git a/OnDiskPt/TargetPhrase.h b/OnDiskPt/TargetPhrase.h index 5b8a30296..283e7815e 100644 --- a/OnDiskPt/TargetPhrase.h +++ b/OnDiskPt/TargetPhrase.h @@ -50,6 +50,7 @@ class TargetPhrase: public Phrase protected: AlignType m_align; PhrasePtr m_sourcePhrase; + std::string m_property; std::vector<float> m_scores; UINT64 m_filePos; @@ -110,6 +111,10 @@ public: virtual void DebugPrint(std::ostream &out, const Vocab &vocab) const; + void SetProperty(const std::string prop) + { + m_property = prop; + } }; } |