Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Eva <eva@deimos.(none)> 2012-04-29 10:11:30 +0400
committer: Eva <eva@deimos.(none)> 2012-04-29 10:11:30 +0400
commit: 6f39ad0b3e51a4034707e0b592a3d40e8c7f0fd4 (patch)
tree: 299c7f2d15199b9d6fcfaed3c8c98f005909e37d /OnDiskPt
parent: b8b3000daf9be08f2748b5f0a616b22bde2f913f (diff)
test
Diffstat (limited to 'OnDiskPt')
-rw-r--r-- OnDiskPt/Main.cpp 4
-rw-r--r-- OnDiskPt/TargetPhrase.cpp 23
-rw-r--r-- OnDiskPt/TargetPhrase.h 1
-rw-r--r-- OnDiskPt/TargetPhraseCollection.cpp 3
4 files changed, 27 insertions, 4 deletions
diff --git a/OnDiskPt/Main.cpp b/OnDiskPt/Main.cpp
index 72bf8b531..5b3ac6cb8 100644
--- a/OnDiskPt/Main.cpp
+++ b/OnDiskPt/Main.cpp
@@ -55,7 +55,6 @@ int main (int argc, char * const argv[])
const string filePath = argv[6]
,destPath = argv[7];
-
Moses::InputFileStream inStream(filePath);
OnDiskWrapper onDiskWrapper;
@@ -138,7 +137,8 @@ void Tokenize(SourcePhrase &sourcePhrase, TargetPhrase &targetPhrase, char *line
break;
}
case 3: {
- targetPhrase.Create1AlignFromString(tok);
+ //targetPhrase.Create1AlignFromString(tok);
+ targetPhrase.CreateAlignFromString(tok);
break;
}
case 4:
diff --git a/OnDiskPt/TargetPhrase.cpp b/OnDiskPt/TargetPhrase.cpp
index b740811d8..aedb2a37e 100644
--- a/OnDiskPt/TargetPhrase.cpp
+++ b/OnDiskPt/TargetPhrase.cpp
@@ -27,6 +27,8 @@
#include "TargetPhrase.h"
#include "OnDiskWrapper.h"
+#include <boost/algorithm/string.hpp>
+
using namespace std;
namespace OnDiskPt
@@ -61,6 +63,18 @@ void TargetPhrase::Create1AlignFromString(const std::string &align1Str)
m_align.push_back(pair<size_t, size_t>(alignPoints[0], alignPoints[1]) );
}
+void TargetPhrase::CreateAlignFromString(const std::string &alignStr)
+{
+ vector<std::string> alignPairs;
+ boost::split(alignPairs, alignStr, boost::is_any_of("\t "));
+ for (size_t i = 0; i < alignPairs.size(); ++i) {
+ vector<size_t> alignPoints;
+ Moses::Tokenize<size_t>(alignPoints, alignPairs[i], "-");
+ m_align.push_back(pair<size_t, size_t>(alignPoints[0], alignPoints[1]) );
+ }
+}
+
+
void TargetPhrase::SetScore(float score, size_t ind)
{
CHECK(ind < m_scores.size());
@@ -143,9 +157,10 @@ char *TargetPhrase::WriteOtherInfoToMemory(OnDiskWrapper &onDiskWrapper, size_t
// phrase id
memcpy(mem, &m_filePos, sizeof(UINT64));
memUsed += sizeof(UINT64);
-
+
// align
- memUsed += WriteAlignToMemory(mem + memUsed);
+ size_t tmp = WriteAlignToMemory(mem + memUsed);
+ memUsed += tmp;
// scores
memUsed += WriteScoresToMemory(mem + memUsed);
@@ -176,6 +191,7 @@ size_t TargetPhrase::WriteAlignToMemory(char *mem) const
memUsed += sizeof(alignPair.second);
}
+ std::cerr << "align memory used: " << memUsed << std::endl;
return memUsed;
}
@@ -269,12 +285,14 @@ UINT64 TargetPhrase::ReadFromFile(std::fstream &fileTP, size_t numFactors)
UINT64 TargetPhrase::ReadAlignFromFile(std::fstream &fileTPColl)
{
+ std::cerr << "read alignment.." << std::endl;
UINT64 bytesRead = 0;
UINT64 numAlign;
fileTPColl.read((char*) &numAlign, sizeof(UINT64));
bytesRead += sizeof(UINT64);
+ std::cerr << "numAlign: " << numAlign << std::endl;
for (size_t ind = 0; ind < numAlign; ++ind) {
AlignPair alignPair;
fileTPColl.read((char*) &alignPair.first, sizeof(UINT64));
@@ -284,6 +302,7 @@ UINT64 TargetPhrase::ReadAlignFromFile(std::fstream &fileTPColl)
bytesRead += sizeof(UINT64) * 2;
}
+ std::cerr << "Align bytes read: " << bytesRead << std::endl;
return bytesRead;
}
diff --git a/OnDiskPt/TargetPhrase.h b/OnDiskPt/TargetPhrase.h
index 56c7b6d3f..1ff6f46a2 100644
--- a/OnDiskPt/TargetPhrase.h
+++ b/OnDiskPt/TargetPhrase.h
@@ -63,6 +63,7 @@ public:
void SetLHS(Word *lhs);
void Create1AlignFromString(const std::string &align1Str);
+ void CreateAlignFromString(const std::string &align1Str);
void SetScore(float score, size_t ind);
const AlignType &GetAlign() const {
diff --git a/OnDiskPt/TargetPhraseCollection.cpp b/OnDiskPt/TargetPhraseCollection.cpp
index b57ce4ee3..295726ce1 100644
--- a/OnDiskPt/TargetPhraseCollection.cpp
+++ b/OnDiskPt/TargetPhraseCollection.cpp
@@ -173,11 +173,14 @@ void TargetPhraseCollection::ReadFromFile(size_t tableLimit, UINT64 filePos, OnD
TargetPhrase *tp = new TargetPhrase(numScores);
UINT64 sizeOtherInfo = tp->ReadOtherInfoFromFile(currFilePos, fileTPColl);
+ std::cerr << "other info done." << std::endl;
tp->ReadFromFile(fileTP, numTargetFactors);
+ std::cerr << "done reading from file." << std::endl;
currFilePos += sizeOtherInfo;
m_coll.push_back(tp);
+ std::cerr << "tp done." << std::endl;
}
}