Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary refs log tree commit diff
path: root/mert
diff options
context:
space:
mode:
author bhaddow <barry.haddow@gmail.com> 2011-11-15 02:18:22 +0400
committer bhaddow <barry.haddow@gmail.com> 2011-11-15 02:18:22 +0400
commit 3a6c0e0680e656a9a05da24c1b54e54caf651f48 (patch)
tree 5b0c73dfe9c3c0bf72ab403002bf50e1a3e82559 /mert
parent 4cf6e0320a90950ab2688861066c36e613c534fa (diff)
iterate through feature file
Diffstat (limited to 'mert')
-rw-r--r-- mert/FeatureDataIterator.cpp 61
-rw-r--r-- mert/FeatureDataIterator.h 44
-rw-r--r-- mert/pro.cpp 1
3 files changed, 77 insertions, 29 deletions
diff --git a/mert/FeatureDataIterator.cpp b/mert/FeatureDataIterator.cpp
index a581965c6..700398cbb 100644
--- a/mert/FeatureDataIterator.cpp
+++ b/mert/FeatureDataIterator.cpp
@@ -19,22 +19,77 @@ You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
+#include <iostream>
+#include <sstream>
+#include "util/string_piece.hh"
+#include "util/tokenize_piece.hh"
+
+#include "FeatureArray.h"
#include "FeatureDataIterator.h"
+
using namespace std;
+using namespace util;
+
+
+
+FeatureDataIterator::FeatureDataIterator() {}
FeatureDataIterator::FeatureDataIterator(const string filename) {
+ m_in.reset(new FilePiece(filename.c_str()));
+ readNext();
}
-void FeatureDataIterator::increment() {
+void FeatureDataIterator::readNext() {
+ try {
+ StringPiece marker = m_in->ReadDelimited();
+ if (marker != StringPiece(FEATURES_TXT_BEGIN)) {
+ throw FileFormatException(m_in->FileName(), marker.as_string());
+ }
+ size_t sentenceId = m_in->ReadULong();
+ size_t count = m_in->ReadULong();
+ cerr << "Expecting " << count << endl;
+ m_in->ReadLine(); //discard rest of line
+ for (size_t i = 0; i < count; ++i) {
+ StringPiece line = m_in->ReadLine();
+ for (util::TokenIter<util::AnyCharacter, true> token(line, util::AnyCharacter(" \t")); token; ++token) {
+ //TODO: Create FeatureDataItem
+ char* err_ind;
+ float value = static_cast<float>(strtod(token->data(), &err_ind));
+ if (err_ind == token->data()) {
+ throw FileFormatException(m_in->FileName(), line.as_string());
+ }
+ cerr << value << ",";
+ }
+ cerr << "\n";
+ }
+ StringPiece line = m_in->ReadLine();
+ if (line != StringPiece(FEATURES_TXT_END)) {
+ throw FileFormatException(m_in->FileName(), line.as_string());
+ }
+ } catch (EndOfFileException &e) {
+ m_in.reset();
+ }
+}
+void FeatureDataIterator::increment() {
+ readNext();
}
bool FeatureDataIterator::equal(const FeatureDataIterator& rhs) const {
-
+ if (!m_in && !rhs.m_in) {
+ return true;
+ } else if (!m_in) {
+ return false;
+ } else if (!rhs.m_in) {
+ return false;
+ } else {
+ return m_in->FileName() == rhs.m_in->FileName() &&
+ m_in->Offset() == rhs.m_in->Offset();
+ }
}
const vector<FeatureDataItem>& FeatureDataIterator::dereference() const {
-
+ return m_next;
}
diff --git a/mert/FeatureDataIterator.h b/mert/FeatureDataIterator.h
index e87a6386d..49d77f77f 100644
--- a/mert/FeatureDataIterator.h
+++ b/mert/FeatureDataIterator.h
@@ -29,36 +29,17 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include <fstream>
#include <map>
+#include <memory>
+#include <stdexcept>
#include <vector>
#include <boost/iterator/iterator_facade.hpp>
+#include <boost/shared_ptr.hpp>
+#include "util/file_piece.hh"
-//Minimal sparse vector
-class SparseVector {
+#include "FeatureStats.h"
- public:
- typedef std::map<size_t,float> fvector_t;
- typedef std::map<std::string, size_t> name2id_t;
- typedef std::vector<std::string> id2name_t;
-
- float get(std::string name) const;
- float get(size_t id) const;
- void set(std::string name, float value);
- void clear();
- size_t size() const;
-
- void write(std::ostream& out, const std::string& sep = " ") const;
-
- SparseVector& operator-=(const SparseVector& rhs);
-
- private:
- static name2id_t name2id_;
- static id2name_t id2name_;
- fvector_t fvector_;
-};
-
-SparseVector operator-(const SparseVector& lhs, const SparseVector& rhs);
class FeatureDataItem {
public:
@@ -66,16 +47,24 @@ class FeatureDataItem {
SparseVector sparse;
};
+class FileFormatException : public util::Exception {
+ public:
+ explicit FileFormatException(const std::string filename, const std::string& line) {
+ *this << "Error in line \"" << line << "\" of " << filename;
+ }
+};
+
class FeatureDataIterator :
public boost::iterator_facade<FeatureDataIterator,
const std::vector<FeatureDataItem>,
boost::forward_traversal_tag>
{
public:
+ FeatureDataIterator();
FeatureDataIterator(const std::string filename);
static FeatureDataIterator end() {
- return FeatureDataIterator("");
+ return FeatureDataIterator();
}
@@ -86,7 +75,10 @@ class FeatureDataIterator :
bool equal(const FeatureDataIterator& rhs) const;
const std::vector<FeatureDataItem>& dereference() const;
- std::ifstream* in_;
+ void readNext();
+
+ boost::shared_ptr<util::FilePiece> m_in;
+ std::vector<FeatureDataItem> m_next;
};
#endif
diff --git a/mert/pro.cpp b/mert/pro.cpp
index ac2503172..3272b3209 100644
--- a/mert/pro.cpp
+++ b/mert/pro.cpp
@@ -77,6 +77,7 @@ int main(int argc, char** argv)
}
FeatureDataIterator fi(featureFiles[0]);
+ //cerr << featureFiles[0] << endl;
for (; fi != FeatureDataIterator::end(); ++fi) {
const vector<FeatureDataItem>& featureData = *fi;
}