Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Sara Stymne <sara.stymne@lingfil.uu.se> 2013-07-03 16:03:58 +0400
committer Sara Stymne <sara.stymne@lingfil.uu.se> 2013-07-03 16:03:58 +0400
commit b2eb42ed12ff64d0aa4b2842acb9efb8280e9ce3 (patch)
tree 3fc869d98fd7ea99922c400adda8b295ab6ab7ee /mert/BleuDocScorer.h
parent 4e4cf1e313a1acb97a2d0af4a1a0f51c9a3e027f (diff)
added document level Bleu scoring to mert
Diffstat (limited to 'mert/BleuDocScorer.h')
-rw-r--r-- mert/BleuDocScorer.h 67
1 files changed, 67 insertions, 0 deletions
diff --git a/mert/BleuDocScorer.h b/mert/BleuDocScorer.h
new file mode 100644
index 000000000..349745825
--- /dev/null
+++ b/mert/BleuDocScorer.h
@@ -0,0 +1,67 @@
+#ifndef MERT_BLEU_DOC_SCORER_H_
+#define MERT_BLEU_DOC_SCORER_H_
+
+#include <ostream>
+#include <string>
+#include <vector>
+
+#include "Types.h"
+#include "ScoreData.h"
+#include "StatisticsBasedScorer.h"
+#include "ScopedVector.h"
+#include "BleuScorer.h"
+
+namespace MosesTuning
+{
+
+/**
+ * Document-level Bleu scorer, derived from BleuScorer.
+ *
+ * Needs XML reference files, and n-best lists where sentences are separated by '\n'.
+ */
+class BleuDocScorer : public BleuScorer
+{
+public:
+
+ explicit BleuDocScorer(const std::string& config = "");  // config is optional; it defaults to the empty string
+ ~BleuDocScorer();
+
+ virtual void prepareStats(std::size_t sid, const std::string& text, ScoreStats& entry);  // NOTE(review): results are written through `entry`; sid presumably indexes a document whose sentences are in `text` -- confirm in the .cpp
+ virtual statscore_t calculateScore(const std::vector<int>& comps) const;  // const: returns a statscore_t computed from the integer vector `comps`
+
+ int CalcReferenceLength(std::size_t doc_id, std::size_t sentence_id, std::size_t length);  // NOTE(review): addressed by (doc_id, sentence_id); result presumably depends on m_ref_length_type -- confirm
+
+ // NOTE: this function is used for unit testing. It takes an already-open input stream plus a file index.
+ virtual bool OpenReferenceStream(std::istream* is, std::size_t file_id);
+
+private:
+ ReferenceLengthType m_ref_length_type;  // presumably selects how the reference length is chosen -- confirm against BleuScorer
+
+ // Reference translations, held in a two-level ScopedVector (presumably documents, then sentences -- confirm in the .cpp).
+ ScopedVector<ScopedVector<Reference> > m_references;
+
+ // No copying allowed: the copy constructor and copy assignment operator are declared private.
+ BleuDocScorer(const BleuDocScorer&);
+ BleuDocScorer& operator=(const BleuDocScorer&);
+
+ std::vector<std::string> splitDoc(const std::string& text);  // returns pieces of `text` as strings (presumably one per sentence -- confirm)
+};
+
+/* /\** Computes sentence-level BLEU+1 score. */
+/* * This function is used in PRO. */
+/* *\/ */
+/* float sentenceLevelBleuPlusOne(const std::vector<float>& stats); */
+
+/* /\** Computes sentence-level BLEU score given a background corpus. */
+/* * This function is used in batch MIRA. */
+/* *\/ */
+/* float sentenceLevelBackgroundBleu(const std::vector<float>& sent, const std::vector<float>& bg); */
+
+/* /\** */
+/* * Computes plain old BLEU from a vector of stats */
+/* *\/ */
+/* float unsmoothedBleu(const std::vector<float>& stats); */
+
+}
+
+#endif // MERT_BLEU_DOC_SCORER_H_