15 files changed, 858 insertions, 30 deletions
diff --git a/phrase-extract/ExtractionPhrasePair.cpp b/phrase-extract/ExtractionPhrasePair.cpp
index 9564b1cfe..ccf0fc275 100644
--- a/phrase-extract/ExtractionPhrasePair.cpp
+++ b/phrase-extract/ExtractionPhrasePair.cpp
@@ -463,6 +463,96 @@ std::string ExtractionPhrasePair::CollectAllLabelsSeparateLHSAndRHS(const std::s
 }
 
 
+// Returns the index of an MSLR orientation class name in the fixed order
+// mono(0) swap(1) dright(2) dleft(3), or -1 if the name is none of these.
+static int PhraseOrientationClassNameToId(const std::string &orientationClass)
+{
+  static const char *const orientationClassNames[4] = { "mono", "swap", "dright", "dleft" };
+  for (int id=0; id<4; ++id) {
+    if (orientationClass == orientationClassNames[id]) {
+      return id;
+    }
+  }
+  return -1;
+}
+
+
+// Writes the smoothed bidirectional MSLR phrase orientation distribution of
+// this phrase pair to out: four left-to-right values followed by four
+// right-to-left values, separated by single spaces, each group in the order
+// mono swap dright dleft.
+//
+// key                        name of the property holding the orientation counts;
+//                            nothing is written if GetProperty(key) returns NULL
+// orientationClassPriorsL2R  prior per left-to-right class (exactly 4 entries)
+// orientationClassPriorsR2L  prior per right-to-left class (exactly 4 entries)
+// smoothingFactor            weight of the priors; every value is computed as
+//                            (smoothingFactor*prior + classCount) / (smoothingFactor + m_count)
+// out                        stream receiving the eight values
+//
+// Every property value has to consist of exactly two whitespace-separated
+// class names (left-to-right first, right-to-left second); otherwise a
+// util::Exception is thrown.
+void ExtractionPhrasePair::CollectAllPhraseOrientations(const std::string &key,
+                                                        const std::vector<float> &orientationClassPriorsL2R,
+                                                        const std::vector<float> &orientationClassPriorsR2L,
+                                                        double smoothingFactor,
+                                                        std::ostream &out) const
+{
+  assert(orientationClassPriorsL2R.size()==4 && orientationClassPriorsR2L.size()==4); // mono swap dright dleft
+
+  const PROPERTY_VALUES *allPropertyValues = GetProperty( key );
+
+  if ( allPropertyValues == NULL ) {
+    return;
+  }
+
+  // bidirectional MSLR phrase orientation with 2x4 orientation classes:
+  // mono swap dright dleft
+  std::vector<float> orientationClassCountSumL2R(4,0);
+  std::vector<float> orientationClassCountSumR2L(4,0);
+
+  for (PROPERTY_VALUES::const_iterator iter=allPropertyValues->begin();
+       iter!=allPropertyValues->end(); ++iter) {
+
+    // No try/catch around the parsing: stream extraction reports problems via
+    // the stream state, and a catch block here would intercept the exceptions
+    // thrown below and replace their messages.
+    std::string l2rOrientationClass, r2lOrientationClass, surplusToken;
+    std::istringstream tokenizer(iter->first);
+    if ( !(tokenizer >> l2rOrientationClass >> r2lOrientationClass) ) {
+      UTIL_THROW(util::Exception, "ExtractionPhrasePair"
+                 << ": Collecting phrase orientations failed. "
+                 << "Flawed property value in extract file?");
+    }
+    // Extracting a third token skips trailing whitespace first, so only real
+    // surplus content is rejected.
+    if ( tokenizer >> surplusToken ) {
+      UTIL_THROW(util::Exception, "ExtractionPhrasePair"
+                 << ": Collecting phrase orientations failed. "
+                 << "Too many tokens?");
+    }
+
+    const int l2rOrientationClassId = PhraseOrientationClassNameToId(l2rOrientationClass);
+    if (l2rOrientationClassId == -1) {
+      UTIL_THROW(util::Exception, "ExtractionPhrasePair"
+                 << ": Collecting phrase orientations failed. "
+                 << "Unknown orientation class \"" << l2rOrientationClass << "\"." );
+    }
+    const int r2lOrientationClassId = PhraseOrientationClassNameToId(r2lOrientationClass);
+    if (r2lOrientationClassId == -1) {
+      UTIL_THROW(util::Exception, "ExtractionPhrasePair"
+                 << ": Collecting phrase orientations failed. "
+                 << "Unknown orientation class \"" << r2lOrientationClass << "\"." );
+    }
+
+    // iter->second is the count attached to this property value
+    orientationClassCountSumL2R[l2rOrientationClassId] += iter->second;
+    orientationClassCountSumR2L[r2lOrientationClassId] += iter->second;
+  }
+
+  for (size_t i=0; i<4; ++i) {
+    if (i>0) {
+      out << " ";
+    }
+    out << static_cast<float>( (smoothingFactor*orientationClassPriorsL2R[i] + orientationClassCountSumL2R[i]) / (smoothingFactor + m_count) );
+  }
+  for (size_t i=0; i<4; ++i) {
+    out << " " << static_cast<float>( (smoothingFactor*orientationClassPriorsR2L[i] + orientationClassCountSumR2L[i]) / (smoothingFactor + m_count) );
+  }
+}
+
+
 
 }
 
diff --git a/phrase-extract/ExtractionPhrasePair.h b/phrase-extract/ExtractionPhrasePair.h
index ba23ac1f2..e0f5dc5fb 100644
--- a/phrase-extract/ExtractionPhrasePair.h
+++ b/phrase-extract/ExtractionPhrasePair.h
@@ -131,6 +131,12 @@ public:
                                                 boost::unordered_map<std::string, boost::unordered_map<std::string,float>* >& sourceRHSAndLHSJointCounts, 
                                                 Vocabulary &vcbT) const;
 
+  // Writes eight space-separated smoothed orientation values to out: four
+  // left-to-right classes followed by four right-to-left classes, each group
+  // in the order mono swap dright dleft. Both prior vectors must have exactly
+  // four entries. Nothing is written if the property named by key is absent.
+  void CollectAllPhraseOrientations(const std::string &key, 
+                                    const std::vector<float> &orientationClassPriorsL2R, 
+                                    const std::vector<float> &orientationClassPriorsR2L, 
+                                    double smoothingFactor, 
+                                    std::ostream &out) const;
+
   void AddProperties( const std::string &str, float count );
 
   void AddProperty( const std::string &key, const std::string &value, float count ) 
diff --git a/phrase-extract/PhraseExtractionOptions.h b/phrase-extract/PhraseExtractionOptions.h
index 87712d6d3..7132974d4 100644
--- a/phrase-extract/PhraseExtractionOptions.h
+++ b/phrase-extract/PhraseExtractionOptions.h
@@ -18,7 +18,6 @@
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
  ***********************************************************************/
 
-/* Created by Rohit Gupta, CDAC, Mumbai, India on 18 July, 2012*/
 
 #include <string>
 #include <vector>
diff --git a/phrase-extract/extract-ghkm/ExtractGHKM.cpp b/phrase-extract/extract-ghkm/ExtractGHKM.cpp
index b86c28586..36dfee2e5 100644
--- a/phrase-extract/extract-ghkm/ExtractGHKM.cpp
+++ b/phrase-extract/extract-ghkm/ExtractGHKM.cpp
@@ -27,6 +27,7 @@
 #include "OutputFileStream.h"
 #include "Options.h"
 #include "ParseTree.h"
+#include "PhraseOrientation.h"
 #include "ScfgRule.h"
 #include "ScfgRuleWriter.h"
 #include "Span.h"
@@ -66,11 +67,12 @@ int ExtractGHKM::Main(int argc, char *argv[])
   // Open output files.
   OutputFileStream fwdExtractStream;
   OutputFileStream invExtractStream;
-  std::ofstream glueGrammarStream;
-  std::ofstream targetUnknownWordStream;
-  std::ofstream sourceUnknownWordStream;
-  std::ofstream sourceLabelSetStream;
-  std::ofstream unknownWordSoftMatchesStream;
+  OutputFileStream glueGrammarStream;
+  OutputFileStream targetUnknownWordStream;
+  OutputFileStream sourceUnknownWordStream;
+  OutputFileStream sourceLabelSetStream;
+  OutputFileStream unknownWordSoftMatchesStream;
+
   std::string fwdFileName = options.extractFile;
   std::string invFileName = options.extractFile + std::string(".inv");
   if (options.gzOutput) {
@@ -79,6 +81,7 @@ int ExtractGHKM::Main(int argc, char *argv[])
   }
   OpenOutputFileOrDie(fwdFileName, fwdExtractStream);
   OpenOutputFileOrDie(invFileName, invExtractStream);
+
   if (!options.glueGrammarFile.empty()) {
     OpenOutputFileOrDie(options.glueGrammarFile, glueGrammarStream);
   }
@@ -118,7 +121,7 @@ int ExtractGHKM::Main(int argc, char *argv[])
   std::string sourceLine;
   std::string alignmentLine;
   Alignment alignment;
-  XmlTreeParser xmlTreeParser(targetLabelSet, targetTopLabelSet);
+  XmlTreeParser targetXmlTreeParser(targetLabelSet, targetTopLabelSet);
 //  XmlTreeParser sourceXmlTreeParser(sourceLabelSet, sourceTopLabelSet);
   ScfgRuleWriter writer(fwdExtractStream, invExtractStream, options);
   size_t lineNum = options.sentenceOffset;
@@ -144,7 +147,7 @@ int ExtractGHKM::Main(int argc, char *argv[])
     }
     std::auto_ptr<ParseTree> targetParseTree;
     try {
-      targetParseTree = xmlTreeParser.Parse(targetLine);
+      targetParseTree = targetXmlTreeParser.Parse(targetLine);
       assert(targetParseTree.get());
     } catch (const Exception &e) {
       std::ostringstream oss;
@@ -181,7 +184,7 @@ int ExtractGHKM::Main(int argc, char *argv[])
     // Read source tokens.
     std::vector<std::string> sourceTokens(ReadTokens(sourceLine));
 
-    // Construct a source ParseTree object object from the SyntaxTree object.
+    // Construct a source ParseTree object from the SyntaxTree object.
     std::auto_ptr<ParseTree> sourceParseTree;
 
     if (options.sourceLabels) {
@@ -235,11 +238,26 @@ int ExtractGHKM::Main(int argc, char *argv[])
       graph.ExtractComposedRules(options);
     }
 
+    // Initialize phrase orientation scoring object
+    PhraseOrientation phraseOrientation( sourceTokens, targetXmlTreeParser.GetWords(), alignment);
+
     // Write the rules, subject to scope pruning.
     const std::vector<Node *> &targetNodes = graph.GetTargetNodes();
     for (std::vector<Node *>::const_iterator p = targetNodes.begin();
          p != targetNodes.end(); ++p) {
+
       const std::vector<const Subgraph *> &rules = (*p)->GetRules();
+
+      REO_POS l2rOrientation, r2lOrientation;
+      if (options.phraseOrientation && !rules.empty()) {
+        int sourceSpanBegin = *((*p)->GetSpan().begin());
+        int sourceSpanEnd   = *((*p)->GetSpan().rbegin());
+        l2rOrientation = phraseOrientation.GetOrientationInfo(sourceSpanBegin,sourceSpanEnd,L2R);
+        r2lOrientation = phraseOrientation.GetOrientationInfo(sourceSpanBegin,sourceSpanEnd,R2L);
+        // std::cerr << "span " << sourceSpanBegin << " " << sourceSpanEnd << std::endl;
+        // std::cerr << "phraseOrientation " << phraseOrientation.GetOrientationInfo(sourceSpanBegin,sourceSpanEnd) << std::endl;
+      }
+
       for (std::vector<const Subgraph *>::const_iterator q = rules.begin();
            q != rules.end(); ++q) {
         ScfgRule *r = 0;
@@ -251,16 +269,34 @@ int ExtractGHKM::Main(int argc, char *argv[])
         // TODO Can scope pruning be done earlier?
         if (r->Scope() <= options.maxScope) {
           if (!options.treeFragments) {
-            writer.Write(*r);
+            writer.Write(*r,false);
           } else {
-            writer.Write(*r,**q);
+            writer.Write(*r,**q,false);
+          }
+          if (options.phraseOrientation) {
+            fwdExtractStream << " {{Orientation ";
+            phraseOrientation.WriteOrientation(fwdExtractStream,l2rOrientation);
+            fwdExtractStream << " ";
+            phraseOrientation.WriteOrientation(fwdExtractStream,r2lOrientation);
+            fwdExtractStream << "}}";
+            phraseOrientation.IncrementPriorCount(L2R,l2rOrientation,1);
+            phraseOrientation.IncrementPriorCount(R2L,r2lOrientation,1);
           }
+          fwdExtractStream << std::endl;
+          invExtractStream << std::endl;
         }
         delete r;
       }
     }
   }
 
+  if (options.phraseOrientation) {
+    std::string phraseOrientationPriorsFileName = options.extractFile + std::string(".phraseOrientationPriors");
+    OutputFileStream phraseOrientationPriorsStream;
+    OpenOutputFileOrDie(phraseOrientationPriorsFileName, phraseOrientationPriorsStream);
+    PhraseOrientation::WritePriorCounts(phraseOrientationPriorsStream);
+  }
+
   std::map<std::string,size_t> sourceLabels;
   if (options.sourceLabels && !options.sourceLabelSetFile.empty()) {
 
@@ -398,6 +434,8 @@ void ExtractGHKM::ProcessOptions(int argc, char *argv[],
    "extract minimal rules only")
   ("PCFG",
    "include score based on PCFG scores in target corpus")
+  ("PhraseOrientation",
+   "output phrase orientation information")
   ("TreeFragments",
    "output parse tree information")
   ("SourceLabels",
@@ -502,6 +540,9 @@ void ExtractGHKM::ProcessOptions(int argc, char *argv[],
   if (vm.count("PCFG")) {
     options.pcfg = true;
   }
+  if (vm.count("PhraseOrientation")) {
+    options.phraseOrientation = true;
+  }
   if (vm.count("TreeFragments")) {
     options.treeFragments = true;
   }
@@ -736,8 +777,7 @@ void ExtractGHKM::WriteUnknownWordSoftMatches(
   const std::set<std::string> &labelSet,
   std::ostream &out)
 {
-  std::set<std::string>::const_iterator p = labelSet.begin();
-  for (p; p != labelSet.end(); ++p) {
+  for (std::set<std::string>::const_iterator p = labelSet.begin(); p != labelSet.end(); ++p) {
       std::string label = *p;
       out << "UNK " << label << std::endl;
   }
diff --git a/phrase-extract/extract-ghkm/Options.h b/phrase-extract/extract-ghkm/Options.h
index 28a581802..0102e2f64 100644
--- a/phrase-extract/extract-ghkm/Options.h
+++ b/phrase-extract/extract-ghkm/Options.h
@@ -40,6 +40,7 @@ public:
     , maxScope(3)
     , minimal(false)
     , pcfg(false)
+    , phraseOrientation(false)
     , treeFragments(false)
     , sourceLabels(false)
     , sentenceOffset(0)
@@ -64,6 +65,7 @@ public:
   int maxScope;
   bool minimal;
   bool pcfg;
+  bool phraseOrientation;
   bool treeFragments;
   bool sourceLabels;
   std::string sourceLabelSetFile;
diff --git a/phrase-extract/extract-ghkm/PhraseOrientation.cpp b/phrase-extract/extract-ghkm/PhraseOrientation.cpp
new file mode 100644
index 000000000..a96e5361c
--- /dev/null
+++ b/phrase-extract/extract-ghkm/PhraseOrientation.cpp
@@ -0,0 +1,417 @@
+/***********************************************************************
+ Moses - statistical machine translation system
+ Copyright (C) 2006-2011 University of Edinburgh
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+
+#include "PhraseOrientation.h"
+
+#include <cassert>
+#include <cstdlib>
+#include <iostream>
+#include <sstream>
+
+#include <boost/assign/list_of.hpp>
+
+namespace Moses
+{
+namespace GHKM
+{
+
+// Prior counts per orientation class, indexed by REO_POS value. UNKNOWN is the
+// last enumerator, so UNKNOWN+1 zero-initialised slots cover every class
+// without a hand-maintained list of zeros.
+std::vector<float> PhraseOrientation::m_l2rOrientationPriorCounts(UNKNOWN+1, 0.0f);
+std::vector<float> PhraseOrientation::m_r2lOrientationPriorCounts(UNKNOWN+1, 0.0f);
+
+// Precomputes the data needed to answer orientation queries for one sentence
+// pair.
+//
+// source, target and alignment are bound to the members m_source, m_target
+// and m_alignment. Each alignment point is used as (first = source index,
+// second = target index); both are used directly as vector subscripts without
+// a range check.
+//
+// The constructor fills
+//  - m_alignedToT: for each target position, the aligned source positions,
+//  - m_minAndMaxAlignedToSourceSpan: for each source span, the smallest and
+//    largest aligned target position,
+//  - m_topLeft/m_topRight/m_bottomLeft/m_bottomRight: the corner points of
+//    every phrase pair found by the extraction loop below.
+PhraseOrientation::PhraseOrientation(const std::vector<std::string> &source,
+                                     const std::vector<std::string> &target,
+                                     const Alignment &alignment)
+  : m_source(source)
+  , m_target(target)
+  , m_alignment(alignment)
+{
+
+  int countF = m_source.size();
+  int countE = m_target.size();
+
+  // prepare data structures for alignments
+  // alignedToS[fi]: target positions aligned to source position fi
+  std::vector<std::vector<int> > alignedToS;
+  for(int i=0; i<countF; ++i) {
+    std::vector< int > dummy;
+    alignedToS.push_back(dummy);
+  }
+  // m_alignedToT[ei]: source positions aligned to target position ei
+  for(int i=0; i<countE; ++i) {
+    std::vector< int > dummy;
+    m_alignedToT.push_back(dummy);
+  }
+  // alignedCountS[fi]: number of alignment points of source position fi
+  std::vector<int> alignedCountS(countF,0);
+
+  for (Alignment::const_iterator a=alignment.begin(); a!=alignment.end(); ++a) {
+    m_alignedToT[a->second].push_back(a->first);
+    alignedCountS[a->first]++;
+    alignedToS[a->first].push_back(a->second);
+  }
+
+  // For every source span [startF,endF] store the minimum and maximum target
+  // position aligned to any word of the span. A span without any alignment
+  // point keeps the initial values (9999,-1).
+  for (int startF=0; startF<countF; ++startF) {
+    for (int endF=startF; endF<countF; ++endF) {
+
+      int minE = 9999;
+      int maxE = -1;
+      for (int fi=startF; fi<=endF; ++fi) {
+        for (size_t i=0; i<alignedToS[fi].size(); ++i) {
+          int ei = alignedToS[fi][i];
+          if (ei<minE) {
+            minE = ei;
+          }
+          if (ei>maxE) {
+            maxE = ei;
+          }
+        }
+      }
+
+      m_minAndMaxAlignedToSourceSpan[ std::pair<int,int>(startF,endF) ] = std::pair<int,int>(minE,maxE); 
+    }
+  }
+
+  // check alignments for target phrase startE...endE
+  // loop over continuous phrases which are compatible with the word alignments
+  for (int startE=0; startE<countE; ++startE) {
+    for (int endE=startE; endE<countE; ++endE) {
+
+      int minF = 9999;
+      int maxF = -1;
+      // usedF starts as the total number of alignment points per source word
+      // and is decremented for every point inside [startE,endE]; what remains
+      // is the number of points of that word outside the target span.
+      std::vector< int > usedF = alignedCountS;
+      for (int ei=startE; ei<=endE; ++ei) {
+        for (size_t i=0; i<m_alignedToT[ei].size(); ++i) {
+          int fi = m_alignedToT[ei][i];
+          if (fi<minF) {
+            minF = fi;
+          }
+          if (fi>maxF) {
+            maxF = fi;
+          }
+          usedF[fi]--;
+        }
+      }
+
+      if (maxF >= 0) { // aligned to any source words at all
+
+        // check if source words are aligned to out of bound target words
+        bool out_of_bounds = false;
+        for (int fi=minF; fi<=maxF && !out_of_bounds; ++fi)
+          if (usedF[fi]>0) {
+            // cout << "ouf of bounds: " << fi << "\n";
+            out_of_bounds = true;
+          }
+
+        // cout << "doing if for ( " << minF << "-" << maxF << ", " << startE << "," << endE << ")\n";
+        if (!out_of_bounds) {
+          // start point of source phrase may retreat over unaligned
+          for (int startF=minF;
+               (startF>=0 &&
+                (startF==minF || alignedCountS[startF]==0)); // unaligned
+               startF--) {
+            // end point of source phrase may advance over unaligned
+            for (int endF=maxF;
+                 (endF<countF &&
+                  (endF==maxF || alignedCountS[endF]==0)); // unaligned
+                 endF++) { // at this point we have extracted a phrase
+
+              // remember the four corners of the phrase pair
+              InsertPhraseVertices(m_topLeft, m_topRight, m_bottomLeft, m_bottomRight,
+                                   startF, startE, endF, endE);
+            }
+          }
+        }
+      }
+    }
+  }
+}
+
+
+// Adds source position x to the set of vertices stored for target position y
+// in corners; the set is created first if y has no entry yet.
+void PhraseOrientation::InsertVertex( HSentenceVertices & corners, int x, int y )
+{
+  // operator[] default-constructs an empty set for a new key
+  corners[y].insert(x);
+}
+
+
+// Registers the four corners of the phrase pair covering source positions
+// startF..endF and target positions startE..endE, one corner per vertex map.
+void PhraseOrientation::InsertPhraseVertices(HSentenceVertices & topLeft,
+                                             HSentenceVertices & topRight,
+                                             HSentenceVertices & bottomLeft,
+                                             HSentenceVertices & bottomRight,
+                                             int startF, int startE, int endF, int endE)
+{
+  // corners on the last target position of the phrase
+  InsertVertex(bottomRight, endF, endE);
+  InsertVertex(bottomLeft, startF, endE);
+
+  // corners on the first target position of the phrase
+  InsertVertex(topRight, endF, startE);
+  InsertVertex(topLeft, startF, startE);
+}
+
+
+// Returns the orientation string(s) for the source span startF..endF. The
+// target span is taken from the minimum and maximum aligned target position
+// precomputed for that source span. Terminates the program with exit code 1
+// if no entry exists for the span.
+const std::string PhraseOrientation::GetOrientationInfoString(int startF, int endF, REO_DIR direction) const
+{
+  typedef boost::unordered_map< std::pair<int,int> , std::pair<int,int> > SpanToMinMax;
+
+  const SpanToMinMax::const_iterator entry
+    = m_minAndMaxAlignedToSourceSpan.find( std::pair<int,int>(startF,endF) );
+
+  if ( entry == m_minAndMaxAlignedToSourceSpan.end() ) {
+    std::cerr << "Error: not able to determine phrase orientation" << std::endl;
+    std::exit(1);
+  }
+
+  const std::pair<int,int> &targetMinMax = entry->second;
+  return GetOrientationInfoString(startF, targetMinMax.first, endF, targetMinMax.second, direction);
+}
+
+
+// Returns the MSLR orientation of the phrase pair (startF..endF, startE..endE)
+// as a string.
+//
+// direction == L2R  only the left-to-right orientation
+// direction == R2L  only the right-to-left orientation
+// anything else     both, as "<left-to-right> <right-to-left>" (BIDIR format)
+const std::string PhraseOrientation::GetOrientationInfoString(int startF, int startE, int endF, int endE, REO_DIR direction) const
+{
+  // Initialised so that no indeterminate value can ever be read below.
+  REO_POS hierPrevOrient = UNKNOWN;
+  REO_POS hierNextOrient = UNKNOWN;
+
+  // Every direction other than the two unidirectional ones is treated as
+  // BIDIR, which matches the two-value string returned for it.
+  const bool needL2R = (direction != R2L);
+  const bool needR2L = (direction != L2R);
+
+  if ( needL2R ) {
+    const bool connectedLeftTopP  = IsAligned( startF-1, startE-1 );
+    const bool connectedRightTopP = IsAligned( endF+1,   startE-1 );
+    hierPrevOrient = GetOrientHierModel(REO_MSLR,
+                                        connectedLeftTopP, connectedRightTopP,
+                                        startF, endF, startE, endE, m_source.size()-1, 0, 1,
+                                        &ge, &lt,
+                                        m_bottomRight, m_bottomLeft);
+  }
+
+  if ( needR2L ) {
+    const bool connectedLeftTopN  = IsAligned( endF+1,   endE+1 );
+    const bool connectedRightTopN = IsAligned( startF-1, endE+1 );
+    // mirrored call: spans, bounds, step and comparison callbacks are swapped
+    hierNextOrient = GetOrientHierModel(REO_MSLR,
+                                        connectedLeftTopN, connectedRightTopN,
+                                        endF, startF, endE, startE, 0, m_source.size()-1, -1,
+                                        &lt, &ge,
+                                        m_bottomLeft, m_bottomRight);
+  }
+
+  if ( direction == L2R ) {
+    return GetOrientationString(hierPrevOrient, REO_MSLR);
+  }
+  if ( direction == R2L ) {
+    return GetOrientationString(hierNextOrient, REO_MSLR);
+  }
+  return GetOrientationString(hierPrevOrient, REO_MSLR) + " " + GetOrientationString(hierNextOrient, REO_MSLR);
+}
+
+
+// Returns the orientation class for the source span startF..endF. The target
+// span is taken from the minimum and maximum aligned target position
+// precomputed for that source span. Terminates the program with exit code 1
+// if no entry exists for the span.
+REO_POS PhraseOrientation::GetOrientationInfo(int startF, int endF, REO_DIR direction) const
+{
+  typedef boost::unordered_map< std::pair<int,int> , std::pair<int,int> > SpanToMinMax;
+
+  const SpanToMinMax::const_iterator entry
+    = m_minAndMaxAlignedToSourceSpan.find( std::pair<int,int>(startF,endF) );
+
+  if ( entry == m_minAndMaxAlignedToSourceSpan.end() ) {
+    std::cerr << "Error: not able to determine phrase orientation" << std::endl;
+    std::exit(1);
+  }
+
+  const std::pair<int,int> &targetMinMax = entry->second;
+  return GetOrientationInfo(startF, targetMinMax.first, endF, targetMinMax.second, direction);
+}
+
+
+// Returns the MSLR orientation class of the phrase pair (startF..endF,
+// startE..endE) for a single direction. Only L2R and R2L are accepted; any
+// other direction prints an error and terminates the program with exit code 1.
+REO_POS PhraseOrientation::GetOrientationInfo(int startF, int startE, int endF, int endE, REO_DIR direction) const
+{
+  const int lastSourceIndex = m_source.size()-1;
+
+  switch (direction) {
+  case L2R:
+    return GetOrientHierModel(REO_MSLR,
+                              IsAligned( startF-1, startE-1 ), IsAligned( endF+1, startE-1 ),
+                              startF, endF, startE, endE, lastSourceIndex, 0, 1,
+                              &ge, &lt,
+                              m_bottomRight, m_bottomLeft);
+  case R2L:
+    // mirrored call: spans, bounds, step and comparison callbacks are swapped
+    return GetOrientHierModel(REO_MSLR,
+                              IsAligned( endF+1, endE+1 ), IsAligned( startF-1, endE+1 ),
+                              endF, startF, endE, startE, 0, lastSourceIndex, -1,
+                              &lt, &ge,
+                              m_bottomLeft, m_bottomRight);
+  default:
+    std::cerr << "PhraseOrientation::GetOrientationInfo(): direction should be either L2R or R2L" << std::endl;
+    std::exit(1);
+  }
+
+  return UNKNOWN;
+}
+
+
+// Determines the orientation class of a phrase from the two connectedness
+// flags and the given phrase corner maps.
+//
+// The scan direction is chosen by the caller through unit, the bounds
+// zero/countF and the comparison callbacks ge/lt; the callbacks are only used
+// as loop conditions of the two scans at the end. Within this function the
+// parameters ge and lt hide the static member functions of the same name.
+// The callers in this file pass either (countF=size-1, zero=0, unit=1, ge, lt)
+// or the mirrored (countF=0, zero=size-1, unit=-1, lt, ge).
+//
+// Checks, in this order:
+//  1. LEFT    if connectedLeftTop && !connectedRightTop, or bottomRight holds
+//             startF-unit in row startE-unit
+//  2. UNKNOWN if modelType is REO_MONO
+//  3. RIGHT   if !connectedLeftTop && connectedRightTop, or bottomLeft holds
+//             endF+unit in row startE-unit
+//  4. UNKNOWN if modelType is REO_MSD
+//  5. DRIGHT  if bottomRight holds, in row startE-unit, any index reached from
+//             startF-2*unit in steps of -unit while ge(index, zero) holds
+//  6. DLEFT   if bottomLeft holds, in row startE-unit, any index reached from
+//             endF+2*unit in steps of +unit while lt(index, countF) holds
+//  7. UNKNOWN otherwise
+//
+// to be called with countF-1 instead of countF
+REO_POS PhraseOrientation::GetOrientHierModel(REO_MODEL_TYPE modelType,
+                                              bool connectedLeftTop, bool connectedRightTop,
+                                              int startF, int endF, int startE, int endE, int countF, int zero, int unit,
+                                              bool (*ge)(int, int), bool (*lt)(int, int),
+                                              const HSentenceVertices & bottomRight, const HSentenceVertices & bottomLeft) const
+{
+  HSentenceVertices::const_iterator it;
+
+  if ((connectedLeftTop && !connectedRightTop) ||
+      ((it = bottomRight.find(startE - unit)) != bottomRight.end() &&
+       it->second.find(startF-unit) != it->second.end()))
+    return LEFT;
+
+  if (modelType == REO_MONO)
+    return UNKNOWN;
+
+  if ((!connectedLeftTop &&  connectedRightTop) ||
+      ((it = bottomLeft.find(startE - unit)) != bottomLeft.end() &&
+       it->second.find(endF + unit) != it->second.end()))
+    return RIGHT;
+
+  if (modelType == REO_MSD)
+    return UNKNOWN;
+
+  // The flags are reused as scratch variables from here on: each loop stores
+  // the result of its lookup in the flag and returns as soon as it is true.
+  connectedLeftTop = false;
+  for (int indexF=startF-2*unit; (*ge)(indexF, zero) && !connectedLeftTop; indexF=indexF-unit) {
+    if ((connectedLeftTop = ((it = bottomRight.find(startE - unit)) != bottomRight.end() &&
+                             it->second.find(indexF) != it->second.end())))
+      return DRIGHT;
+  }
+
+  connectedRightTop = false;
+  for (int indexF=endF+2*unit; (*lt)(indexF, countF) && !connectedRightTop; indexF=indexF+unit) {
+    if ((connectedRightTop = ((it = bottomLeft.find(startE - unit)) != bottomLeft.end() &&
+                              it->second.find(indexF) != it->second.end())))
+      return DLEFT;
+  }
+
+  return UNKNOWN;
+}
+
+
+// Returns exactly the text that WriteOrientation emits for orient under
+// modelType.
+const std::string PhraseOrientation::GetOrientationString(const REO_POS orient, const REO_MODEL_TYPE modelType)
+{
+  std::ostringstream buffer;
+  WriteOrientation(buffer, orient, modelType);
+  return buffer.str();
+}
+
+
+// Writes the name of an orientation class to out:
+//   LEFT -> "mono", RIGHT -> "swap", DRIGHT -> "dright", DLEFT -> "dleft".
+// UNKNOWN depends on the model type: "nomono" for REO_MONO, "other" for
+// REO_MSD and "dright" for REO_MSLR. Nothing is written for any other value.
+void PhraseOrientation::WriteOrientation(std::ostream& out, const REO_POS orient, const REO_MODEL_TYPE modelType)
+{
+  // stays null when there is nothing to write
+  const char *label = 0;
+
+  if (orient == LEFT) {
+    label = "mono";
+  } else if (orient == RIGHT) {
+    label = "swap";
+  } else if (orient == DRIGHT) {
+    label = "dright";
+  } else if (orient == DLEFT) {
+    label = "dleft";
+  } else if (orient == UNKNOWN) {
+    if (modelType == REO_MONO) {
+      label = "nomono";
+    } else if (modelType == REO_MSD) {
+      label = "other";
+    } else if (modelType == REO_MSLR) {
+      label = "dright";
+    }
+  }
+
+  if (label != 0) {
+    out << label;
+  }
+}
+
+
+// Tells whether source position fi and target position ei are aligned.
+// The position pair just before the sentence start (-1,-1) and the pair just
+// past the sentence end (source size, target size) count as aligned; every
+// other pair with a position outside its sentence counts as not aligned.
+bool PhraseOrientation::IsAligned(int fi, int ei) const
+{
+  const int countE = (int)m_target.size();
+  const int countF = (int)m_source.size();
+
+  // before the sentence start: only the pair (-1,-1) is aligned
+  if (ei < 0 || fi < 0) {
+    return (ei == -1 && fi == -1);
+  }
+
+  // past the sentence end: only the pair (countF,countE) is aligned
+  if (ei >= countE || fi >= countF) {
+    return (ei == countE && fi == countF);
+  }
+
+  const std::vector<int> &sourcePositions = m_alignedToT[ei];
+  for (std::vector<int>::const_iterator it = sourcePositions.begin();
+       it != sourcePositions.end(); ++it) {
+    if (*it == fi) {
+      return true;
+    }
+  }
+  return false;
+}
+
+
+// Adds increment to the prior count of orientation class orient for the given
+// direction. direction must be L2R or R2L (asserted); orient is used directly
+// as the index into the class-wide count vector of that direction.
+void PhraseOrientation::IncrementPriorCount(REO_DIR direction, REO_POS orient, float increment)
+{
+  assert(direction==L2R || direction==R2L);
+
+  // with assertions disabled, any other direction leaves the counts untouched
+  if (direction != L2R && direction != R2L) {
+    return;
+  }
+
+  std::vector<float> &priorCounts = (direction == L2R) ? m_l2rOrientationPriorCounts
+                                                       : m_r2lOrientationPriorCounts;
+  priorCounts[orient] += increment;
+}
+
+
+// Writes the accumulated prior counts to out, one "L2R_<class> <count>" line
+// per left-to-right class followed by one "R2L_<class> <count>" line per
+// right-to-left class. Counts are first summed per class name as produced by
+// GetOrientationString for modelType, so classes sharing a name are merged;
+// the lines of each direction appear in the sort order of the class names.
+void PhraseOrientation::WritePriorCounts(std::ostream& out, const REO_MODEL_TYPE modelType)
+{
+  typedef std::map<std::string,float> CountsByClassName;
+  CountsByClassName l2rCounts;
+  CountsByClassName r2lCounts;
+
+  for (int orient=LEFT; orient<=UNKNOWN; ++orient) {
+    const std::string className = GetOrientationString(static_cast<REO_POS>(orient), modelType);
+    l2rCounts[className] += m_l2rOrientationPriorCounts[orient];
+    r2lCounts[className] += m_r2lOrientationPriorCounts[orient];
+  }
+
+  for (CountsByClassName::const_iterator it = l2rCounts.begin(); it != l2rCounts.end(); ++it) {
+    out << "L2R_" << it->first << " " << it->second << std::endl;
+  }
+  for (CountsByClassName::const_iterator it = r2lCounts.begin(); it != r2lCounts.end(); ++it) {
+    out << "R2L_" << it->first << " " << it->second << std::endl;
+  }
+}
+
+}  // namespace GHKM
+}  // namespace Moses
+
diff --git a/phrase-extract/extract-ghkm/PhraseOrientation.h b/phrase-extract/extract-ghkm/PhraseOrientation.h
new file mode 100644
index 000000000..6e83929f1
--- /dev/null
+++ b/phrase-extract/extract-ghkm/PhraseOrientation.h
@@ -0,0 +1,102 @@
+
+/***********************************************************************
+ Moses - statistical machine translation system
+ Copyright (C) 2006-2011 University of Edinburgh
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+
+#pragma once
+
+#include "Alignment.h"
+
+#include <map>
+#include <set>
+#include <string>
+#include <vector>
+#include <boost/unordered_map.hpp>
+
+namespace Moses
+{
+namespace GHKM
+{
+
+// Orientation model granularity. GetOrientHierModel stops after the LEFT check
+// for REO_MONO, after the RIGHT check for REO_MSD, and evaluates all classes
+// for REO_MSLR.
+enum REO_MODEL_TYPE {REO_MSD, REO_MSLR, REO_MONO};
+// Orientation classes. WriteOrientation prints LEFT as "mono", RIGHT as
+// "swap", DLEFT as "dleft" and DRIGHT as "dright"; the text for UNKNOWN
+// depends on the model type. The enumerator values are also used as indices
+// into the prior count vectors of PhraseOrientation.
+enum REO_POS {LEFT, RIGHT, DLEFT, DRIGHT, UNKNOWN};
+// Direction of an orientation query. BIDIR is only accepted by
+// GetOrientationInfoString; GetOrientationInfo requires L2R or R2L.
+enum REO_DIR {L2R, R2L, BIDIR};
+
+// The key of the map is the English index and the value is a set of the source ones
+typedef std::map <int, std::set<int> > HSentenceVertices;
+
+
+// Computes phrase orientation classes for the phrases of one word-aligned
+// sentence pair and accumulates class-wide orientation prior counts.
+//
+// The sentence pair data passed to the constructor is held by reference, so it
+// has to stay alive for as long as the object is used.
+class PhraseOrientation
+{
+public:
+
+  // Precomputes the alignment and phrase corner tables for the sentence pair.
+  PhraseOrientation(const std::vector<std::string> &source,
+                    const std::vector<std::string> &target,
+                    const Alignment &alignment);
+
+  // Orientation class of a phrase for one direction (L2R or R2L). The first
+  // overload looks the target span up from the source span startF..endF.
+  REO_POS GetOrientationInfo(int startF, int endF, REO_DIR direction) const;
+  REO_POS GetOrientationInfo(int startF, int startE, int endF, int endE, REO_DIR direction) const;
+  // Same as above, but returns the class name(s) as a string; with BIDIR both
+  // directions are returned separated by a single space.
+  const std::string GetOrientationInfoString(int startF, int endF, REO_DIR direction=BIDIR) const;
+  const std::string GetOrientationInfoString(int startF, int startE, int endF, int endE, REO_DIR direction=BIDIR) const;
+  // Name of an orientation class under the given model type.
+  static const std::string GetOrientationString(const REO_POS orient, const REO_MODEL_TYPE modelType=REO_MSLR);
+  static void WriteOrientation(std::ostream& out, const REO_POS orient, const REO_MODEL_TYPE modelType=REO_MSLR);
+  // Adds to the prior counts, which are static and therefore shared by all
+  // PhraseOrientation objects.
+  void IncrementPriorCount(REO_DIR direction, REO_POS orient, float increment);
+  // Writes the shared prior counts as "L2R_<class> <count>" and
+  // "R2L_<class> <count>" lines.
+  static void WritePriorCounts(std::ostream& out, const REO_MODEL_TYPE modelType=REO_MSLR);
+
+private:
+
+  // Adds x to the set stored under key y in corners.
+  void InsertVertex( HSentenceVertices & corners, int x, int y );
+
+  // Adds the four corners of one phrase pair to the four vertex maps.
+  void InsertPhraseVertices(HSentenceVertices & topLeft,
+                            HSentenceVertices & topRight,
+                            HSentenceVertices & bottomLeft,
+                            HSentenceVertices & bottomRight,
+                            int startF, int startE, int endF, int endE);
+
+  // Core classification routine; the scan direction is selected through
+  // countF/zero/unit and the ge/lt callbacks.
+  REO_POS GetOrientHierModel(REO_MODEL_TYPE modelType,
+                             bool connectedLeftTop, bool connectedRightTop,
+                             int startF, int endF, int startE, int endE, int countF, int zero, int unit,
+                             bool (*ge)(int, int), bool (*lt)(int, int),
+                             const HSentenceVertices & bottomRight, const HSentenceVertices & bottomLeft) const;
+
+  // True if source position fi and target position ei are aligned.
+  bool IsAligned(int fi, int ei) const;
+
+  // Comparison callbacks handed to GetOrientHierModel.
+  static bool ge(int first, int second) { return first >= second; };
+  static bool le(int first, int second) { return first <= second; };
+  static bool lt(int first, int second) { return first < second; };
+
+  // References to the caller's sentence pair data (not copied).
+  const std::vector<std::string> &m_source;
+  const std::vector<std::string> &m_target;
+  const Alignment &m_alignment;
+
+  // For each target position, the source positions aligned to it.
+  std::vector<std::vector<int> > m_alignedToT;
+
+  // Corner points of the extracted phrase pairs, keyed by target position.
+  HSentenceVertices m_topLeft;
+  HSentenceVertices m_topRight;
+  HSentenceVertices m_bottomLeft;
+  HSentenceVertices m_bottomRight;
+
+  // (startF,endF) -> (minimum, maximum) target position aligned to that span.
+  boost::unordered_map< std::pair<int,int> , std::pair<int,int> > m_minAndMaxAlignedToSourceSpan;
+
+  // Prior counts per orientation class, indexed by REO_POS value.
+  static std::vector<float> m_l2rOrientationPriorCounts;
+  static std::vector<float> m_r2lOrientationPriorCounts;
+};
+
+}  // namespace GHKM
+}  // namespace Moses
+
diff --git a/phrase-extract/extract-ghkm/ScfgRuleWriter.cpp b/phrase-extract/extract-ghkm/ScfgRuleWriter.cpp
index be373b67b..2fba6930b 100644
--- a/phrase-extract/extract-ghkm/ScfgRuleWriter.cpp
+++ b/phrase-extract/extract-ghkm/ScfgRuleWriter.cpp
@@ -169,14 +169,17 @@ void ScfgRuleWriter::WriteSymbol(const Symbol &symbol, std::ostream &out)
   }
 }
 
-void ScfgRuleWriter::Write(const ScfgRule &rule, const Subgraph &g) 
+void ScfgRuleWriter::Write(const ScfgRule &rule, const Subgraph &g, bool printEndl) 
 {
-    Write(rule,false);
-    m_fwd << " {{Tree ";
-    g.PrintTree(m_fwd);
-    m_fwd << "}}";
+  Write(rule,false);  // delegate to the rule-only overload with printEndl=false
+  m_fwd << " {{Tree ";  // the tree printed by g goes to m_fwd only, wrapped in {{Tree ...}}
+  g.PrintTree(m_fwd);
+  m_fwd << "}}";
+
+  if (printEndl) {  // end the line on both m_fwd and m_inv only when requested
     m_fwd << std::endl;
     m_inv << std::endl;
+  }
 }
 
 }  // namespace GHKM
diff --git a/phrase-extract/extract-ghkm/ScfgRuleWriter.h b/phrase-extract/extract-ghkm/ScfgRuleWriter.h
index 18f423149..8a8564580 100644
--- a/phrase-extract/extract-ghkm/ScfgRuleWriter.h
+++ b/phrase-extract/extract-ghkm/ScfgRuleWriter.h
@@ -44,7 +44,7 @@ public:
 
   void Write(const ScfgRule &rule, bool printEndl=true);
 
-  void Write(const ScfgRule &rule, const Subgraph &g); 
+  void Write(const ScfgRule &rule, const Subgraph &g, bool printEndl=true); 
 
 private:
   // Disallow copying
diff --git a/phrase-extract/extract-ghkm/XmlTreeParser.h b/phrase-extract/extract-ghkm/XmlTreeParser.h
index e5bf5b463..d0209254f 100644
--- a/phrase-extract/extract-ghkm/XmlTreeParser.h
+++ b/phrase-extract/extract-ghkm/XmlTreeParser.h
@@ -49,6 +49,8 @@ public:
   static std::auto_ptr<ParseTree> ConvertTree(const MosesTraining::SyntaxNode &,
                                               const std::vector<std::string> &);
 
+  const std::vector<std::string>& GetWords() const { return m_words; }  // read-only access to m_words; const so it is callable on a const parser
+
 private:
 
   std::set<std::string> &m_labelSet;
diff --git a/phrase-extract/extract-main.cpp b/phrase-extract/extract-main.cpp
index fe3d99cd2..552dcb739 100644
--- a/phrase-extract/extract-main.cpp
+++ b/phrase-extract/extract-main.cpp
@@ -363,8 +363,6 @@ void ExtractTask::extract(SentenceAlignment &sentence)
   HSentenceVertices outBottomLeft;
   HSentenceVertices outBottomRight;
 
-  HSentenceVertices::const_iterator it;
-
   bool relaxLimit = m_options.isHierModel();
   bool buildExtraStructure = m_options.isPhraseModel() || m_options.isHierModel();
 
diff --git a/phrase-extract/score-main.cpp b/phrase-extract/score-main.cpp
index e8ba1d942..7f155f6ed 100644
--- a/phrase-extract/score-main.cpp
+++ b/phrase-extract/score-main.cpp
@@ -46,6 +46,7 @@ LexicalTable lexTable;
 bool inverseFlag = false;
 bool hierarchicalFlag = false;
 bool pcfgFlag = false;
+bool phraseOrientationFlag = false;
 bool treeFragmentsFlag = false;
 bool sourceSyntaxLabelsFlag = false;
 bool sourceSyntaxLabelSetFlag = false;
@@ -69,6 +70,7 @@ bool nonTermContext = false;
 int countOfCounts[COC_MAX+1];
 int totalDistinct = 0;
 float minCountHierarchical = 0;
+bool phraseOrientationPriorsFlag = false;
 
 boost::unordered_map<std::string,float> sourceLHSCounts;
 boost::unordered_map<std::string, boost::unordered_map<std::string,float>* > targetLHSAndSourceLHSJointCounts;
@@ -82,6 +84,9 @@ std::set<std::string> targetPreferenceLabelSet;
 std::map<std::string,size_t> targetPreferenceLabels; 
 std::vector<std::string> targetPreferenceLabelsByIndex;
 
+std::vector<float> orientationClassPriorsL2R(4,0); // mono swap dright dleft
+std::vector<float> orientationClassPriorsR2L(4,0); // mono swap dright dleft
+
 Vocabulary vcbT;
 Vocabulary vcbS;
 
@@ -106,6 +111,7 @@ void outputPhrasePair(const ExtractionPhrasePair &phrasePair, float, int, ostrea
 double computeLexicalTranslation( const PHRASE *phraseSource, const PHRASE *phraseTarget, const ALIGNMENT *alignmentTargetToSource );
 double computeUnalignedPenalty( const ALIGNMENT *alignmentTargetToSource );
 set<std::string> functionWordList;
+void loadOrientationPriors(const std::string &fileNamePhraseOrientationPriors, std::vector<float> &orientationClassPriorsL2R, std::vector<float> &orientationClassPriorsR2L);
 void loadFunctionWords( const string &fileNameFunctionWords );
 double computeUnalignedFWPenalty( const PHRASE *phraseTarget, const ALIGNMENT *alignmentTargetToSource );
 int calcCrossedNonTerm( const PHRASE *phraseTarget, const ALIGNMENT *alignmentTargetToSource );
@@ -136,6 +142,7 @@ int main(int argc, char* argv[])
   std::string fileNameTargetPreferenceLabelSet;
   std::string fileNameLeftHandSideTargetPreferenceLabelCounts;
   std::string fileNameLeftHandSideRuleTargetTargetPreferenceLabelCounts;
+  std::string fileNamePhraseOrientationPriors;
   std::vector<std::string> featureArgs; // all unknown args passed to feature manager
 
   for(int i=4; i<argc; i++) {
@@ -148,9 +155,12 @@ int main(int argc, char* argv[])
     } else if (strcmp(argv[i],"--PCFG") == 0) {
       pcfgFlag = true;
       std::cerr << "including PCFG scores" << std::endl;
+    } else if (strcmp(argv[i],"--PhraseOrientation") == 0) {
+      phraseOrientationFlag = true;
+      std::cerr << "including phrase orientation information" << std::endl;
     } else if (strcmp(argv[i],"--TreeFragments") == 0) {
       treeFragmentsFlag = true;
-      std::cerr << "including tree fragment information from syntactic parse\n";
+      std::cerr << "including tree fragment information from syntactic parse" << std::endl;
     } else if (strcmp(argv[i],"--SourceLabels") == 0) {
       sourceSyntaxLabelsFlag = true;
       std::cerr << "including source label information" << std::endl;
@@ -216,6 +226,14 @@ int main(int argc, char* argv[])
     } else if (strcmp(argv[i],"--CrossedNonTerm") == 0) {
       crossedNonTerm = true;
       std::cerr << "crossed non-term reordering feature" << std::endl;
+    } else if (strcmp(argv[i],"--PhraseOrientationPriors") == 0) {
+      phraseOrientationPriorsFlag = true;
+      if (i+1==argc) {
+          std::cerr << "ERROR: specify priors file for phrase orientation!" << std::endl;
+        exit(1);
+      }
+      fileNamePhraseOrientationPriors = argv[++i];
+      std::cerr << "smoothing phrase orientation with priors from " << fileNamePhraseOrientationPriors << std::endl;
     } else if (strcmp(argv[i],"--SpanLength") == 0) {
       spanLength = true;
       std::cerr << "span length feature" << std::endl;
@@ -254,6 +272,10 @@ int main(int argc, char* argv[])
     for(int i=1; i<=COC_MAX; i++) countOfCounts[i] = 0;
   }
 
+  if (phraseOrientationPriorsFlag) {
+    loadOrientationPriors(fileNamePhraseOrientationPriors,orientationClassPriorsL2R,orientationClassPriorsR2L);
+  }
+
   // sorted phrase extraction file
   Moses::InputFileStream extractFile(fileNameExtract);
 
@@ -774,11 +796,6 @@ void outputPhrasePair(const ExtractionPhrasePair &phrasePair,
   if (kneserNeyFlag)
     phraseTableFile << " " << distinctCount;
 
-  if ((treeFragmentsFlag || sourceSyntaxLabelsFlag || targetPreferenceLabelsFlag) && 
-      !inverseFlag) {
-    phraseTableFile << " |||";
-  }
-
   phraseTableFile << " |||";
 
   // tree fragments
@@ -832,6 +849,13 @@ void outputPhrasePair(const ExtractionPhrasePair &phrasePair,
     }
   }
 
+  // phrase orientation
+  if (phraseOrientationFlag && !inverseFlag) {
+    phraseTableFile << " {{Orientation ";
+    phrasePair.CollectAllPhraseOrientations("Orientation",orientationClassPriorsL2R,orientationClassPriorsR2L,0.5,phraseTableFile);
+    phraseTableFile << "}}";
+  }
+
   if (spanLength && !inverseFlag) {
 	  string propValue = phrasePair.CollectAllPropertyValues("SpanLength");
 	  if (!propValue.empty()) {
@@ -851,6 +875,94 @@ void outputPhrasePair(const ExtractionPhrasePair &phrasePair,
 
 
 
+// Loads phrase orientation prior counts from a text file and turns them into
+// prior probabilities. Each line is expected to have the form
+//   <L2R|R2L>_<mono|swap|dright|dleft> <count>
+// Counts of repeated keys are summed up. Each direction is normalized by its
+// own total; a direction whose total is 0 is left untouched. The process exits
+// with status 1 if the file cannot be opened. Malformed lines (unknown
+// direction, unknown orientation class, missing or unparsable count) are
+// reported on stderr and skipped, so that they can neither index the output
+// vectors with -1 nor add an uninitialized count.
+// Both output vectors must already have size 4 (mono swap dright dleft).
+void loadOrientationPriors(const std::string &fileNamePhraseOrientationPriors,
+                           std::vector<float> &orientationClassPriorsL2R,
+                           std::vector<float> &orientationClassPriorsR2L)
+{
+  assert(orientationClassPriorsL2R.size()==4 && orientationClassPriorsR2L.size()==4); // mono swap dright dleft
+
+  std::cerr << "Loading phrase orientation priors from " << fileNamePhraseOrientationPriors;
+  std::ifstream inFile(fileNamePhraseOrientationPriors.c_str());
+  if (inFile.fail()) {
+    std::cerr << " - ERROR: could not open file" << std::endl;
+    exit(1);
+  }
+
+  // position in this table == index into the prior vectors
+  static const char *const orientationClassNames[4] = { "mono", "swap", "dright", "dleft" };
+
+  std::string line;
+  size_t linesRead = 0;
+  float l2rSum = 0;
+  float r2lSum = 0;
+  while (getline(inFile, line)) {
+    ++linesRead; // counts every line, including the ones skipped below
+
+    std::istringstream tokenizer(line);
+    std::string key;
+    float count = 0;
+    tokenizer >> key >> count;
+    const bool countOk = !tokenizer.fail();
+
+    // direction prefix: at most one of the two flags can be set
+    const bool l2rFlag = (key.compare(0,4,"L2R_") == 0);
+    const bool r2lFlag = (key.compare(0,4,"R2L_") == 0);
+
+    // orientation class: remainder of the key after the 4-character prefix
+    int orientationClassId = -1;
+    if (l2rFlag || r2lFlag) {
+      for (int classId = 0; classId < 4; ++classId) {
+        if (key.compare(4,std::string::npos,orientationClassNames[classId]) == 0) {
+          orientationClassId = classId;
+          break;
+        }
+      }
+    }
+
+    if (orientationClassId == -1 || !countOk) {
+      std::cerr << " - ERROR: malformed line in orientation priors file" << std::endl;
+      continue; // never touch the vectors with an invalid index or count
+    }
+
+    if (l2rFlag) {
+      orientationClassPriorsL2R[orientationClassId] += count;
+      l2rSum += count;
+    } else {
+      orientationClassPriorsR2L[orientationClassId] += count;
+      r2lSum += count;
+    }
+  }
+
+  // normalization: return prior probabilities, not counts
+  if (l2rSum != 0) {
+    for (std::vector<float>::iterator it = orientationClassPriorsL2R.begin();
+         it != orientationClassPriorsL2R.end(); ++it) {
+      *it /= l2rSum;
+    }
+  }
+  if (r2lSum != 0) {
+    for (std::vector<float>::iterator it = orientationClassPriorsR2L.begin();
+         it != orientationClassPriorsR2L.end(); ++it) {
+      *it /= r2lSum;
+    }
+  }
+
+  std::cerr << " - read " << linesRead << " lines from orientation priors file" << std::endl;
+  inFile.close();
+}
+
+
+
 bool calcCrossedNonTerm( size_t targetPos, size_t sourcePos, const ALIGNMENT *alignmentTargetToSource )
 {
   for (size_t currTarget = 0; currTarget < alignmentTargetToSource->size(); ++currTarget) {
diff --git a/scripts/ems/experiment.perl b/scripts/ems/experiment.perl
index bf0f0129a..1108bec1b 100755
--- a/scripts/ems/experiment.perl
+++ b/scripts/ems/experiment.perl
@@ -2195,9 +2195,10 @@ sub define_training_extract_phrases {
       $cmd .= "-glue-grammar-file $glue_grammar_file ";
 
       if (&get("GENERAL:output-parser") && (&get("TRAINING:use-unknown-word-labels") || &get("TRAINING:use-unknown-word-soft-matches"))) {
-	  my $unknown_word_label = &versionize(&long_file_name("unknown-word-label","model",""));
-	  $cmd .= "-unknown-word-label $unknown_word_label ";
+        my $unknown_word_label = &versionize(&long_file_name("unknown-word-label","model",""));
+        $cmd .= "-unknown-word-label $unknown_word_label ";
       }
+
       if (&get("GENERAL:output-parser") && &get("TRAINING:use-unknown-word-soft-matches")) {
           my $unknown_word_soft_matches = &versionize(&long_file_name("unknown-word-soft-matches","model",""));
           $cmd .= "-unknown-word-soft-matches $unknown_word_soft_matches ";
@@ -2210,6 +2211,12 @@ sub define_training_extract_phrases {
       if (&get("TRAINING:ghkm-tree-fragments")) {
         $cmd .= "-ghkm-tree-fragments ";
       }
+
+      if (&get("TRAINING:ghkm-phrase-orientation")) {
+        $cmd .= "-ghkm-phrase-orientation ";
+        my $phrase_orientation_priors_file = &versionize(&long_file_name("phrase-orientation-priors","model",""));
+        $cmd .= "-phrase-orientation-priors-file $phrase_orientation_priors_file ";
+      }
     }
 
     my $extract_settings = &get("TRAINING:extract-settings");
@@ -2242,6 +2249,11 @@ sub define_training_build_ttable {
       if (&get("TRAINING:ghkm-tree-fragments")) {
         $cmd .= "-ghkm-tree-fragments ";
       }
+      if (&get("TRAINING:ghkm-phrase-orientation")) {
+        $cmd .= "-ghkm-phrase-orientation ";
+        my $phrase_orientation_priors_file = &versionize(&long_file_name("phrase-orientation-priors","model",""));
+        $cmd .= "-phrase-orientation-priors-file $phrase_orientation_priors_file ";
+      }
     }
     
     &create_step($step_id,$cmd);
diff --git a/scripts/generic/extract-parallel.perl b/scripts/generic/extract-parallel.perl
index 7abada1de..ff6a058b5 100755
--- a/scripts/generic/extract-parallel.perl
+++ b/scripts/generic/extract-parallel.perl
@@ -29,6 +29,8 @@ my $otherExtractArgs= "";
 my $weights = "";
 my $baselineExtract;
 my $glueFile;
+my $phraseOrientation = 0;
+my $phraseOrientationPriorsFile;
 
 for (my $i = 8; $i < $#ARGV + 1; ++$i)
 {
@@ -45,6 +47,11 @@ for (my $i = 8; $i < $#ARGV + 1; ++$i)
     $glueFile = $ARGV[++$i];
     next;
   }
+  $phraseOrientation = 1 if $ARGV[$i] eq "--PhraseOrientation";
+  if ($ARGV[$i] eq '--PhraseOrientationPriors') {
+    $phraseOrientationPriorsFile = $ARGV[++$i];
+    next;
+  }
 
   $otherExtractArgs .= $ARGV[$i] ." ";
 }
@@ -219,6 +226,32 @@ if (defined($glueFile)) {
   print STDERR `$cmd`;
 }
 
+# phrase orientation priors (GHKM extraction): sum the per-key counts of all $TMPDIR/*.phraseOrientationPriors files and write the totals to $phraseOrientationPriorsFile
+if ($phraseOrientation && defined($phraseOrientationPriorsFile)) {
+  print STDERR "Merging phrase orientation priors\n";
+
+  my @orientationPriorsCountFiles = glob("$TMPDIR/*.phraseOrientationPriors");
+  my %priorCounts;  # key (first space-separated field of a line) => accumulated value
+
+  foreach my $filenamePhraseOrientationPriors (@orientationPriorsCountFiles) {
+    if (-f $filenamePhraseOrientationPriors) {
+      open my $infilePhraseOrientationPriors, '<', $filenamePhraseOrientationPriors or die "cannot open $filenamePhraseOrientationPriors: $!";
+      while (my $line = <$infilePhraseOrientationPriors>) { 
+        print $line;  # NOTE(review): echoes every input line to STDOUT; looks like leftover debug output - confirm
+        my ($key, $value) = split / /, $line;  # no chomp: $value still ends with the line's newline
+        $priorCounts{$key} += $value;
+      }
+      close $infilePhraseOrientationPriors;
+    }
+  }
+
+  open my $outPhraseOrientationPriors, '>', $phraseOrientationPriorsFile or die "cannot open $phraseOrientationPriorsFile: $!";
+  foreach my $key (sort keys %priorCounts) {  # one "<key> <sum>" line per key, in sorted key order
+    print $outPhraseOrientationPriors $key." ".$priorCounts{$key}."\n";
+  }
+  close($outPhraseOrientationPriors);
+}
+
 # delete temporary files
 $cmd = "rm -rf $TMPDIR \n";
 print STDERR $cmd;
diff --git a/scripts/training/train-model.perl b/scripts/training/train-model.perl
index 22ecc5ff9..da8e677bc 100755
--- a/scripts/training/train-model.perl
+++ b/scripts/training/train-model.perl
@@ -32,7 +32,7 @@ my($_EXTERNAL_BINDIR, $_ROOT_DIR, $_CORPUS_DIR, $_GIZA_E2F, $_GIZA_F2E, $_MODEL_
    $_DECODING_STEPS, $_PARALLEL, $_FACTOR_DELIMITER, @_PHRASE_TABLE,
    @_REORDERING_TABLE, @_GENERATION_TABLE, @_GENERATION_TYPE, $_GENERATION_CORPUS,
    $_DONT_ZIP,  $_MGIZA, $_MGIZA_CPUS, $_SNT2COOC, $_HMM_ALIGN, $_CONFIG, $_OSM, $_OSM_FACTORS, $_POST_DECODING_TRANSLIT, $_TRANSLITERATION_PHRASE_TABLE,
-   $_HIERARCHICAL,$_XML,$_SOURCE_SYNTAX,$_TARGET_SYNTAX,$_GLUE_GRAMMAR,$_GLUE_GRAMMAR_FILE,$_UNKNOWN_WORD_LABEL_FILE,$_GHKM,$_GHKM_TREE_FRAGMENTS,$_PCFG,@_EXTRACT_OPTIONS,@_SCORE_OPTIONS,
+   $_HIERARCHICAL,$_XML,$_SOURCE_SYNTAX,$_TARGET_SYNTAX,$_GLUE_GRAMMAR,$_GLUE_GRAMMAR_FILE,$_UNKNOWN_WORD_LABEL_FILE,$_GHKM,$_GHKM_TREE_FRAGMENTS,$_GHKM_PHRASE_ORIENTATION,$_PHRASE_ORIENTATION_PRIORS_FILE,$_PCFG,@_EXTRACT_OPTIONS,@_SCORE_OPTIONS,
    $_ALT_DIRECT_RULE_SCORE_1, $_ALT_DIRECT_RULE_SCORE_2, $_UNKNOWN_WORD_SOFT_MATCHES_FILE,
    $_OMIT_WORD_ALIGNMENT,$_FORCE_FACTORED_FILENAMES,
    $_MEMSCORE, $_FINAL_ALIGNMENT_MODEL,
@@ -110,6 +110,8 @@ $_HELP = 1
 		       'unknown-word-soft-matches-file=s' => \$_UNKNOWN_WORD_SOFT_MATCHES_FILE, # give dummy label to unknown word, and allow soft matches to all other labels (with cost determined by sparse features)
 		       'ghkm' => \$_GHKM,
 		       'ghkm-tree-fragments' => \$_GHKM_TREE_FRAGMENTS,
+		       'ghkm-phrase-orientation' => \$_GHKM_PHRASE_ORIENTATION,
+		       'phrase-orientation-priors-file=s' => \$_PHRASE_ORIENTATION_PRIORS_FILE, # currently relevant for GHKM extraction only; phrase orientation for PBT has different implementation
 		       'pcfg' => \$_PCFG,
 		       'alt-direct-rule-score-1' => \$_ALT_DIRECT_RULE_SCORE_1,
 		       'alt-direct-rule-score-2' => \$_ALT_DIRECT_RULE_SCORE_2,
@@ -1426,6 +1428,8 @@ sub extract_phrase {
         $cmd .= " --UnpairedExtractFormat" if $_ALT_DIRECT_RULE_SCORE_1 || $_ALT_DIRECT_RULE_SCORE_2;
         $cmd .= " --ConditionOnTargetLHS" if $_ALT_DIRECT_RULE_SCORE_1;
         $cmd .= " --TreeFragments" if $_GHKM_TREE_FRAGMENTS;
+        $cmd .= " --PhraseOrientation" if $_GHKM_PHRASE_ORIENTATION;
+        $cmd .= " --PhraseOrientationPriors $_PHRASE_ORIENTATION_PRIORS_FILE" if defined($_PHRASE_ORIENTATION_PRIORS_FILE);
         if (!defined($_GHKM)) {
           $cmd .= " --SourceSyntax" if $_SOURCE_SYNTAX;
           $cmd .= " --TargetSyntax" if $_TARGET_SYNTAX;
@@ -1550,6 +1554,9 @@ sub score_phrase_phrase_extract {
     my $NEG_LOG_PROB = (defined($_SCORE_OPTIONS) && $_SCORE_OPTIONS =~ /NegLogProb/);
     my $NO_LEX = (defined($_SCORE_OPTIONS) && $_SCORE_OPTIONS =~ /NoLex/);
     my $MIN_COUNT_HIERARCHICAL = (defined($_SCORE_OPTIONS) && $_SCORE_OPTIONS =~ /MinCountHierarchical ([\d\.]+)/) ? $1 : undef;
+    my $SOURCE_LABELS = (defined($_SCORE_OPTIONS) && $_SCORE_OPTIONS =~ /SourceLabels/);
+    my $SOURCE_LABEL_COUNTS_LHS = (defined($_SCORE_OPTIONS) && $_SCORE_OPTIONS =~ /SourceLabelCountsLHS/);
+    my $SOURCE_LABEL_SET = (defined($_SCORE_OPTIONS) && $_SCORE_OPTIONS =~ /SourceLabelSet/);
     my $SPAN_LENGTH = (defined($_SCORE_OPTIONS) && $_SCORE_OPTIONS =~ /SpanLength/);
     my $CORE_SCORE_OPTIONS = "";
     $CORE_SCORE_OPTIONS .= " --LogProb" if $LOG_PROB;
@@ -1557,6 +1564,9 @@ sub score_phrase_phrase_extract {
     $CORE_SCORE_OPTIONS .= " --NoLex" if $NO_LEX;
 	$CORE_SCORE_OPTIONS .= " --Singleton" if $SINGLETON;
 	$CORE_SCORE_OPTIONS .= " --CrossedNonTerm" if $CROSSEDNONTERM;
+    $CORE_SCORE_OPTIONS .= " --SourceLabels" if $SOURCE_LABELS;
+    $CORE_SCORE_OPTIONS .= " --SourceLabelCountsLHS " if $SOURCE_LABEL_COUNTS_LHS;
+    $CORE_SCORE_OPTIONS .= " --SourceLabelSet " if $SOURCE_LABEL_SET;
 
     my $substep = 1;
     my $isParent = 1;
@@ -1597,6 +1607,8 @@ sub score_phrase_phrase_extract {
         $cmd .= " --UnpairedExtractFormat" if $_ALT_DIRECT_RULE_SCORE_1 || $_ALT_DIRECT_RULE_SCORE_2;
         $cmd .= " --ConditionOnTargetLHS" if $_ALT_DIRECT_RULE_SCORE_1;
         $cmd .= " --TreeFragments" if $_GHKM_TREE_FRAGMENTS;
+        $cmd .= " --PhraseOrientation" if $_GHKM_PHRASE_ORIENTATION;
+        $cmd .= " --PhraseOrientationPriors $_PHRASE_ORIENTATION_PRIORS_FILE" if $_GHKM_PHRASE_ORIENTATION && defined($_PHRASE_ORIENTATION_PRIORS_FILE);
         $cmd .= " $DOMAIN" if $DOMAIN;
         $cmd .= " $CORE_SCORE_OPTIONS" if defined($_SCORE_OPTIONS);
         $cmd .= " --FlexibilityScore=$FLEX_SCORER" if $_FLEXIBILITY_SCORE;