Merge remote-tracking branch 'origin/master' into miramerge

Conflicts: moses/src/PhraseDictionary.cpp moses/src/TargetPhrase.cpp moses/src/TargetPhrase.h
author: Barry Haddow <barry.haddow@gmail.com> 2012-10-14 17:18:03 +0400
committer: Barry Haddow <barry.haddow@gmail.com> 2012-10-14 17:18:03 +0400
commit: 61ae24aa5d1db2c117c1aa90ca65ef4fe41490fa (patch)
tree: 60d5814cfcabe7325ff7b0a8dfbe8c0e3d1e1a7f /contrib
parent: 732b299ba3612094af67f4e336011fe337089e8a (diff)
parent: 0d9095983d70a032fb596366e2445e338301b212 (diff)
24 files changed, 20 insertions, 3899 deletions
diff --git a/contrib/fuzzy-match/Makefile b/contrib/fuzzy-match/Makefile
deleted file mode 100644
index 5bb884a51..000000000
--- a/contrib/fuzzy-match/Makefile
+++ /dev/null
@@ -1,16 +0,0 @@
-all: suffix-test fuzzy-match fuzzy-match2
-
-clean: 
-	rm -f *.o
-
-.cpp.o:
-	g++ -O6 -g -c $<
-
-suffix-test: Vocabulary.o SuffixArray.o suffix-test.o
-	g++ Vocabulary.o SuffixArray.o suffix-test.o -o suffix-test
-
-fuzzy-match: Vocabulary.o SuffixArray.o old/fuzzy-match.o
-	g++ Vocabulary.o SuffixArray.o fuzzy-match.o -o fuzzy-match
-
-fuzzy-match2: Vocabulary.o SuffixArray.o fuzzy-match2.o Util.o
-	g++ Vocabulary.o SuffixArray.o fuzzy-match2.o Util.o -o fuzzy-match2
diff --git a/contrib/fuzzy-match/Match.h b/contrib/fuzzy-match/Match.h
deleted file mode 100644
index 6fc8bb42f..000000000
--- a/contrib/fuzzy-match/Match.h
+++ /dev/null
@@ -1,29 +0,0 @@
-//
-//  Match.h
-//  fuzzy-match
-//
-//  Created by Hieu Hoang on 25/07/2012.
-//  Copyright 2012 __MyCompanyName__. All rights reserved.
-//
-
-#ifndef fuzzy_match_Match_h
-#define fuzzy_match_Match_h
-
-/* data structure for n-gram match between input and corpus */
-
-class Match {
-public:
-	int input_start;
-	int input_end;
-	int tm_start;
-	int tm_end;
-	int min_cost;
-	int max_cost;
-	int internal_cost;
-	Match( int is, int ie, int ts, int te, int min, int max, int i )
-  :input_start(is), input_end(ie), tm_start(ts), tm_end(te), min_cost(min), max_cost(max), internal_cost(i)
-  {}
-};
-
-
-#endif
diff --git a/contrib/fuzzy-match/SentenceAlignment.h b/contrib/fuzzy-match/SentenceAlignment.h
deleted file mode 100644
index 4d92fd635..000000000
--- a/contrib/fuzzy-match/SentenceAlignment.h
+++ /dev/null
@@ -1,48 +0,0 @@
-//
-//  SentenceAlignment.h
-//  fuzzy-match
-//
-//  Created by Hieu Hoang on 25/07/2012.
-//  Copyright 2012 __MyCompanyName__. All rights reserved.
-//
-
-#ifndef fuzzy_match_SentenceAlignment_h
-#define fuzzy_match_SentenceAlignment_h
-
-#include <sstream>
-#include "Vocabulary.h"
-
-extern Vocabulary vocabulary;
-
-struct SentenceAlignment
-{
-  int count;
-  vector< WORD_ID > target;
-  vector< pair<int,int> > alignment;
-  
-  SentenceAlignment()
-  {}
-  
-  string getTargetString() const
-  {
-    stringstream strme;
-    for (size_t i = 0; i < target.size(); ++i) {
-      const WORD &word = vocabulary.GetWord(target[i]);
-      strme << word << " ";
-    }
-    return strme.str();
-  }
-  
-  string getAlignmentString() const
-  {
-    stringstream strme;
-    for (size_t i = 0; i < alignment.size(); ++i) {
-      const pair<int,int> &alignPair = alignment[i];
-      strme << alignPair.first << "-" << alignPair.second << " ";
-    }
-    return strme.str();
-  }
-  
-};
-
-#endif
diff --git a/contrib/fuzzy-match/SuffixArray.cpp b/contrib/fuzzy-match/SuffixArray.cpp
deleted file mode 100644
index e0aa3da91..000000000
--- a/contrib/fuzzy-match/SuffixArray.cpp
+++ /dev/null
@@ -1,244 +0,0 @@
-#include "SuffixArray.h"
-#include <string>
-#include <stdlib.h>
-#include <cstring>
-
-using namespace std;
-
-SuffixArray::SuffixArray( string fileName ) 
-{
-	m_vcb.StoreIfNew( "<uNk>" );
-	m_endOfSentence = m_vcb.StoreIfNew( "<s>" );
-
-	ifstream extractFile;
-	char line[LINE_MAX_LENGTH];
-
-	// count the number of words first;
-	extractFile.open(fileName.c_str());
-	istream *fileP = &extractFile;
-	m_size = 0;
-	size_t sentenceCount = 0;
-	while(!fileP->eof()) {
-		SAFE_GETLINE((*fileP), line, LINE_MAX_LENGTH, '\n');
-		if (fileP->eof()) break;
-		vector< WORD_ID > words = m_vcb.Tokenize( line );
-		m_size += words.size() + 1;
-		sentenceCount++;
-	}
-	extractFile.close();
-	cerr << m_size << " words (incl. sentence boundaries)" << endl;
-
-	// allocate memory
-	m_array = (WORD_ID*) calloc( sizeof( WORD_ID ), m_size );
-	m_index = (INDEX*) calloc( sizeof( INDEX ), m_size );
-	m_wordInSentence = (char*) calloc( sizeof( char ), m_size );
-	m_sentence = (size_t*) calloc( sizeof( size_t ), m_size );
-	m_sentenceLength = (char*) calloc( sizeof( char ), sentenceCount );
-
-	// fill the array
-	int wordIndex = 0;
-	int sentenceId = 0;
-	extractFile.open(fileName.c_str());
-	fileP = &extractFile;
-	while(!fileP->eof()) {
-		SAFE_GETLINE((*fileP), line, LINE_MAX_LENGTH, '\n');
-		if (fileP->eof()) break;
-		vector< WORD_ID > words = m_vcb.Tokenize( line );
-		vector< WORD_ID >::const_iterator i;
-		
-		for( i=words.begin(); i!=words.end(); i++)
-		{
-			m_index[ wordIndex ] = wordIndex;
-			m_sentence[ wordIndex ] = sentenceId;
-			m_wordInSentence[ wordIndex ] = i-words.begin();
-			m_array[ wordIndex++ ] = *i;
-		}
-		m_index[ wordIndex ] = wordIndex;
-		m_array[ wordIndex++ ] = m_endOfSentence;
-		m_sentenceLength[ sentenceId++ ] = words.size();
-	}
-	extractFile.close();
-	cerr << "done reading " << wordIndex << " words, " << sentenceId << " sentences." << endl;
-	// List(0,9);
-
-	// sort
-	m_buffer = (INDEX*) calloc( sizeof( INDEX ), m_size );
-	Sort( 0, m_size-1 );
-	free( m_buffer );
-	cerr << "done sorting" << endl;
-}
-
-// good ol' quick sort
-void SuffixArray::Sort(INDEX start, INDEX end) {
-	if (start == end) return;
-	INDEX mid = (start+end+1)/2;
-	Sort( start, mid-1 );
-	Sort( mid, end );
-
-	// merge
-	int i = start;
-	int j = mid;
-	int k = 0;
-	int length = end-start+1;
-	while( k<length )
-	{
-		if (i == mid ) 
-		{
-			m_buffer[ k++ ] = m_index[ j++ ];
-		}
-		else if (j > end ) 
-		{
-			m_buffer[ k++ ] = m_index[ i++ ];
-		}
-		else {
-			if (CompareIndex( m_index[i], m_index[j] ) < 0) 
-			{
-				m_buffer[ k++ ] = m_index[ i++ ];
-			}	
-			else 
-			{
-				m_buffer[ k++ ] = m_index[ j++ ];
-			}
-		}
-	}
-	
-	memcpy( ((char*)m_index) + sizeof( INDEX ) * start,
-					((char*)m_buffer), sizeof( INDEX ) * (end-start+1) );
-}
-
-SuffixArray::~SuffixArray()
-{ 
-	free(m_index); 
-	free(m_array);
-}
-
-int SuffixArray::CompareIndex( INDEX a, INDEX b ) const
-{
-	// skip over identical words
-	INDEX offset = 0;
-	while( a+offset < m_size &&
-				 b+offset < m_size &&
-				 m_array[ a+offset ] == m_array[ b+offset ] )
-	{ offset++; }
-	
-	if( a+offset == m_size ) return -1;
-	if( b+offset == m_size ) return 1;
-	return CompareWord( m_array[ a+offset ], m_array[ b+offset ] );
-}
-
-inline int SuffixArray::CompareWord( WORD_ID a, WORD_ID b ) const
-{
-	// cerr << "c(" << m_vcb.GetWord(a) << ":" << m_vcb.GetWord(b) << ")=" << m_vcb.GetWord(a).compare( m_vcb.GetWord(b) ) << endl;
-	return m_vcb.GetWord(a).compare( m_vcb.GetWord(b) );
-}
-
-int SuffixArray::Count( const vector< WORD > &phrase )
-{
-	INDEX dummy;
-	return LimitedCount( phrase, m_size, dummy, dummy, 0, m_size-1 );
-}
-
-bool SuffixArray::MinCount( const vector< WORD > &phrase, INDEX min )
-{
-	INDEX dummy;
-	return LimitedCount( phrase, min, dummy, dummy, 0, m_size-1 ) >= min;
-}
-
-bool SuffixArray::Exists( const vector< WORD > &phrase )
-{
-	INDEX dummy;
-	return LimitedCount( phrase, 1, dummy, dummy, 0, m_size-1 ) == 1;
-}
-
-int SuffixArray::FindMatches( const vector< WORD > &phrase, INDEX &firstMatch, INDEX &lastMatch, INDEX search_start, INDEX search_end )
-{
-	return LimitedCount( phrase, m_size, firstMatch, lastMatch, search_start, search_end );
-}
-
-int SuffixArray::LimitedCount( const vector< WORD > &phrase, INDEX min, INDEX &firstMatch, INDEX &lastMatch, INDEX search_start, INDEX search_end )
-{
-	// cerr << "FindFirst\n";
-	INDEX start = search_start;
-	INDEX end = (search_end == -1) ? (m_size-1) : search_end;
-	INDEX mid = FindFirst( phrase, start, end );
-	// cerr << "done\n";
-	if (mid == m_size) return 0; // no matches
-	if (min == 1) return 1;      // only existance check
-
-	int matchCount = 1;
-
-	//cerr << "before...\n";
-	firstMatch = FindLast( phrase, mid, start, -1 );
-	matchCount += mid - firstMatch;
-
-	//cerr << "after...\n";
-	lastMatch = FindLast( phrase, mid, end, 1 );
-	matchCount += lastMatch - mid;
-
-	return matchCount;
-}
-
-SuffixArray::INDEX SuffixArray::FindLast( const vector< WORD > &phrase, INDEX start, INDEX end, int direction )
-{
-	end += direction;
-	while(true)
-	{
-		INDEX mid = ( start + end + (direction>0 ? 0 : 1) )/2;
-		
-		int match = Match( phrase, mid );
-		int matchNext = Match( phrase, mid+direction );
-		//cerr << "\t" << start << ";" << mid << ";" << end << " -> " << match << "," << matchNext << endl;
-		
-		if (match == 0 && matchNext != 0) return mid;
-
-		if (match == 0) // mid point is a match
-			start = mid;
-		else
-			end = mid;
-	}
-}
-
-SuffixArray::INDEX SuffixArray::FindFirst( const vector< WORD > &phrase, INDEX &start, INDEX &end )
-{
-	while(true)
-	{
-		INDEX mid = ( start + end + 1 )/2;
-		//cerr << "FindFirst(" << start << ";" << mid << ";" << end << ")\n";
-		int match = Match( phrase, mid );
-		
-		if (match == 0) return mid;
-		if (start >= end && match != 0 ) return m_size;
-		
-		if (match > 0)
-			start = mid+1;
-		else
-			end = mid-1;	
-	}
-}
-
-int SuffixArray::Match( const vector< WORD > &phrase, INDEX index )
-{
-	INDEX pos = m_index[ index ];
-	for(INDEX i=0; i<phrase.size() && i+pos<m_size; i++)
-	{
-		int match = CompareWord( m_vcb.GetWordID( phrase[i] ), m_array[ pos+i ] );
-		// cerr << "{" << index << "+" << i << "," << pos+i << ":" << match << "}" << endl;
-		if (match != 0) 
-			return match;
-	}
-	return 0;
-}
-
-void SuffixArray::List(INDEX start, INDEX end)
-{
-	for(INDEX i=start; i<=end; i++)
-	{
-		INDEX pos = m_index[ i ];
-		// cerr << i << ":" << pos << "\t";
-		for(int j=0; j<5 && j+pos<m_size; j++)
-		{
-			cout << " " << m_vcb.GetWord( m_array[ pos+j ] );
-		}
-		// cerr << "\n";
-	}
-}
diff --git a/contrib/fuzzy-match/SuffixArray.h b/contrib/fuzzy-match/SuffixArray.h
deleted file mode 100644
index 2deed4c32..000000000
--- a/contrib/fuzzy-match/SuffixArray.h
+++ /dev/null
@@ -1,45 +0,0 @@
-#include "Vocabulary.h"
-
-#pragma once
-
-#define LINE_MAX_LENGTH 10000
-
-
-class SuffixArray 
-{
-public:
-	typedef unsigned int INDEX;
-
-private:
-	WORD_ID *m_array;
-	INDEX *m_index;
-	INDEX *m_buffer;
-	char *m_wordInSentence;
-	size_t *m_sentence;
-	char *m_sentenceLength;
-	WORD_ID m_endOfSentence;
-	Vocabulary m_vcb;
-	INDEX m_size;
-
-public:
-	SuffixArray( string fileName );
-	~SuffixArray();
-
-	void Sort(INDEX start, INDEX end);
-	int CompareIndex( INDEX a, INDEX b ) const;
-	inline int CompareWord( WORD_ID a, WORD_ID b ) const;
-	int Count( const vector< WORD > &phrase );
-	bool MinCount( const vector< WORD > &phrase, INDEX min );
-	bool Exists( const vector< WORD > &phrase );
-	int FindMatches( const vector< WORD > &phrase, INDEX &firstMatch, INDEX &lastMatch, INDEX search_start = 0, INDEX search_end = -1 );
-	int LimitedCount( const vector< WORD > &phrase, INDEX min, INDEX &firstMatch, INDEX &lastMatch, INDEX search_start = -1, INDEX search_end = 0 );
-	INDEX FindFirst( const vector< WORD > &phrase, INDEX &start, INDEX &end );
-	INDEX FindLast( const vector< WORD > &phrase, INDEX start, INDEX end, int direction );
-	int Match( const vector< WORD > &phrase, INDEX index );
-	void List( INDEX start, INDEX end );
-	inline INDEX GetPosition( INDEX index ) { return m_index[ index ]; }
-	inline size_t GetSentence( INDEX position ) { return m_sentence[position]; }
-	inline char GetWordInSentence( INDEX position ) { return m_wordInSentence[position]; }
-	inline char GetSentenceLength( size_t sentenceId ) { return m_sentenceLength[sentenceId]; }
-	inline INDEX GetSize() { return m_size; }
-};
diff --git a/contrib/fuzzy-match/Util.cpp b/contrib/fuzzy-match/Util.cpp
deleted file mode 100644
index 4d750791e..000000000
--- a/contrib/fuzzy-match/Util.cpp
+++ /dev/null
@@ -1,147 +0,0 @@
-//
-//  Util.cpp
-//  fuzzy-match
-//
-//  Created by Hieu Hoang on 26/07/2012.
-//  Copyright 2012 __MyCompanyName__. All rights reserved.
-//
-
-#include <iostream>
-#include <stdio.h>
-#include "Util.h"
-#include "SentenceAlignment.h"
-#include "SuffixArray.h"
-
-void load_corpus( const char* fileName, vector< vector< WORD_ID > > &corpus )
-{ // source 
-	ifstream fileStream;
-	fileStream.open(fileName);
-	if (!fileStream) {
-		cerr << "file not found: " << fileName << endl;
-		exit(1);
-	}
-  cerr << "loading " << fileName << endl;
-
-	istream *fileStreamP = &fileStream;
-  
-	char line[LINE_MAX_LENGTH];
-	while(true)
-	{
-		SAFE_GETLINE((*fileStreamP), line, LINE_MAX_LENGTH, '\n');
-		if (fileStreamP->eof()) break;
-		corpus.push_back( vocabulary.Tokenize( line ) );
-	}
-}
-
-void load_target( const char* fileName, vector< vector< SentenceAlignment > > &corpus)
-{ 
-	ifstream fileStream;
-	fileStream.open(fileName);
-	if (!fileStream) {
-		cerr << "file not found: " << fileName << endl;
-		exit(1);
-	}
-  cerr << "loading " << fileName << endl;
-
-	istream *fileStreamP = &fileStream;
-  
-  WORD_ID delimiter = vocabulary.StoreIfNew("|||");
-  
-  int lineNum = 0;
-	char line[LINE_MAX_LENGTH];
-	while(true)
-	{
-		SAFE_GETLINE((*fileStreamP), line, LINE_MAX_LENGTH, '\n');
-		if (fileStreamP->eof()) break;
-    
-    vector<WORD_ID> toks = vocabulary.Tokenize( line );
-    
-    corpus.push_back(vector< SentenceAlignment >());
-    vector< SentenceAlignment > &vec = corpus.back();
-    
-    vec.push_back(SentenceAlignment());
-    SentenceAlignment *sentence = &vec.back();
-    
-    const WORD &countStr = vocabulary.GetWord(toks[0]);
-    sentence->count = atoi(countStr.c_str());
-    
-    for (size_t i = 1; i < toks.size(); ++i) {
-      WORD_ID wordId = toks[i];
-      
-      if (wordId == delimiter) {
-        // target and alignments can have multiple sentences.
-        vec.push_back(SentenceAlignment());
-        sentence = &vec.back();
-        
-        // count
-        ++i;
-        
-        const WORD &countStr = vocabulary.GetWord(toks[i]);
-        sentence->count = atoi(countStr.c_str());
-      }
-      else {
-        // just a normal word, add
-        sentence->target.push_back(wordId);
-      }
-    }
-    
-    ++lineNum;
-    
-	}
-  
-}
-
-
-void load_alignment( const char* fileName, vector< vector< SentenceAlignment > > &corpus )
-{ 
-  ifstream fileStream;
-	fileStream.open(fileName);
-	if (!fileStream) {
-		cerr << "file not found: " << fileName << endl;
-		exit(1);
-	}
-  cerr << "loading " << fileName << endl;
-
-	istream *fileStreamP = &fileStream;
-  
-  string delimiter = "|||";
-  
-  int lineNum = 0;
-	char line[LINE_MAX_LENGTH];
-	while(true)
-	{
-		SAFE_GETLINE((*fileStreamP), line, LINE_MAX_LENGTH, '\n');
-		if (fileStreamP->eof()) break;
-    
-    vector< SentenceAlignment > &vec = corpus[lineNum];
-    size_t targetInd = 0;
-    SentenceAlignment *sentence = &vec[targetInd];
-    
-    vector<string> toks = Tokenize(line);
-    
-    for (size_t i = 0; i < toks.size(); ++i) {
-      string &tok = toks[i];
-      
-      if (tok == delimiter) {
-        // target and alignments can have multiple sentences.
-        ++targetInd;
-        sentence = &vec[targetInd];
-        
-        ++i;
-      }
-      else {
-        // just a normal alignment, add
-        vector<int> alignPoint = Tokenize<int>(tok, "-");
-        assert(alignPoint.size() == 2);
-        sentence->alignment.push_back(pair<int,int>(alignPoint[0], alignPoint[1]));
-      }
-    }
-    
-    ++lineNum;
-    
-	}
-}
-
-
-
-
diff --git a/contrib/fuzzy-match/Util.h b/contrib/fuzzy-match/Util.h
deleted file mode 100644
index 7bb13d032..000000000
--- a/contrib/fuzzy-match/Util.h
+++ /dev/null
@@ -1,87 +0,0 @@
-//
-//  Util.h
-//  fuzzy-match
-//
-//  Created by Hieu Hoang on 25/07/2012.
-//  Copyright 2012 __MyCompanyName__. All rights reserved.
-//
-
-#ifndef fuzzy_match_Util_h
-#define fuzzy_match_Util_h
-
-#include <vector>
-#include <sstream>
-#include "Vocabulary.h"
-
-class SentenceAlignment;
-
-void load_corpus( const char* fileName, std::vector< std::vector< WORD_ID > > &corpus );
-void load_target( const char* fileName, std::vector< std::vector< SentenceAlignment > > &corpus);
-void load_alignment( const char* fileName, std::vector< std::vector< SentenceAlignment > > &corpus );
-
-/**
- * Convert vector of type T to string
- */
-template <typename T>
-std::string Join(const std::string& delimiter, const std::vector<T>& items)
-{
-  std::ostringstream outstr;
-  if(items.size() == 0) return "";
-  outstr << items[0];
-  for(unsigned int i = 1; i < items.size(); i++)
-    outstr << delimiter << items[i];
-  return outstr.str();
-}
-
-//! convert string to variable of type T. Used to reading floats, int etc from files
-template<typename T>
-inline T Scan(const std::string &input)
-{
-  std::stringstream stream(input);
-  T ret;
-  stream >> ret;
-  return ret;
-}
-
-//! convert vectors of string to vectors of type T variables
-template<typename T>
-inline std::vector<T> Scan(const std::vector< std::string > &input)
-{
-  std::vector<T> output(input.size());
-  for (size_t i = 0 ; i < input.size() ; i++) {
-    output[i] = Scan<T>( input[i] );
-  }
-  return output;
-}
-
-inline std::vector<std::string> Tokenize(const std::string& str,
-                                         const std::string& delimiters = " \t")
-{
-  std::vector<std::string> tokens;
-  // Skip delimiters at beginning.
-  std::string::size_type lastPos = str.find_first_not_of(delimiters, 0);
-  // Find first "non-delimiter".
-  std::string::size_type pos     = str.find_first_of(delimiters, lastPos);
-  
-  while (std::string::npos != pos || std::string::npos != lastPos) {
-    // Found a token, add it to the vector.
-    tokens.push_back(str.substr(lastPos, pos - lastPos));
-    // Skip delimiters.  Note the "not_of"
-    lastPos = str.find_first_not_of(delimiters, pos);
-    // Find next "non-delimiter"
-    pos = str.find_first_of(delimiters, lastPos);
-  }
-  
-  return tokens;
-}
-
-template<typename T>
-inline std::vector<T> Tokenize( const std::string &input
-                               , const std::string& delimiters = " \t")
-{
-  std::vector<std::string> stringVector = Tokenize(input, delimiters);
-  return Scan<T>( stringVector );
-}
-
-
-#endif
diff --git a/contrib/fuzzy-match/Vocabulary.cpp b/contrib/fuzzy-match/Vocabulary.cpp
deleted file mode 100644
index 4492eec95..000000000
--- a/contrib/fuzzy-match/Vocabulary.cpp
+++ /dev/null
@@ -1,45 +0,0 @@
-// $Id: Vocabulary.cpp 1565 2008-02-22 14:42:01Z bojar $
-#include "Vocabulary.h"
-
-// as in beamdecoder/tables.cpp
-vector<WORD_ID> Vocabulary::Tokenize( const char input[] ) {
-  vector< WORD_ID > token;
-  bool betweenWords = true;
-  int start=0;
-  int i=0;
-  for(; input[i] != '\0'; i++) {
-    bool isSpace = (input[i] == ' ' || input[i] == '\t');
-
-    if (!isSpace && betweenWords) {
-      start = i;
-      betweenWords = false;
-    }
-    else if (isSpace && !betweenWords) {
-      token.push_back( StoreIfNew ( string( input+start, i-start ) ) );
-      betweenWords = true;
-    }
-  }
-  if (!betweenWords)
-    token.push_back( StoreIfNew ( string( input+start, i-start ) ) );
-  return token;
-}
-
-WORD_ID Vocabulary::StoreIfNew( const WORD& word ) {
-  map<WORD, WORD_ID>::iterator i = lookup.find( word );
-  
-  if( i != lookup.end() )
-    return i->second;
-
-  WORD_ID id = vocab.size();
-  vocab.push_back( word );
-  lookup[ word ] = id;
-  return id;  
-}
-
-WORD_ID Vocabulary::GetWordID( const WORD &word ) {
-  map<WORD, WORD_ID>::iterator i = lookup.find( word );
-  if( i == lookup.end() )
-    return 0;
-  WORD_ID w= (WORD_ID) i->second;
-  return w;
-}
diff --git a/contrib/fuzzy-match/Vocabulary.h b/contrib/fuzzy-match/Vocabulary.h
deleted file mode 100644
index 3e48847a7..000000000
--- a/contrib/fuzzy-match/Vocabulary.h
+++ /dev/null
@@ -1,40 +0,0 @@
-// $Id: tables-core.h 1470 2007-10-02 21:43:54Z redpony $
-
-#pragma once
-
-#include <iostream>
-#include <fstream>
-#include <assert.h>
-#include <stdlib.h>
-#include <string>
-#include <queue>
-#include <map>
-#include <cmath>
-
-using namespace std;
-
-#define MAX_LENGTH 10000
-
-#define SAFE_GETLINE(_IS, _LINE, _SIZE, _DELIM) { \
-                _IS.getline(_LINE, _SIZE, _DELIM); \
-                if(_IS.fail() && !_IS.bad() && !_IS.eof()) _IS.clear(); \
-                if (_IS.gcount() == _SIZE-1) { \
-                  cerr << "Line too long! Buffer overflow. Delete lines >=" \
-                    << _SIZE << " chars or raise MAX_LENGTH in phrase-extract/tables-core.cpp" \
-                    << endl; \
-                    exit(1); \
-                } \
-              }
-
-typedef string WORD;
-typedef unsigned int WORD_ID;
-
-class Vocabulary {
- public:
-  map<WORD, WORD_ID> lookup;
-  vector< WORD > vocab;
-  WORD_ID StoreIfNew( const WORD& );
-  WORD_ID GetWordID( const WORD& );
-  vector<WORD_ID> Tokenize( const char[] );
-  inline WORD &GetWord( WORD_ID id ) const { WORD &i = (WORD&) vocab[ id ]; return i; }
-};
diff --git a/contrib/fuzzy-match/fuzzy-match2.cpp b/contrib/fuzzy-match/fuzzy-match2.cpp
deleted file mode 100644
index c1252aa03..000000000
--- a/contrib/fuzzy-match/fuzzy-match2.cpp
+++ /dev/null
@@ -1,460 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <getopt.h>
-#include <map>
-#include <algorithm>
-#include <iostream>
-#include <fstream>
-#include <cstring>
-#include <time.h>
-#include <fstream>
-#include "SentenceAlignment.h"
-#include "fuzzy-match2.h"
-#include "SuffixArray.h"
-
-/** This implementation is explained in
-       Koehn and Senellart: "Fast Approximate String Matching 
-       with Suffix Arrays and A* Parsing" (AMTA 2010) ***/
-
-using namespace std;
-
-int main(int argc, char* argv[]) 
-{
-	vector< vector< WORD_ID > > source, input;
-	vector< vector< SentenceAlignment > > targetAndAlignment;
-	
-	
-	while(1) {
-		static struct option long_options[] = {
-			{"basic", no_argument, &basic_flag, 1},
-			{"word", no_argument, &lsed_flag, 0},
-			{"unrefined", no_argument, &refined_flag, 0},
-			{"nolengthfilter", no_argument, &length_filter_flag, 0},
-			{"noparse", no_argument, &parse_flag, 0},
-			{"multiple", no_argument, &multiple_flag, 1},
-			{"minmatch", required_argument, 0, 'm'},
-			{0, 0, 0, 0}
-		};
-		int option_index = 0;
-		int c = getopt_long (argc, argv, "m:", long_options, &option_index);
-		if (c == -1) break;
-		switch (c) {
-			case 0:
-//				if (long_options[option_index].flag != 0)
-//					break;
-//				printf ("option %s", long_options[option_index].name);
-//				if (optarg)
-//					printf (" with arg %s", optarg);
-//				printf ("\n");
-				break;
-			case 'm':
-				min_match = atoi(optarg);
-				if (min_match < 1 || min_match > 100) {
-					cerr << "error: --minmatch must have value in range 1..100\n";
-					exit(1);
-				}
-				cerr << "setting min match to " << min_match << endl;
-				break;
-			default:
-				cerr << "usage: syntax: ./fuzzy-match input corpus [--basic] [--word] [--minmatch 1..100]\n";
-				exit(1);
-		}
-	}
-	if (lsed_flag) { cerr << "lsed\n"; }
-	if (basic_flag) { cerr << "basic\n"; }
-	if (refined_flag) { cerr << "refined\n"; }
-	if (length_filter_flag) { cerr << "length filter\n"; }
-	if (parse_flag) { cerr << "parse\n"; }
-//	exit(1);
-
-
-	if (optind+4 != argc) {
-		cerr << "syntax: ./fuzzy-match input source target alignment [--basic] [--word] [--minmatch 1..100]\n";
-		exit(1);
-	}
-	
-	load_corpus(argv[optind], input);
-	load_corpus(argv[optind+1], source);
-	load_target(argv[optind+2], targetAndAlignment);
-	load_alignment(argv[optind+3], targetAndAlignment);
-
-  // ./fuzzy-match input corpus [-basic] 
-	
-//	load_corpus("../corpus/tm.truecased.4.en", source);
-//	load_corpus("../corpus/tm.truecased.4.it", target);
-//	load_corpus("../evaluation/test.input.tc.4", input);
-
-//	load_corpus("../../acquis-truecase/corpus/acquis.truecased.190.en", source);
-//	load_corpus("../../acquis-truecase/evaluation/ac-test.input.tc.190", input);
-
-//	load_corpus("../corpus/tm.truecased.16.en", source);
-//	load_corpus("../evaluation/test.input.tc.16", input);
-
-	if (basic_flag) {
-		cerr << "using basic method\n";
-		clock_t start_main_clock2 = clock();
-		basic_fuzzy_match( source, input );
-		cerr << "total: " << (1000 * (clock()-start_main_clock2) / CLOCKS_PER_SEC) << endl;
-		exit(1);
-	}
-
-	cerr << "number of input sentences " << input.size() << endl;
-
-	cerr << "creating suffix array...\n";
-//	SuffixArray suffixArray( "../corpus/tm.truecased.4.en" );
-//	SuffixArray suffixArray( "../../acquis-truecase/corpus/acquis.truecased.190.en" );
-	SuffixArray suffixArray( argv[optind+1] );
-	
-	clock_t start_main_clock = clock();
-
-	// looping through all input sentences...
-	cerr << "looping...\n";
-	for(unsigned int sentenceInd = 0; sentenceInd < input.size(); sentenceInd++)
-	{
-		clock_t start_clock = clock();
-		// if (i % 10 == 0) cerr << ".";
-
-		// establish some basic statistics
-
-		// int input_length = compute_length( input[i] );
-		int input_length = input[sentenceInd].size();
-		int best_cost = input_length * (100-min_match) / 100 + 1;
-
-		int match_count = 0; // how many substring matches to be considered
-		//cerr << endl << "sentence " << i << ", length " << input_length << ", best_cost " << best_cost << endl;
-
-		// find match ranges in suffix array
-		vector< vector< pair< SuffixArray::INDEX, SuffixArray::INDEX > > > match_range;
-		for(size_t start=0;start<input[sentenceInd].size();start++) 
-		{
-			SuffixArray::INDEX prior_first_match = 0;
-			SuffixArray::INDEX prior_last_match = suffixArray.GetSize()-1;
-			vector< string > substring;
-			bool stillMatched = true;
-			vector< pair< SuffixArray::INDEX, SuffixArray::INDEX > > matchedAtThisStart;
-			//cerr << "start: " << start;
-			for(int word=start; stillMatched && word<input[sentenceInd].size(); word++)
-			{
-				substring.push_back( vocabulary.GetWord( input[sentenceInd][word] ) );
-
-				// only look up, if needed (i.e. no unnecessary short gram lookups)
-//				if (! word-start+1 <= short_match_max_length( input_length ) )
-				//			{
-				SuffixArray::INDEX first_match, last_match;
-				stillMatched = false;
-				if (suffixArray.FindMatches( substring, first_match, last_match, prior_first_match, prior_last_match ) )
-				{
-					stillMatched = true;
-					matchedAtThisStart.push_back( make_pair( first_match, last_match ) );
-					//cerr << " (" << first_match << "," << last_match << ")";
-					//cerr << " " << ( last_match - first_match + 1 );
-					prior_first_match = first_match;
-					prior_last_match = last_match;
-				}
-					//}
-			}
-			//cerr << endl;
-			match_range.push_back( matchedAtThisStart );
-		}
-
-		clock_t clock_range = clock();
-
-		map< int, vector< Match > > sentence_match;
-		map< int, int > sentence_match_word_count;
-
-		// go through all matches, longest first
-		for(int length = input[sentenceInd].size(); length >= 1; length--)
-		{
-			// do not create matches, if these are handled by the short match function
-			if (length <= short_match_max_length( input_length ) )
-			{
-				continue;
-			}
-
-			unsigned int count = 0;
-			for(int start = 0; start <= input[sentenceInd].size() - length; start++)
-			{
-				if (match_range[start].size() >= length)
-				{
-					pair< SuffixArray::INDEX, SuffixArray::INDEX > &range = match_range[start][length-1];
-					// cerr << " (" << range.first << "," << range.second << ")";
-					count += range.second - range.first + 1;
-
-					for(SuffixArray::INDEX i=range.first; i<=range.second; i++)
-					{
-						int position = suffixArray.GetPosition( i );
-
-						// sentence length mismatch
-						size_t sentence_id = suffixArray.GetSentence( position );
-						int sentence_length = suffixArray.GetSentenceLength( sentence_id );
-						int diff = abs( (int)sentence_length - (int)input_length );
-						// cerr << endl << i << "\tsentence " << sentence_id << ", length " << sentence_length;
-						//if (length <= 2 && input_length>=5 &&
-						//		sentence_match.find( sentence_id ) == sentence_match.end())
-						//	continue;
-
-						if (diff > best_cost)
-							continue;
-
-						// compute minimal cost
-						int start_pos = suffixArray.GetWordInSentence( position );
-						int end_pos = start_pos + length-1;
-						// cerr << endl << "\t" << start_pos << "-" << end_pos << " (" << sentence_length << ") vs. " 
-						// << start << "-" << (start+length-1) << " (" << input_length << ")"; 
-						// different number of prior words -> cost is at least diff
-						int min_cost = abs( start - start_pos );
-						
-						// same number of words, but not sent. start -> cost is at least 1 
-						if (start == start_pos && start>0)
-							min_cost++;
-
-						// different number of remaining words -> cost is at least diff
-						min_cost += abs( ( sentence_length-1 - end_pos ) -
-														 ( input_length-1 - (start+length-1) ) );
-
-						// same number of words, but not sent. end -> cost is at least 1
-						if ( sentence_length-1 - end_pos ==
-								 input_length-1 - (start+length-1)
-								 && end_pos != sentence_length-1 )
-							min_cost++;
-
-						// cerr << " -> min_cost " << min_cost;
-						if (min_cost > best_cost)
-							continue;
-
-						// valid match
-						match_count++;
-
-						// compute maximal cost
-						int max_cost = max( start, start_pos )
-							+ max( sentence_length-1 - end_pos,
-										 input_length-1 - (start+length-1) );
-						// cerr << ", max_cost " << max_cost;
-						
-						Match m = Match( start, start+length-1, 
-														 start_pos, start_pos+length-1, 
-														 min_cost, max_cost, 0);
-						sentence_match[ sentence_id ].push_back( m );
-						sentence_match_word_count[ sentence_id ] += length;
-
-						if (max_cost < best_cost)
-						{
-							best_cost = max_cost;
-							if (best_cost == 0) break;
-						}
-						//if (match_count >= MAX_MATCH_COUNT) break;
-					}
-				}
-				// cerr << endl;
-				if (best_cost == 0) break;
-				//if (match_count >= MAX_MATCH_COUNT) break;
-			}
-			// cerr << count << " matches at length " << length << " in " << sentence_match.size() << " tm." << endl;
-
-			if (best_cost == 0) break;
-			//if (match_count >= MAX_MATCH_COUNT) break;
-		}
-		cerr << match_count << " matches in " << sentence_match.size() << " sentences." << endl;
-
-		clock_t clock_matches = clock();
-
-		// consider each sentence for which we have matches
-		int old_best_cost = best_cost;
-		int tm_count_word_match = 0;
-		int tm_count_word_match2 = 0;
-		int pruned_match_count = 0;
-		if (short_match_max_length( input_length ))
-		{
-			init_short_matches( input[sentenceInd] );
-		}
-		vector< int > best_tm;
-		typedef map< int, vector< Match > >::iterator I;
-
-		clock_t clock_validation_sum = 0;
-
-		for(I tm=sentence_match.begin(); tm!=sentence_match.end(); tm++)
-		{
-			int tmID = tm->first;
-			int tm_length = suffixArray.GetSentenceLength(tmID);
-			vector< Match > &match = tm->second;
-			add_short_matches( match, source[tmID], input_length, best_cost );
-
-			//cerr << "match in sentence " << tmID << ": " << match.size() << " [" << tm_length << "]" << endl;
-
-			// quick look: how many words are matched
-			int words_matched = 0;
-			for(int m=0;m<match.size();m++) {
-
-				if (match[m].min_cost <= best_cost) // makes no difference
-					words_matched += match[m].input_end - match[m].input_start + 1;
-			}
-			if (max(input_length,tm_length) - words_matched > best_cost)
-			{
-				if (length_filter_flag) continue;
-			}
-			tm_count_word_match++;
-
-			// prune, check again how many words are matched
-			vector< Match > pruned = prune_matches( match, best_cost );
-			words_matched = 0;
-			for(int p=0;p<pruned.size();p++) {
-				words_matched += pruned[p].input_end - pruned[p].input_start + 1;
-			}
-			if (max(input_length,tm_length) - words_matched > best_cost)
-			{
-				if (length_filter_flag) continue;
-			}
-			tm_count_word_match2++;
-
-			pruned_match_count += pruned.size();
-			int prior_best_cost = best_cost;
-			int cost;
-
-			clock_t clock_validation_start = clock();
-			if (! parse_flag ||
-			    pruned.size()>=10) // to prevent worst cases
-			{
-				string path;
-				cost = sed( input[sentenceInd], source[tmID], path, false );
-				if (cost <  best_cost) 
-				{
-					best_cost = cost;
-				}
-			}
-
-			else
-			{
-				cost = parse_matches( pruned, input_length, tm_length, best_cost );
-				if (prior_best_cost != best_cost)
-				{
-					best_tm.clear();
-				}
-			}
-			clock_validation_sum += clock() - clock_validation_start;
-			if (cost == best_cost)
-			{
-				best_tm.push_back( tmID );
-			}
-		}
-		cerr << "reduced best cost from " << old_best_cost << " to " << best_cost << endl;
-		cerr << "tm considered: " << sentence_match.size()
-				 << " word-matched: " << tm_count_word_match 
-				 << " word-matched2: " << tm_count_word_match2 
-				 << " best: " << best_tm.size() << endl;
-
-		cerr << "pruned matches: " << ((float)pruned_match_count/(float)tm_count_word_match2) << endl;
-
-    // create xml and extract files
-    string inputStr, sourceStr;
-    for (size_t pos = 0; pos < input_length; ++pos) {
-      inputStr += vocabulary.GetWord(input[sentenceInd][pos]) + " ";
-    }
-    
-		// do not try to find the best ... report multiple matches
-		if (multiple_flag) {
-			int input_letter_length = compute_length( input[sentenceInd] );
-			for(int si=0; si<best_tm.size(); si++) {
-				int s = best_tm[si];
-				string path;
-				unsigned int letter_cost = sed( input[sentenceInd], source[s], path, true );
-				// do not report multiple identical sentences, but just their count
-				cout << sentenceInd << " "; // sentence number
-				cout << letter_cost << "/" << input_letter_length << " ";
-				cout << "(" << best_cost <<"/" << input_length <<") ";
-				cout << "||| " << s << " ||| " << path << endl;
-        
-        vector<WORD_ID> &sourceSentence = source[s];
-        vector<SentenceAlignment> &targets = targetAndAlignment[s];
-        create_extract(sentenceInd, best_cost, sourceSentence, targets, inputStr, path);
-
-			}
-		} // if (multiple_flag)
-    else {
-
-      // find the best matches according to letter sed
-      string best_path = "";
-      int best_match = -1;
-      int best_letter_cost;
-      if (lsed_flag) {
-        best_letter_cost = compute_length( input[sentenceInd] ) * min_match / 100 + 1;
-        for(int si=0; si<best_tm.size(); si++)
-        {
-          int s = best_tm[si];
-          string path;
-          unsigned int letter_cost = sed( input[sentenceInd], source[s], path, true );
-          if (letter_cost < best_letter_cost)
-          {
-            best_letter_cost = letter_cost;
-            best_path = path;
-            best_match = s;
-          }
-        }
-      }
-      // if letter sed turned off, just compute path for first match
-      else {
-        if (best_tm.size() > 0) {
-          string path;
-          sed( input[sentenceInd], source[best_tm[0]], path, false );
-          best_path = path;
-          best_match = best_tm[0];
-        }
-      }
-      cerr << "elapsed: " << (1000 * (clock()-start_clock) / CLOCKS_PER_SEC)
-           << " ( range: " << (1000 * (clock_range-start_clock) / CLOCKS_PER_SEC)
-           << " match: " << (1000 * (clock_matches-clock_range) / CLOCKS_PER_SEC)
-           << " tm: " << (1000 * (clock()-clock_matches) / CLOCKS_PER_SEC)
-           << " (validation: " << (1000 * (clock_validation_sum) / CLOCKS_PER_SEC) << ")"
-           << " )" << endl;
-      if (lsed_flag) {
-        cout << best_letter_cost << "/" << compute_length( input[sentenceInd] ) << " (";
-      }
-      cout << best_cost <<"/" << input_length;
-      if (lsed_flag) 	cout << ")";
-      cout << " ||| " << best_match << " ||| " << best_path << endl;
-
-      // creat xml & extracts
-      vector<WORD_ID> &sourceSentence = source[best_match];
-      vector<SentenceAlignment> &targets = targetAndAlignment[best_match];
-      create_extract(sentenceInd, best_cost, sourceSentence, targets, inputStr, best_path);
-
-    } // else if (multiple_flag)
-    
-    
-  }
-	cerr << "total: " << (1000 * (clock()-start_main_clock) / CLOCKS_PER_SEC) << endl;
-	
-}
-
-void create_extract(int sentenceInd, int cost, const vector< WORD_ID > &sourceSentence, const vector<SentenceAlignment> &targets, const string &inputStr, const string  &path)
-{
-  string sourceStr;
-  for (size_t pos = 0; pos < sourceSentence.size(); ++pos) {
-    WORD_ID wordId = sourceSentence[pos];
-    sourceStr += vocabulary.GetWord(wordId) + " ";
-  }
-    
-  char *inputFileName = tmpnam(NULL);
-  ofstream inputFile(inputFileName);
-
-  for (size_t targetInd = 0; targetInd < targets.size(); ++targetInd) {
-    const SentenceAlignment &sentenceAlignment = targets[targetInd]; 
-    string targetStr = sentenceAlignment.getTargetString();
-    string alignStr = sentenceAlignment.getAlignmentString();
-    
-    inputFile 
-      << sentenceInd << endl
-      << cost << endl
-      << sourceStr << endl 
-      << inputStr << endl
-      << targetStr << endl
-      << alignStr << endl
-      << path << endl
-      << sentenceAlignment.count << endl;
-
-  }
-  
-  string cmd = string("perl create_xml.perl < ") + inputFileName;
-  cerr << cmd << endl;
-  inputFile.close();
-  
-}
diff --git a/contrib/fuzzy-match/fuzzy-match2.h b/contrib/fuzzy-match/fuzzy-match2.h
deleted file mode 100644
index 614bf971f..000000000
--- a/contrib/fuzzy-match/fuzzy-match2.h
+++ /dev/null
@@ -1,561 +0,0 @@
-//
-//  fuzzy-match2.h
-//  fuzzy-match
-//
-//  Created by Hieu Hoang on 25/07/2012.
-//  Copyright 2012 __MyCompanyName__. All rights reserved.
-//
-
-#ifndef fuzzy_match_fuzzy_match2_h
-#define fuzzy_match_fuzzy_match2_h
-
-#include <string>
-#include <sstream>
-#include <vector>
-#include "Vocabulary.h"
-#include "SuffixArray.h"
-#include "Util.h"
-#include "Match.h"
-
-#define MAX_MATCH_COUNT 10000000
-
-Vocabulary vocabulary;
-
-int basic_flag = false;
-int lsed_flag = true;
-int refined_flag = true;
-int length_filter_flag = true;
-int parse_flag = true;
-int min_match = 70;
-int multiple_flag = false;
-int multiple_slack = 0;
-int multiple_max = 100;
-map< WORD_ID,vector< int > > single_word_index;
-// global cache for word pairs
-map< pair< WORD_ID, WORD_ID >, unsigned int > lsed;
-
-void create_extract(int sentenceInd, int cost, const vector< WORD_ID > &sourceSentence, const vector<SentenceAlignment> &targets, const string &inputStr, const string  &path);
-
-
-
-/* Letter string edit distance, e.g. sub 'their' to 'there' costs 2 */
-
-unsigned int letter_sed( WORD_ID aIdx, WORD_ID bIdx )
-{
-	// check if already computed -> lookup in cache
-	pair< WORD_ID, WORD_ID > pIdx = make_pair( aIdx, bIdx );
-	map< pair< WORD_ID, WORD_ID >, unsigned int >::const_iterator lookup = lsed.find( pIdx );
-	if (lookup != lsed.end())
-	{
-		return (lookup->second);
-	}
-  
-	// get surface strings for word indices
-	const string &a = vocabulary.GetWord( aIdx );
-	const string &b = vocabulary.GetWord( bIdx );
-  
-	// initialize cost matrix
-	unsigned int **cost  = (unsigned int**) calloc( sizeof( unsigned int*  ), a.size()+1 );
-	for( unsigned int i=0; i<=a.size(); i++ ) {
-		cost[i] = (unsigned int*) calloc( sizeof(unsigned int), b.size()+1 );
-		cost[i][0] = i;
-	}
-	for( unsigned int j=0; j<=b.size(); j++ ) {
-		cost[0][j] = j;
-	}
-  
-	// core string edit distance loop
-	for( unsigned int i=1; i<=a.size(); i++ ) {
-		for( unsigned int j=1; j<=b.size(); j++ ) {
-      
-			unsigned int ins = cost[i-1][j] + 1;
-			unsigned int del = cost[i][j-1] + 1;
-			bool match = (a.substr(i-1,1).compare( b.substr(j-1,1) ) == 0);
-			unsigned int diag = cost[i-1][j-1] + (match ? 0 : 1);
-      
-			unsigned int min = (ins < del) ? ins : del;
-			min = (diag < min) ? diag : min;
-      
-			cost[i][j] = min;
-		}
-	}
-  
-	// clear out memory
-	unsigned int final = cost[a.size()][b.size()];
-	for( unsigned int i=0; i<=a.size(); i++ ) {
-		free( cost[i] );
-	}
-	free( cost );
-  
-	// cache and return result
-	lsed[ pIdx ] = final;
-	return final;
-}
-
-/* string edit distance implementation */
-
-unsigned int sed( const vector< WORD_ID > &a, const vector< WORD_ID > &b, string &best_path, bool use_letter_sed ) {
-  
-	// initialize cost and path matrices
-	unsigned int **cost  = (unsigned int**) calloc( sizeof( unsigned int* ), a.size()+1 );
-	char **path = (char**) calloc( sizeof( char* ), a.size()+1 );
-	
-	for( unsigned int i=0; i<=a.size(); i++ ) {
-		cost[i] = (unsigned int*) calloc( sizeof(unsigned int), b.size()+1 );
-		path[i] = (char*) calloc( sizeof(char), b.size()+1 );
-		if (i>0)
-		{
-			cost[i][0] = cost[i-1][0];
-			if (use_letter_sed)
-			{
-				cost[i][0] += vocabulary.GetWord( a[i-1] ).size();
-			}
-			else
-			{
-				cost[i][0]++;
-			}
-		}
-		else
-		{
-			cost[i][0] = 0;
-		}
-		path[i][0] = 'I';
-	}
-  
-	for( unsigned int j=0; j<=b.size(); j++ ) {
-		if (j>0) 
-		{
-			cost[0][j] = cost[0][j-1];
-			if (use_letter_sed)
-			{
-				cost[0][j] +=	vocabulary.GetWord( b[j-1] ).size();
-			}
-			else
-			{
-				cost[0][j]++;
-			}
-		}
-		else
-		{
-			cost[0][j] = 0;
-		}
-		path[0][j] = 'D';
-	}
-  
-	// core string edit distance algorithm
-	for( unsigned int i=1; i<=a.size(); i++ ) {
-		for( unsigned int j=1; j<=b.size(); j++ ) {
-			unsigned int ins = cost[i-1][j];
-			unsigned int del = cost[i][j-1];
-			unsigned int match;
-			if (use_letter_sed)
-			{
-				ins += vocabulary.GetWord( a[i-1] ).size();
-				del += vocabulary.GetWord( b[j-1] ).size();
-				match = letter_sed( a[i-1], b[j-1] );
-			}
-			else
-			{
-				ins++;
-				del++;
-				match = ( a[i-1] == b[j-1] ) ? 0 : 1;
-			}
-			unsigned int diag = cost[i-1][j-1] + match;
-      
-			char action = (ins < del) ? 'I' : 'D';
-			unsigned int min = (ins < del) ? ins : del;
-			if (diag < min)
-			{
-				action = (match>0) ? 'S' : 'M';
-				min = diag;
-			}
-      
-			cost[i][j] = min;
-			path[i][j] = action;
-		}
-	}
-  
-	// construct string for best path
-	unsigned int i = a.size();
-	unsigned int j = b.size();
-	best_path = "";
-	while( i>0 || j>0 )
-	{
-		best_path = path[i][j] + best_path;
-		if (path[i][j] == 'I') 
-		{
-			i--;
-		}
-		else if (path[i][j] == 'D') 
-		{
-			j--;
-		}
-		else 
-		{ 
-			i--; 
-			j--;
-		}
-	}
-	
-  
-	// clear out memory
-	unsigned int final = cost[a.size()][b.size()];
-  
-	for( unsigned int i=0; i<=a.size(); i++ ) {
-		free( cost[i] );
-		free( path[i] );
-	}
-	free( cost );
-	free( path );
-  
-	// return result
-	return final;
-}
-
-/* utlility function: compute length of sentence in characters 
- (spaces do not count) */
-
-unsigned int compute_length( const vector< WORD_ID > &sentence )
-{
-	unsigned int length = 0; for( unsigned int i=0; i<sentence.size(); i++ )
-	{
-		length += vocabulary.GetWord( sentence[i] ).size();
-	}
-	return length;
-}
-
-/* brute force method: compare input to all corpus sentences */
-
-int basic_fuzzy_match( vector< vector< WORD_ID > > source, 
-                      vector< vector< WORD_ID > > input ) 
-{
-	// go through input set...
-	for(unsigned int i=0;i<input.size();i++)
-	{
-		bool use_letter_sed = false;
-    
-		// compute sentence length and worst allowed cost
-		unsigned int input_length;
-		if (use_letter_sed)
-		{
-			input_length = compute_length( input[i] );
-		}
-		else
-		{
-			input_length = input[i].size();
-		}
-		unsigned int best_cost = input_length * (100-min_match) / 100 + 2;
-		string best_path = "";
-		int best_match = -1;
-    
-		// go through all corpus sentences
-		for(unsigned int s=0;s<source.size();s++)
-		{
-			int source_length;
-			if (use_letter_sed)
-			{
-				source_length = compute_length( source[s] );
-			}
-			else
-			{
-				source_length = source[s].size();
-			}
-			int diff = abs((int)source_length - (int)input_length);
-			if (length_filter_flag && (diff >= best_cost))
-			{
-				continue;
-			}
-      
-			// compute string edit distance
-			string path;
-			unsigned int cost = sed( input[i], source[s], path, use_letter_sed );
-      
-			// update if new best
-			if (cost < best_cost) 
-			{
-				best_cost = cost;
-				best_path = path;
-				best_match = s;
-			}
-		}
-		cout << best_cost << " ||| " << best_match << " ||| " << best_path << endl;
-	}
-}
-
-/* definition of short matches
- very short n-gram matches (1-grams) will not be looked up in
- the suffix array, since there are too many matches
- and for longer sentences, at least one 2-gram match must occur */
-
-inline int short_match_max_length( int input_length )
-{
-	if ( ! refined_flag ) 
-		return 0;
-	if ( input_length >= 5 )
-		return 1;
-	return 0;	
-}
-
-/* if we have non-short matches in a sentence, we need to
- take a closer look at it. 
- this function creates a hash map for all input words and their positions
- (to be used by the next function) 
- (done here, because this has be done only once for an input sentence) */
-
-void init_short_matches( const vector< WORD_ID > &input )
-{
-	int max_length = short_match_max_length( input.size() );
-	if (max_length == 0)
-		return;
-  
-	single_word_index.clear();
-	
-	// store input words and their positions in hash map
-	for(int i=0; i<input.size(); i++)
-	{
-		if (single_word_index.find( input[i] ) == single_word_index.end())
-		{
-			vector< int > position_vector;
-			single_word_index[ input[i] ] = position_vector;
-		}
-		single_word_index[ input[i] ].push_back( i );
-	}	
-}
-
-/* add all short matches to list of matches for a sentence */
-
-void add_short_matches( vector< Match > &match, const vector< WORD_ID > &tm, int input_length, int best_cost )
-{	
-	int max_length = short_match_max_length( input_length );
-	if (max_length == 0)
-		return;
-  
-	int tm_length = tm.size();
-	map< WORD_ID,vector< int > >::iterator input_word_hit;
-	for(int t_pos=0; t_pos<tm.size(); t_pos++)
-	{
-		input_word_hit = single_word_index.find( tm[t_pos] );
-		if (input_word_hit != single_word_index.end())
-		{
-			vector< int > &position_vector = input_word_hit->second;
-			for(int j=0; j<position_vector.size(); j++)
-			{
-				int &i_pos = position_vector[j];
-        
-				// before match
-				int max_cost = max( i_pos , t_pos );
-				int min_cost = abs( i_pos - t_pos );
-				if ( i_pos>0 && i_pos == t_pos ) 
-					min_cost++;
-				
-				// after match
-				max_cost += max( (input_length-i_pos) , (tm_length-t_pos));
-				min_cost += abs( (input_length-i_pos) - (tm_length-t_pos));
-				if ( i_pos != input_length-1 && (input_length-i_pos) == (tm_length-t_pos))
-					min_cost++;
-				
-				if (min_cost <= best_cost)
-				{
-					Match new_match( i_pos,i_pos, t_pos,t_pos, min_cost,max_cost,0 );
-					match.push_back( new_match );
-				}
-			}
-		} 
-	}
-}
-
-/* remove matches that are subsumed by a larger match */
-
-vector< Match > prune_matches( const vector< Match > &match, int best_cost )
-{
-	//cerr << "\tpruning";
-	vector< Match > pruned;
-	for(int i=match.size()-1; i>=0; i--)
-	{
-		//cerr << " (" << match[i].input_start << "," << match[i].input_end 
-		//		 << " ; " << match[i].tm_start << "," << match[i].tm_end 
-		//		 << " * " << match[i].min_cost << ")";
-    
-		//if (match[i].min_cost > best_cost)
-		//	continue;
-    
-		bool subsumed = false;
-		for(int j=match.size()-1; j>=0; j--)
-		{
-			if (i!=j // do not compare match with itself
-					&& ( match[i].input_end - match[i].input_start <= 
-              match[j].input_end - match[j].input_start ) // i shorter than j
-					&& ((match[i].input_start == match[j].input_start &&
-							 match[i].tm_start    == match[j].tm_start	) ||
-							(match[i].input_end   == match[j].input_end &&
-							 match[i].tm_end      == match[j].tm_end) ) )
-			{
-				subsumed = true;
-			}
-		}
-		if (! subsumed && match[i].min_cost <= best_cost)
-		{
-			//cerr << "*";
-			pruned.push_back( match[i] );
-		}
-	}
-	//cerr << endl;
-	return pruned;
-}
-
-/* A* parsing method to compute string edit distance */
-
-int parse_matches( vector< Match > &match, int input_length, int tm_length, int &best_cost )
-{	
-	// cerr << "sentence has " << match.size() << " matches, best cost: " << best_cost << ", lengths input: " << input_length << " tm: " << tm_length << endl;
-  
-	if (match.size() == 1)
-		return match[0].max_cost;
-	if (match.size() == 0)
-		return input_length+tm_length;
-	
-	int this_best_cost = input_length + tm_length;
-	for(int i=0;i<match.size();i++)
-	{
-		this_best_cost = min( this_best_cost, match[i].max_cost );
-	}
-	// cerr << "\tthis best cost: " << this_best_cost << endl;
-	
-	// bottom up combination of spans
-	vector< vector< Match > > multi_match;
-	multi_match.push_back( match );
-	
-	int match_level = 1;
-	while(multi_match[ match_level-1 ].size()>0)
-	{
-		// init vector
-		vector< Match > empty;
-		multi_match.push_back( empty );
-    
-		for(int first_level = 0; first_level <= (match_level-1)/2; first_level++)
-		{
-			int second_level = match_level - first_level -1;
-			//cerr << "\tcombining level " << first_level << " and " << second_level << endl;
-			
-			vector< Match > &first_match  = multi_match[ first_level ];
-			vector< Match > &second_match = multi_match[ second_level ];
-      
-			for(int i1 = 0; i1 < first_match.size(); i1++) {
-				for(int i2 = 0; i2 < second_match.size(); i2++) {
-          
-					// do not combine the same pair twice
-					if (first_level == second_level && i2 <= i1) 
-					{
-						continue;
-					}
-          
-					// get sorted matches (first is before second)
-					Match *first, *second;
-					if (first_match[i1].input_start < second_match[i2].input_start )
-					{
-						first = &first_match[i1];
-						second = &second_match[i2];
-					}
-					else
-					{
-						second = &first_match[i1];
-						first = &second_match[i2];
-					}
-          
-					//cerr << "\tcombining " 
-					//		 << "(" << first->input_start << "," << first->input_end << "), "
-					//		 << first->tm_start << " [" << first->internal_cost << "]"
-					//		 << " with "
-					//		 << "(" << second->input_start << "," << second->input_end << "), "
-					//		 << second->tm_start<< " [" << second->internal_cost << "]"
-					//		 << endl;
-          
-					// do not process overlapping matches
-					if (first->input_end >= second->input_start) 
-					{
-						continue;
-					}
-          
-					// no overlap / mismatch in tm
-					if (first->tm_end >= second->tm_start)
-					{
-						continue;
-					}
-          
-					// compute cost
-					int min_cost = 0;
-					int max_cost = 0;
-          
-					// initial
-					min_cost += abs( first->input_start - first->tm_start );
-					max_cost += max( first->input_start, first->tm_start );				 
-          
-					// same number of words, but not sent. start -> cost is at least 1 
-					if (first->input_start == first->tm_start && first->input_start > 0)
-					{
-						min_cost++;
-					}
-          
-					// in-between
-					int skipped_words = second->input_start - first->input_end -1;
-					int skipped_words_tm = second->tm_start - first->tm_end -1;
-					int internal_cost = max( skipped_words, skipped_words_tm );
-					internal_cost += first->internal_cost + second->internal_cost;
-					min_cost += internal_cost;
-					max_cost += internal_cost;
-					
-					// final
-					min_cost += abs( (tm_length-1 - second->tm_end) -
-                          (input_length-1 - second->input_end) );
-					max_cost += max( (tm_length-1 - second->tm_end),
-                          (input_length-1 - second->input_end) );
-          
-					// same number of words, but not sent. end -> cost is at least 1
-					if ( ( input_length-1 - second->input_end 
-                == tm_length-1 - second->tm_end )
-              && input_length-1 != second->input_end )
-					{
-						min_cost++;
-					}
-          
-					// cerr << "\tcost: " << min_cost << "-" << max_cost << endl;
-          
-					// if worst than best cost, forget it
-					if (min_cost > best_cost)					
-					{
-						continue;
-					}
-					
-					// add match
-					Match new_match( first->input_start,
-                          second->input_end,
-                          first->tm_start,
-                          second->tm_end,
-                          min_cost,
-                          max_cost,
-                          internal_cost);
-					multi_match[ match_level ].push_back( new_match );
-					// cerr << "\tstored\n";
-					
-					// possibly updating this_best_cost
-					if (max_cost < this_best_cost)
-					{
-						// cerr << "\tupdating this best cost to " << max_cost << "\n";
-						this_best_cost = max_cost;
-            
-						// possibly updating best_cost
-						if (max_cost < best_cost)
-						{
-							// cerr << "\tupdating best cost to " << max_cost << "\n";
-							best_cost = max_cost;
-						}					
-					}
-				}
-			}
-		}
-		match_level++;
-	}
-	return this_best_cost;
-}
-
-#endif
diff --git a/contrib/fuzzy-match/make-xml-from-match.perl b/contrib/fuzzy-match/make-xml-from-match.perl
deleted file mode 100644
index b5c213a3d..000000000
--- a/contrib/fuzzy-match/make-xml-from-match.perl
+++ /dev/null
@@ -1,214 +0,0 @@
-#!/usr/bin/perl -w
-
-use strict;
-
-my $DEBUG = 1;
-
-my $match_file  = "tm/BEST.acquis-xml-escaped.4.uniq";
-my $source_file = "data/acquis.truecased.4.en.uniq";
-my $target_file = "data/acquis.truecased.4.fr.uniq.most-frequent";
-my $alignment_file = "data/acquis.truecased.4.align.uniq.most-frequent";
-my $out_file = "data/ac-test.input.xml.4.uniq";
-my $in_file = "evaluation/ac-test.input.tc.4";
-
-#my $match_file  = "tm/BEST.acquis-xml-escaped.4";
-#my $source_file = "corpus/acquis.truecased.4.en";
-#my $target_file = "corpus/acquis.truecased.4.fr";
-#my $alignment_file = "model/aligned.4.grow-diag-final-and";
-#my $out_file = "data/ac-test.input.xml.4";
-#my $in_file = "evaluation/ac-test.input.tc.4";
-
-#my $match_file  = "tm/BEST.acquis.with";
-#my $source_file = "../acquis-truecase/corpus/acquis.truecased.190.en";
-#my $target_file = "../acquis-truecase/corpus/acquis.truecased.190.fr";
-#my $alignment_file = "../acquis-truecase/model/aligned.190.grow-diag-final-and";
-#my $out_file = "data/ac-test.input.xml";
-#my $in_file = "evaluation/ac-test.input.tc.1";
-
-my @INPUT = `cat $in_file`; chop(@INPUT);
-my @SOURCE = `cat $source_file`; chop(@SOURCE);
-my @TARGET = `cat $target_file`; chop(@TARGET);
-my @ALIGNMENT = `cat $alignment_file`; chop(@ALIGNMENT);
-
-open(MATCH,$match_file);
-open(FRAME,">$out_file");
-for(my $i=0;$i<4107;$i++) {
-
-    # get match data
-    my $match = <MATCH>;
-    chop($match);
-    my ($score,$sentence,$path) = split(/ \|\|\| /,$match);
-
-    # construct frame
-    if ($sentence < 1e9 && $sentence >= 0) {
-	my $frame = &create_xml($SOURCE[$sentence],
-				$INPUT[$i],
-				$TARGET[$sentence],
-				$ALIGNMENT[$sentence],
-				$path);
-	print FRAME $frame."\n";
-    }
-
-    # no frame -> output source
-    else {
-	print FRAME $INPUT[$i]."\n";
-    }
-}
-close(FRAME);
-close(MATCH);
-
-sub create_xml {
-    my ($source,$input,$target,$alignment,$path) = @_;
-    
-    my @INPUT = split(/ /,$input);
-    my @SOURCE = split(/ /,$source);
-    my @TARGET = split(/ /,$target);
-    my %ALIGN = &create_alignment($alignment);
-    
-    my %FRAME_INPUT;
-    my @TARGET_BITMAP; 
-    foreach (@TARGET) { push @TARGET_BITMAP,1 }
-    
-    ### STEP 1: FIND MISMATCHES
-
-    my ($s,$i) = (0,0);
-    my $currently_matching = 0;
-    my ($start_s,$start_i) = (0,0);
-
-    $path .= "X"; # indicate end
-    print "$input\n$source\n$target\n$path\n";
-    for(my $p=0;$p<length($path);$p++) {
-	my $action = substr($path,$p,1);
-	
-	# beginning of a mismatch
-	if ($currently_matching && $action ne "M" && $action ne "X") {
-	    $start_i = $i;
-	    $start_s = $s;
-	    $currently_matching = 0;
-	}
-	
-	# end of a mismatch
-	elsif (!$currently_matching && 
-	       ($action eq "M" || $action eq "X")) {
-	    
-	    # remove use of affected target words
-	    for(my $ss = $start_s; $ss<$s; $ss++) {
-		foreach my $tt (keys %{${$ALIGN{'s'}}[$ss]}) {
-		    $TARGET_BITMAP[$tt] = 0;
-		}
-		
-		# also remove enclosed unaligned words?
-	    }
-	    
-	    # are there input words that need to be inserted ?
-	    print "($start_i<$i)?\n";
-	    if ($start_i<$i) {
-		
-		# take note of input words to be inserted
-		my $insertion = "";
-		for(my $ii = $start_i; $ii<$i; $ii++) {
-		    $insertion .= $INPUT[$ii]." ";
-		}
-		
-		# find position for inserted input words
-		
-		# find first removed target word
-		my $start_t = 1000;
-		for(my $ss = $start_s; $ss<$s; $ss++) {
-		    foreach my $tt (keys %{${$ALIGN{'s'}}[$ss]}) {
-			$start_t = $tt if $tt < $start_t;
-		    }
-		}
-
-		# end of sentence? add to end
-		if ($start_t == 1000 && $i > $#INPUT) {
-		    $start_t = $#TARGET;
-		}
-		
-		# backtrack to previous words if unaligned
-		if ($start_t == 1000) {
-		    $start_t = -1;
-		    for(my $ss = $s-1; $start_t==-1 && $ss>=0; $ss--) {
-			foreach my $tt (keys %{${$ALIGN{'s'}}[$ss]}) {
-			    $start_t = $tt if $tt > $start_t;
-			}
-		    }
-		}
-		$FRAME_INPUT{$start_t} .= $insertion;
-	    }
-	    
-	    $currently_matching = 1;
-	}
-	
-	print "$action $s $i ($start_s $start_i) $currently_matching";
-	if ($action ne "I") {
-	    print " ->";
-	    foreach my $tt (keys %{${$ALIGN{'s'}}[$s]}) {
-		print " ".$tt;
-	    }
-	}
-	print "\n";
-	$s++ unless $action eq "I";
-	$i++ unless $action eq "D";
-    }
-    
-
-    print $target."\n";
-    foreach (@TARGET_BITMAP) { print $_; } print "\n";
-    foreach (sort keys %FRAME_INPUT) { 
-	print "$_: $FRAME_INPUT{$_}\n";
-    }
-
-    ### STEP 2: BUILD FRAME
-
-    # modify frame
-    my $frame = "";
-    $frame = $FRAME_INPUT{-1} if defined $FRAME_INPUT{-1};
-    
-    my $currently_included = 0;
-    my $start_t = -1;
-    push @TARGET_BITMAP,0; # indicate end
-
-    for(my $t=0;$t<=scalar(@TARGET);$t++) {	    
-	
-	# beginning of tm target inclusion
-	if (!$currently_included && $TARGET_BITMAP[$t]) {
-	    $start_t = $t;
-	    $currently_included = 1;
-	}
-	
-	# end of tm target inclusion (not included word or inserted input)
-	elsif ($currently_included && 
-	       (!$TARGET_BITMAP[$t] || defined($FRAME_INPUT{$t}))) {
-	    # add xml (unless change is at the beginning of the sentence
-	    if ($start_t >= 0) {
-		my $target = "";
-		print "for(tt=$start_t;tt<$t+$TARGET_BITMAP[$t]);\n";
-		for(my $tt=$start_t;$tt<$t+$TARGET_BITMAP[$t];$tt++) {
-		    $target .= $TARGET[$tt] . " ";
-		}
-		chop($target);
-		$frame .= "<xml translation=\"$target\"> x </xml> ";
-	    }
-	    $currently_included = 0;
-	}
-	
-	$frame .= $FRAME_INPUT{$t} if defined $FRAME_INPUT{$t};
-	print "$TARGET_BITMAP[$t] $t ($start_t) $currently_included\n";
-    }
-
-    print $frame."\n-------------------------------------\n";
-    return $frame;
-}
-
-sub create_alignment {
-	my ($line) = @_;
-	my (@ALIGNED_TO_S,@ALIGNED_TO_T);
-	foreach my $point (split(/ /,$line)) {
-		my ($s,$t) = split(/\-/,$point);
-		$ALIGNED_TO_S[$s]{$t}++;
-		$ALIGNED_TO_T[$t]{$s}++;
-	}
-	my %ALIGNMENT = ( 's' => \@ALIGNED_TO_S, 't' => \@ALIGNED_TO_T );
-	return %ALIGNMENT;
-}
diff --git a/contrib/fuzzy-match/old/fuzzy-match.cpp b/contrib/fuzzy-match/old/fuzzy-match.cpp
deleted file mode 100644
index 76c69e246..000000000
--- a/contrib/fuzzy-match/old/fuzzy-match.cpp
+++ /dev/null
@@ -1,982 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <getopt.h>
-#include <vector>
-#include <map>
-#include <string>
-#include <algorithm>
-#include <iostream>
-#include <fstream>
-#include <cstring>
-#include <time.h>
-
-#include "Vocabulary.h"
-#include "SuffixArray.h"
-
-/** This implementation is explained in
-       Koehn and Senellart: "Fast Approximate String Matching 
-       with Suffix Arrays and A* Parsing" (AMTA 2010) ***/
-
-using namespace std;
-
-Vocabulary vocabulary;
-
-int basic_flag = false;
-int lsed_flag = true;
-int refined_flag = true;
-int length_filter_flag = true;
-int parse_flag = true;
-int min_match = 70;
-int multiple_flag = false;
-int multiple_slack = 0;
-int multiple_max = 100;
-
-void load_corpus( char* fileName, vector< vector< WORD_ID > > &corpus )
-{
-	ifstream fileStream;
-	fileStream.open(fileName);
-	if (!fileStream) {
-		cerr << "file not found: " << fileName << endl;
-		exit(1);
-	}
-	istream *fileStreamP = &fileStream;
-
-	char line[LINE_MAX_LENGTH];
-	while(true)
-	{
-		SAFE_GETLINE((*fileStreamP), line, LINE_MAX_LENGTH, '\n');
-		if (fileStreamP->eof()) break;
-		corpus.push_back( vocabulary.Tokenize( line ) );
-	}
-}
-
-
-/* Letter string edit distance, e.g. sub 'their' to 'there' costs 2 */
-
-// global cache for word pairs
-map< pair< WORD_ID, WORD_ID >, unsigned int > lsed;
-
-unsigned int letter_sed( WORD_ID aIdx, WORD_ID bIdx )
-{
-	// check if already computed -> lookup in cache
-	pair< WORD_ID, WORD_ID > pIdx = make_pair( aIdx, bIdx );
-	map< pair< WORD_ID, WORD_ID >, unsigned int >::const_iterator lookup = lsed.find( pIdx );
-	if (lookup != lsed.end())
-	{
-		return (lookup->second);
-	}
-
-	// get surface strings for word indices
-	const string &a = vocabulary.GetWord( aIdx );
-	const string &b = vocabulary.GetWord( bIdx );
-
-	// initialize cost matrix
-	unsigned int **cost  = (unsigned int**) calloc( sizeof( unsigned int*  ), a.size()+1 );
-	for( unsigned int i=0; i<=a.size(); i++ ) {
-		cost[i] = (unsigned int*) calloc( sizeof(unsigned int), b.size()+1 );
-		cost[i][0] = i;
-	}
-	for( unsigned int j=0; j<=b.size(); j++ ) {
-		cost[0][j] = j;
-	}
-
-	// core string edit distance loop
-	for( unsigned int i=1; i<=a.size(); i++ ) {
-		for( unsigned int j=1; j<=b.size(); j++ ) {
-
-			unsigned int ins = cost[i-1][j] + 1;
-			unsigned int del = cost[i][j-1] + 1;
-			bool match = (a.substr(i-1,1).compare( b.substr(j-1,1) ) == 0);
-			unsigned int diag = cost[i-1][j-1] + (match ? 0 : 1);
-
-			unsigned int min = (ins < del) ? ins : del;
-			min = (diag < min) ? diag : min;
-
-			cost[i][j] = min;
-		}
-	}
-
-	// clear out memory
-	unsigned int final = cost[a.size()][b.size()];
-	for( unsigned int i=0; i<=a.size(); i++ ) {
-		free( cost[i] );
-	}
-	free( cost );
-
-	// cache and return result
-	lsed[ pIdx ] = final;
-	return final;
-}
-
-/* string edit distance implementation */
-
-unsigned int sed( const vector< WORD_ID > &a, const vector< WORD_ID > &b, string &best_path, bool use_letter_sed ) {
-
-	// initialize cost and path matrices
-	unsigned int **cost  = (unsigned int**) calloc( sizeof( unsigned int* ), a.size()+1 );
-	char **path = (char**) calloc( sizeof( char* ), a.size()+1 );
-	
-	for( unsigned int i=0; i<=a.size(); i++ ) {
-		cost[i] = (unsigned int*) calloc( sizeof(unsigned int), b.size()+1 );
-		path[i] = (char*) calloc( sizeof(char), b.size()+1 );
-		if (i>0)
-		{
-			cost[i][0] = cost[i-1][0];
-			if (use_letter_sed)
-			{
-				cost[i][0] += vocabulary.GetWord( a[i-1] ).size();
-			}
-			else
-			{
-				cost[i][0]++;
-			}
-		}
-		else
-		{
-			cost[i][0] = 0;
-		}
-		path[i][0] = 'I';
-	}
-
-	for( unsigned int j=0; j<=b.size(); j++ ) {
-		if (j>0) 
-		{
-			cost[0][j] = cost[0][j-1];
-			if (use_letter_sed)
-			{
-				cost[0][j] +=	vocabulary.GetWord( b[j-1] ).size();
-			}
-			else
-			{
-				cost[0][j]++;
-			}
-		}
-		else
-		{
-			cost[0][j] = 0;
-		}
-		path[0][j] = 'D';
-	}
-
-	// core string edit distance algorithm
-	for( unsigned int i=1; i<=a.size(); i++ ) {
-		for( unsigned int j=1; j<=b.size(); j++ ) {
-			unsigned int ins = cost[i-1][j];
-			unsigned int del = cost[i][j-1];
-			unsigned int match;
-			if (use_letter_sed)
-			{
-				ins += vocabulary.GetWord( a[i-1] ).size();
-				del += vocabulary.GetWord( b[j-1] ).size();
-				match = letter_sed( a[i-1], b[j-1] );
-			}
-			else
-			{
-				ins++;
-				del++;
-				match = ( a[i-1] == b[j-1] ) ? 0 : 1;
-			}
-			unsigned int diag = cost[i-1][j-1] + match;
-
-			char action = (ins < del) ? 'I' : 'D';
-			unsigned int min = (ins < del) ? ins : del;
-			if (diag < min)
-			{
-				action = (match>0) ? 'S' : 'M';
-				min = diag;
-			}
-
-			cost[i][j] = min;
-			path[i][j] = action;
-		}
-	}
-
-	// construct string for best path
-	unsigned int i = a.size();
-	unsigned int j = b.size();
-	best_path = "";
-	while( i>0 || j>0 )
-	{
-		best_path = path[i][j] + best_path;
-		if (path[i][j] == 'I') 
-		{
-			i--;
-		}
-		else if (path[i][j] == 'D') 
-		{
-			j--;
-		}
-		else 
-		{ 
-			i--; 
-			j--;
-		}
-	}
-	
-
-	// clear out memory
-	unsigned int final = cost[a.size()][b.size()];
-
-	for( unsigned int i=0; i<=a.size(); i++ ) {
-		free( cost[i] );
-		free( path[i] );
-	}
-	free( cost );
-	free( path );
-
-	// return result
-	return final;
-}
-
-/* utlility function: compute length of sentence in characters 
-   (spaces do not count) */
-
-unsigned int compute_length( const vector< WORD_ID > &sentence )
-{
-	unsigned int length = 0; for( unsigned int i=0; i<sentence.size(); i++ )
-	{
-		length += vocabulary.GetWord( sentence[i] ).size();
-	}
-	return length;
-}
-
-/* brute force method: compare input to all corpus sentences */
-
-int basic_fuzzy_match( vector< vector< WORD_ID > > source, 
-                       vector< vector< WORD_ID > > input ) 
-{
-	// go through input set...
-	for(unsigned int i=0;i<input.size();i++)
-	{
-		bool use_letter_sed = false;
-
-		// compute sentence length and worst allowed cost
-		unsigned int input_length;
-		if (use_letter_sed)
-		{
-			input_length = compute_length( input[i] );
-		}
-		else
-		{
-			input_length = input[i].size();
-		}
-		unsigned int best_cost = input_length * (100-min_match) / 100 + 2;
-		string best_path = "";
-		int best_match = -1;
-
-		// go through all corpus sentences
-		for(unsigned int s=0;s<source.size();s++)
-		{
-			int source_length;
-			if (use_letter_sed)
-			{
-				source_length = compute_length( source[s] );
-			}
-			else
-			{
-				source_length = source[s].size();
-			}
-			int diff = abs((int)source_length - (int)input_length);
-			if (length_filter_flag && (diff >= best_cost))
-			{
-				continue;
-			}
-
-			// compute string edit distance
-			string path;
-			unsigned int cost = sed( input[i], source[s], path, use_letter_sed );
-
-			// update if new best
-			if (cost < best_cost) 
-			{
-				best_cost = cost;
-				best_path = path;
-				best_match = s;
-			}
-		}
-		cout << best_cost << " ||| " << best_match << " ||| " << best_path << endl;
-	}
-}
-
-#define MAX_MATCH_COUNT 10000000
-
-/* data structure for n-gram match between input and corpus */
-
-class Match {
-public:
-	int input_start;
-	int input_end;
-	int tm_start;
-	int tm_end;
-	int min_cost;
-	int max_cost;
-	int internal_cost;
-	Match( int is, int ie, int ts, int te, int min, int max, int i )
-		:input_start(is), input_end(ie), tm_start(ts), tm_end(te), min_cost(min), max_cost(max), internal_cost(i)
-		{}
-};
-
-map< WORD_ID,vector< int > > single_word_index;
-
-/* definition of short matches
-   very short n-gram matches (1-grams) will not be looked up in
-   the suffix array, since there are too many matches
-   and for longer sentences, at least one 2-gram match must occur */
-
-inline int short_match_max_length( int input_length )
-{
-	if ( ! refined_flag ) 
-		return 0;
-	if ( input_length >= 5 )
-		return 1;
-	return 0;	
-}
-
-/* if we have non-short matches in a sentence, we need to
-   take a closer look at it. 
-	 this function creates a hash map for all input words and their positions
-   (to be used by the next function) 
-   (done here, because this has be done only once for an input sentence) */
-
-void init_short_matches( const vector< WORD_ID > &input )
-{
-	int max_length = short_match_max_length( input.size() );
-	if (max_length == 0)
-		return;
-
-	single_word_index.clear();
-	
-	// store input words and their positions in hash map
-	for(int i=0; i<input.size(); i++)
-	{
-		if (single_word_index.find( input[i] ) == single_word_index.end())
-		{
-			vector< int > position_vector;
-			single_word_index[ input[i] ] = position_vector;
-		}
-		single_word_index[ input[i] ].push_back( i );
-	}	
-}
-
-/* add all short matches to list of matches for a sentence */
-
-void add_short_matches( vector< Match > &match, const vector< WORD_ID > &tm, int input_length, int best_cost )
-{	
-	int max_length = short_match_max_length( input_length );
-	if (max_length == 0)
-		return;
-
-	int tm_length = tm.size();
-	map< WORD_ID,vector< int > >::iterator input_word_hit;
-	for(int t_pos=0; t_pos<tm.size(); t_pos++)
-	{
-		input_word_hit = single_word_index.find( tm[t_pos] );
-		if (input_word_hit != single_word_index.end())
-		{
-			vector< int > &position_vector = input_word_hit->second;
-			for(int j=0; j<position_vector.size(); j++)
-			{
-				int &i_pos = position_vector[j];
-
-				// before match
-				int max_cost = max( i_pos , t_pos );
-				int min_cost = abs( i_pos - t_pos );
-				if ( i_pos>0 && i_pos == t_pos ) 
-					min_cost++;
-				
-				// after match
-				max_cost += max( (input_length-i_pos) , (tm_length-t_pos));
-				min_cost += abs( (input_length-i_pos) - (tm_length-t_pos));
-				if ( i_pos != input_length-1 && (input_length-i_pos) == (tm_length-t_pos))
-					min_cost++;
-				
-				if (min_cost <= best_cost)
-				{
-					Match new_match( i_pos,i_pos, t_pos,t_pos, min_cost,max_cost,0 );
-					match.push_back( new_match );
-				}
-			}
-		} 
-	}
-}
-
-/* remove matches that are subsumed by a larger match */
-
-vector< Match > prune_matches( const vector< Match > &match, int best_cost )
-{
-	//cerr << "\tpruning";
-	vector< Match > pruned;
-	for(int i=match.size()-1; i>=0; i--)
-	{
-		//cerr << " (" << match[i].input_start << "," << match[i].input_end 
-		//		 << " ; " << match[i].tm_start << "," << match[i].tm_end 
-		//		 << " * " << match[i].min_cost << ")";
-
-		//if (match[i].min_cost > best_cost)
-		//	continue;
-
-		bool subsumed = false;
-		for(int j=match.size()-1; j>=0; j--)
-		{
-			if (i!=j // do not compare match with itself
-					&& ( match[i].input_end - match[i].input_start <= 
-							 match[j].input_end - match[j].input_start ) // i shorter than j
-					&& ((match[i].input_start == match[j].input_start &&
-							 match[i].tm_start    == match[j].tm_start	) ||
-							(match[i].input_end   == match[j].input_end &&
-							 match[i].tm_end      == match[j].tm_end) ) )
-			{
-				subsumed = true;
-			}
-		}
-		if (! subsumed && match[i].min_cost <= best_cost)
-		{
-			//cerr << "*";
-			pruned.push_back( match[i] );
-		}
-	}
-	//cerr << endl;
-	return pruned;
-}
-
-/* A* parsing method to compute string edit distance */
-
-int parse_matches( vector< Match > &match, int input_length, int tm_length, int &best_cost )
-{	
-	// cerr << "sentence has " << match.size() << " matches, best cost: " << best_cost << ", lengths input: " << input_length << " tm: " << tm_length << endl;
-
-	if (match.size() == 1)
-		return match[0].max_cost;
-	if (match.size() == 0)
-		return input_length+tm_length;
-	
-	int this_best_cost = input_length + tm_length;
-	for(int i=0;i<match.size();i++)
-	{
-		this_best_cost = min( this_best_cost, match[i].max_cost );
-	}
-	// cerr << "\tthis best cost: " << this_best_cost << endl;
-	
-	// bottom up combination of spans
-	vector< vector< Match > > multi_match;
-	multi_match.push_back( match );
-	
-	int match_level = 1;
-	while(multi_match[ match_level-1 ].size()>0)
-	{
-		// init vector
-		vector< Match > empty;
-		multi_match.push_back( empty );
-
-		for(int first_level = 0; first_level <= (match_level-1)/2; first_level++)
-		{
-			int second_level = match_level - first_level -1;
-			//cerr << "\tcombining level " << first_level << " and " << second_level << endl;
-			
-			vector< Match > &first_match  = multi_match[ first_level ];
-			vector< Match > &second_match = multi_match[ second_level ];
-
-			for(int i1 = 0; i1 < first_match.size(); i1++) {
-				for(int i2 = 0; i2 < second_match.size(); i2++) {
-
-					// do not combine the same pair twice
-					if (first_level == second_level && i2 <= i1) 
-					{
-						continue;
-					}
-
-					// get sorted matches (first is before second)
-					Match *first, *second;
-					if (first_match[i1].input_start < second_match[i2].input_start )
-					{
-						first = &first_match[i1];
-						second = &second_match[i2];
-					}
-					else
-					{
-						second = &first_match[i1];
-						first = &second_match[i2];
-					}
-
-					//cerr << "\tcombining " 
-					//		 << "(" << first->input_start << "," << first->input_end << "), "
-					//		 << first->tm_start << " [" << first->internal_cost << "]"
-					//		 << " with "
-					//		 << "(" << second->input_start << "," << second->input_end << "), "
-					//		 << second->tm_start<< " [" << second->internal_cost << "]"
-					//		 << endl;
-
-					// do not process overlapping matches
-					if (first->input_end >= second->input_start) 
-					{
-						continue;
-					}
-
-					// no overlap / mismatch in tm
-					if (first->tm_end >= second->tm_start)
-					{
-						continue;
-					}
-
-					// compute cost
-					int min_cost = 0;
-					int max_cost = 0;
-
-					// initial
-					min_cost += abs( first->input_start - first->tm_start );
-					max_cost += max( first->input_start, first->tm_start );				 
-
-					// same number of words, but not sent. start -> cost is at least 1 
-					if (first->input_start == first->tm_start && first->input_start > 0)
-					{
-						min_cost++;
-					}
-
-					// in-between
-					int skipped_words = second->input_start - first->input_end -1;
-					int skipped_words_tm = second->tm_start - first->tm_end -1;
-					int internal_cost = max( skipped_words, skipped_words_tm );
-					internal_cost += first->internal_cost + second->internal_cost;
-					min_cost += internal_cost;
-					max_cost += internal_cost;
-					
-					// final
-					min_cost += abs( (tm_length-1 - second->tm_end) -
-													 (input_length-1 - second->input_end) );
-					max_cost += max( (tm_length-1 - second->tm_end),
-													 (input_length-1 - second->input_end) );
-
-					// same number of words, but not sent. end -> cost is at least 1
-					if ( ( input_length-1 - second->input_end 
-								 == tm_length-1 - second->tm_end )
-							 && input_length-1 != second->input_end )
-					{
-						min_cost++;
-					}
-
-					// cerr << "\tcost: " << min_cost << "-" << max_cost << endl;
-
-					// if worst than best cost, forget it
-					if (min_cost > best_cost)					
-					{
-						continue;
-					}
-					
-					// add match
-					Match new_match( first->input_start,
-													 second->input_end,
-													 first->tm_start,
-													 second->tm_end,
-													 min_cost,
-													 max_cost,
-													 internal_cost);
-					multi_match[ match_level ].push_back( new_match );
-					// cerr << "\tstored\n";
-					
-					// possibly updating this_best_cost
-					if (max_cost < this_best_cost)
-					{
-						// cerr << "\tupdating this best cost to " << max_cost << "\n";
-						this_best_cost = max_cost;
-
-						// possibly updating best_cost
-						if (max_cost < best_cost)
-						{
-							// cerr << "\tupdating best cost to " << max_cost << "\n";
-							best_cost = max_cost;
-						}					
-					}
-				}
-			}
-		}
-		match_level++;
-	}
-	return this_best_cost;
-}
-
-int main(int argc, char* argv[]) 
-{
-	vector< vector< WORD_ID > > source, input;
-
-	while(1) {
-		static struct option long_options[] = {
-			{"basic", no_argument, &basic_flag, 1},
-			{"word", no_argument, &lsed_flag, 0},
-			{"unrefined", no_argument, &refined_flag, 0},
-			{"nolengthfilter", no_argument, &length_filter_flag, 0},
-			{"noparse", no_argument, &parse_flag, 0},
-			{"multiple", no_argument, &multiple_flag, 1},
-			{"minmatch", required_argument, 0, 'm'},
-			{0, 0, 0, 0}
-		};
-		int option_index = 0;
-		int c = getopt_long (argc, argv, "m:", long_options, &option_index);
-		if (c == -1) break;
-		switch (c) {
-			case 0:
-//				if (long_options[option_index].flag != 0)
-//					break;
-//				printf ("option %s", long_options[option_index].name);
-//				if (optarg)
-//					printf (" with arg %s", optarg);
-//				printf ("\n");
-				break;
-			case 'm':
-				min_match = atoi(optarg);
-				if (min_match < 1 || min_match > 100) {
-					cerr << "error: --minmatch must have value in range 1..100\n";
-					exit(1);
-				}
-				cerr << "setting min match to " << min_match << endl;
-				break;
-			default:
-				cerr << "usage: syntax: ./fuzzy-match input corpus [--basic] [--word] [--minmatch 1..100]\n";
-				exit(1);
-		}
-	}
-	if (lsed_flag) { cerr << "lsed\n"; }
-	if (basic_flag) { cerr << "basic\n"; }
-	if (refined_flag) { cerr << "refined\n"; }
-	if (length_filter_flag) { cerr << "length filter\n"; }
-	if (parse_flag) { cerr << "parse\n"; }
-//	exit(1);
-
-
-	if (optind+2 != argc) {
-		cerr << "syntax: ./fuzzy-match input corpus [--basic] [--word] [--minmatch 1..100]\n";
-		exit(1);
-	}
-	
-	cerr << "loading corpus...\n";
-
-	load_corpus(argv[optind], input);
-	load_corpus(argv[optind+1], source);
-
-  // ./fuzzy-match input corpus [-basic] 
-	
-//	load_corpus("../corpus/tm.truecased.4.en", source);
-//	load_corpus("../corpus/tm.truecased.4.it", target);
-//	load_corpus("../evaluation/test.input.tc.4", input);
-
-//	load_corpus("../../acquis-truecase/corpus/acquis.truecased.190.en", source);
-//	load_corpus("../../acquis-truecase/evaluation/ac-test.input.tc.190", input);
-
-//	load_corpus("../corpus/tm.truecased.16.en", source);
-//	load_corpus("../evaluation/test.input.tc.16", input);
-
-	if (basic_flag) {
-		cerr << "using basic method\n";
-		clock_t start_main_clock2 = clock();
-		basic_fuzzy_match( source, input );
-		cerr << "total: " << (1000 * (clock()-start_main_clock2) / CLOCKS_PER_SEC) << endl;
-		exit(1);
-	}
-
-	cerr << "number of input sentences " << input.size() << endl;
-
-	cerr << "creating suffix array...\n";
-//	SuffixArray suffixArray( "../corpus/tm.truecased.4.en" );
-//	SuffixArray suffixArray( "../../acquis-truecase/corpus/acquis.truecased.190.en" );
-	SuffixArray suffixArray( argv[optind+1] );
-	
-	clock_t start_main_clock = clock();
-
-	// looping through all input sentences...
-	cerr << "looping...\n";
-	for(unsigned int i=0;i<input.size();i++)
-	{
-		clock_t start_clock = clock();
-		// if (i % 10 == 0) cerr << ".";
-		int input_id = i; // clean up this mess!
-
-		// establish some basic statistics
-
-		// int input_length = compute_length( input[i] );
-		int input_length = input[i].size();
-		int best_cost = input_length * (100-min_match) / 100 + 1;
-
-		int match_count = 0; // how many substring matches to be considered
-		//cerr << endl << "sentence " << i << ", length " << input_length << ", best_cost " << best_cost << endl;
-
-		// find match ranges in suffix array
-		vector< vector< pair< SuffixArray::INDEX, SuffixArray::INDEX > > > match_range;
-		for(size_t start=0;start<input[i].size();start++) 
-		{
-			SuffixArray::INDEX prior_first_match = 0;
-			SuffixArray::INDEX prior_last_match = suffixArray.GetSize()-1;
-			vector< string > substring;
-			bool stillMatched = true;
-			vector< pair< SuffixArray::INDEX, SuffixArray::INDEX > > matchedAtThisStart;
-			//cerr << "start: " << start;
-			for(int word=start; stillMatched && word<input[i].size(); word++)
-			{
-				substring.push_back( vocabulary.GetWord( input[i][word] ) );
-
-				// only look up, if needed (i.e. no unnecessary short gram lookups)
-//				if (! word-start+1 <= short_match_max_length( input_length ) )
-				//			{
-				SuffixArray::INDEX first_match, last_match;
-				stillMatched = false;
-				if (suffixArray.FindMatches( substring, first_match, last_match, prior_first_match, prior_last_match ) )
-				{
-					stillMatched = true;
-					matchedAtThisStart.push_back( make_pair( first_match, last_match ) );
-					//cerr << " (" << first_match << "," << last_match << ")";
-					//cerr << " " << ( last_match - first_match + 1 );
-					prior_first_match = first_match;
-					prior_last_match = last_match;
-				}
-					//}
-			}
-			//cerr << endl;
-			match_range.push_back( matchedAtThisStart );
-		}
-
-		clock_t clock_range = clock();
-
-		map< int, vector< Match > > sentence_match;
-		map< int, int > sentence_match_word_count;
-
-		// go through all matches, longest first
-		for(int length = input[i].size(); length >= 1; length--)
-		{
-			// do not create matches, if these are handled by the short match function
-			if (length <= short_match_max_length( input_length ) )
-			{
-				continue;
-			}
-
-			unsigned int count = 0;
-			for(int start = 0; start <= input[i].size() - length; start++)
-			{
-				if (match_range[start].size() >= length)
-				{
-					pair< SuffixArray::INDEX, SuffixArray::INDEX > &range = match_range[start][length-1];
-					// cerr << " (" << range.first << "," << range.second << ")";
-					count += range.second - range.first + 1;
-
-					for(SuffixArray::INDEX i=range.first; i<=range.second; i++)
-					{
-						int position = suffixArray.GetPosition( i );
-
-						// sentence length mismatch
-						size_t sentence_id = suffixArray.GetSentence( position );
-						int sentence_length = suffixArray.GetSentenceLength( sentence_id );
-						int diff = abs( (int)sentence_length - (int)input_length );
-						// cerr << endl << i << "\tsentence " << sentence_id << ", length " << sentence_length;
-						//if (length <= 2 && input_length>=5 &&
-						//		sentence_match.find( sentence_id ) == sentence_match.end())
-						//	continue;
-
-						if (diff > best_cost)
-							continue;
-
-						// compute minimal cost
-						int start_pos = suffixArray.GetWordInSentence( position );
-						int end_pos = start_pos + length-1;
-						// cerr << endl << "\t" << start_pos << "-" << end_pos << " (" << sentence_length << ") vs. " 
-						// << start << "-" << (start+length-1) << " (" << input_length << ")"; 
-						// different number of prior words -> cost is at least diff
-						int min_cost = abs( start - start_pos );
-						
-						// same number of words, but not sent. start -> cost is at least 1 
-						if (start == start_pos && start>0)
-							min_cost++;
-
-						// different number of remaining words -> cost is at least diff
-						min_cost += abs( ( sentence_length-1 - end_pos ) -
-														 ( input_length-1 - (start+length-1) ) );
-
-						// same number of words, but not sent. end -> cost is at least 1
-						if ( sentence_length-1 - end_pos ==
-								 input_length-1 - (start+length-1)
-								 && end_pos != sentence_length-1 )
-							min_cost++;
-
-						// cerr << " -> min_cost " << min_cost;
-						if (min_cost > best_cost)
-							continue;
-
-						// valid match
-						match_count++;
-
-						// compute maximal cost
-						int max_cost = max( start, start_pos )
-							+ max( sentence_length-1 - end_pos,
-										 input_length-1 - (start+length-1) );
-						// cerr << ", max_cost " << max_cost;
-						
-						Match m = Match( start, start+length-1, 
-														 start_pos, start_pos+length-1, 
-														 min_cost, max_cost, 0);
-						sentence_match[ sentence_id ].push_back( m );
-						sentence_match_word_count[ sentence_id ] += length;
-
-						if (max_cost < best_cost)
-						{
-							best_cost = max_cost;
-							if (best_cost == 0) break;
-						}
-						//if (match_count >= MAX_MATCH_COUNT) break;
-					}
-				}
-				// cerr << endl;
-				if (best_cost == 0) break;
-				//if (match_count >= MAX_MATCH_COUNT) break;
-			}
-			// cerr << count << " matches at length " << length << " in " << sentence_match.size() << " tm." << endl;
-
-			if (best_cost == 0) break;
-			//if (match_count >= MAX_MATCH_COUNT) break;
-		}
-		cerr << match_count << " matches in " << sentence_match.size() << " sentences." << endl;
-
-		clock_t clock_matches = clock();
-
-		// consider each sentence for which we have matches
-		int old_best_cost = best_cost;
-		int tm_count_word_match = 0;
-		int tm_count_word_match2 = 0;
-		int pruned_match_count = 0;
-		if (short_match_max_length( input_length ))
-		{
-			init_short_matches( input[i] );
-		}
-		vector< int > best_tm;
-		typedef map< int, vector< Match > >::iterator I;
-
-		clock_t clock_validation_sum = 0;
-
-		for(I tm=sentence_match.begin(); tm!=sentence_match.end(); tm++)
-		{
-			int tmID = tm->first;
-			int tm_length = suffixArray.GetSentenceLength(tmID);
-			vector< Match > &match = tm->second;
-			add_short_matches( match, source[tmID], input_length, best_cost );
-
-			//cerr << "match in sentence " << tmID << ": " << match.size() << " [" << tm_length << "]" << endl;
-
-			// quick look: how many words are matched
-			int words_matched = 0;
-			for(int m=0;m<match.size();m++) {
-
-				if (match[m].min_cost <= best_cost) // makes no difference
-					words_matched += match[m].input_end - match[m].input_start + 1;
-			}
-			if (max(input_length,tm_length) - words_matched > best_cost)
-			{
-				if (length_filter_flag) continue;
-			}
-			tm_count_word_match++;
-
-			// prune, check again how many words are matched
-			vector< Match > pruned = prune_matches( match, best_cost );
-			words_matched = 0;
-			for(int p=0;p<pruned.size();p++) {
-				words_matched += pruned[p].input_end - pruned[p].input_start + 1;
-			}
-			if (max(input_length,tm_length) - words_matched > best_cost)
-			{
-				if (length_filter_flag) continue;
-			}
-			tm_count_word_match2++;
-
-			pruned_match_count += pruned.size();
-			int prior_best_cost = best_cost;
-			int cost;
-
-			clock_t clock_validation_start = clock();
-			if (! parse_flag ||
-			    pruned.size()>=10) // to prevent worst cases
-			{
-				string path;
-				cost = sed( input[input_id], source[tmID], path, false );
-				if (cost <  best_cost) 
-				{
-					best_cost = cost;
-				}
-			}
-
-			else
-			{
-				cost = parse_matches( pruned, input_length, tm_length, best_cost );
-				if (prior_best_cost != best_cost)
-				{
-					best_tm.clear();
-				}
-			}
-			clock_validation_sum += clock() - clock_validation_start;
-			if (cost == best_cost)
-			{
-				best_tm.push_back( tmID );
-			}
-		}
-		cerr << "reduced best cost from " << old_best_cost << " to " << best_cost << endl;
-		cerr << "tm considered: " << sentence_match.size()
-				 << " word-matched: " << tm_count_word_match 
-				 << " word-matched2: " << tm_count_word_match2 
-				 << " best: " << best_tm.size() << endl;
-
-		cerr << "pruned matches: " << ((float)pruned_match_count/(float)tm_count_word_match2) << endl;
-
-		// do not try to find the best ... report multiple matches
-		if (multiple_flag) {
-			int input_letter_length = compute_length( input[input_id] );
-			for(int si=0; si<best_tm.size(); si++) {
-				int s = best_tm[si];
-				string path;
-				unsigned int letter_cost = sed( input[input_id], source[s], path, true );
-				// do not report multiple identical sentences, but just their count
-				cout << i << " "; // sentence number
-				cout << letter_cost << "/" << input_letter_length << " ";
-				cout << "(" << best_cost <<"/" << input_length <<") ";
-				cout << "||| " << s << " ||| " << path << endl;
-			}
-			continue;
-		}
-
-		// find the best matches according to letter sed
-		string best_path = "";
-		int best_match = -1;
-		int best_letter_cost;
-		if (lsed_flag) {
-			best_letter_cost = compute_length( input[input_id] ) * min_match / 100 + 1;
-			for(int si=0; si<best_tm.size(); si++)
-			{
-				int s = best_tm[si];
-				string path;
-				unsigned int letter_cost = sed( input[input_id], source[s], path, true );
-				if (letter_cost < best_letter_cost)
-				{
-					best_letter_cost = letter_cost;
-					best_path = path;
-					best_match = s;
-				}
-			}
-		}
-		// if letter sed turned off, just compute path for first match
-		else {
-			if (best_tm.size() > 0) {
-				string path;
-				sed( input[input_id], source[best_tm[0]], path, false );
-				best_path = path;
-				best_match = best_tm[0];
-			}
-		}
-		cerr << "elapsed: " << (1000 * (clock()-start_clock) / CLOCKS_PER_SEC)
-				 << " ( range: " << (1000 * (clock_range-start_clock) / CLOCKS_PER_SEC)
-				 << " match: " << (1000 * (clock_matches-clock_range) / CLOCKS_PER_SEC)
-				 << " tm: " << (1000 * (clock()-clock_matches) / CLOCKS_PER_SEC)
-				 << " (validation: " << (1000 * (clock_validation_sum) / CLOCKS_PER_SEC) << ")"
-				 << " )" << endl;
-		if (lsed_flag) {
-			cout << best_letter_cost << "/" << compute_length( input[input_id] ) << " (";
-		}
-		cout << best_cost <<"/" << input_length;
-		if (lsed_flag) 	cout << ")";
-		cout << " ||| " << best_match << " ||| " << best_path << endl;
-	}
-	cerr << "total: " << (1000 * (clock()-start_main_clock) / CLOCKS_PER_SEC) << endl;
-	
-
-}
diff --git a/contrib/fuzzy-match/old/get-multiple-translations-for-uniq-sources.perl b/contrib/fuzzy-match/old/get-multiple-translations-for-uniq-sources.perl
deleted file mode 100644
index 49e9ce1ec..000000000
--- a/contrib/fuzzy-match/old/get-multiple-translations-for-uniq-sources.perl
+++ /dev/null
@@ -1,58 +0,0 @@
-#!/usr/bin/perl -w
-
-use strict;
-
-my $src_in = "corpus/acquis.truecased.4.en";
-my $tgt_in = "corpus/acquis.truecased.4.fr";
-my $align_in = "model/aligned.4.grow-diag-final-and";
-
-my $src_out = "data/acquis.truecased.4.en.uniq";
-my $tgt_out = "data/acquis.truecased.4.fr.uniq";
-my $tgt_mf  = "data/acquis.truecased.4.fr.uniq.most-frequent";
-my $align_out = "data/acquis.truecased.4.align.uniq";
-my $align_mf  = "data/acquis.truecased.4.align.uniq.most-frequent";
-
-my (%TRANS,%ALIGN);
-
-open(SRC,$src_in);
-open(TGT,$tgt_in);
-open(ALIGN,$align_in);
-while(my $src = <SRC>) {
-  my $tgt = <TGT>;
-  my $align = <ALIGN>;
-  chop($tgt);
-  chop($align);
-  $TRANS{$src}{$tgt}++;
-  $ALIGN{$src}{$tgt} = $align;
-}
-close(SRC);
-close(TGT);
-
-open(SRC_OUT,">$src_out");
-open(TGT_OUT,">$tgt_out");
-open(TGT_MF, ">$tgt_mf");
-open(ALIGN_OUT,">$align_out");
-open(ALIGN_MF, ">$align_mf");
-foreach my $src (keys %TRANS) {
-  print SRC_OUT $src;
-  my $first = 1;
-  my ($max,$best) = (0);
-  foreach my $tgt (keys %{$TRANS{$src}}) {
-    print TGT_OUT " ||| " unless $first;
-    print TGT_OUT $TRANS{$src}{$tgt}." ".$tgt;
-    print ALIGN_OUT " ||| " unless $first;
-    print ALIGN_OUT $ALIGN{$src}{$tgt};
-    if ($TRANS{$src}{$tgt} > $max) {
-      $max = $TRANS{$src}{$tgt};
-      $best = $tgt;
-    }
-    $first = 0;
-  }
-  print TGT_OUT "\n";
-  print ALIGN_OUT "\n";
-  print TGT_MF $best."\n";
-  print ALIGN_MF $ALIGN{$src}{$best}."\n";
-}
-close(SRC_OUT);
-close(TGT_OUT);
-
diff --git a/contrib/fuzzy-match/old/make-pt-from-tm.perl b/contrib/fuzzy-match/old/make-pt-from-tm.perl
deleted file mode 100755
index 6bdb2fa93..000000000
--- a/contrib/fuzzy-match/old/make-pt-from-tm.perl
+++ /dev/null
@@ -1,308 +0,0 @@
-#!/usr/bin/perl -w 
-
-use strict;
-use FindBin qw($RealBin);
-use File::Basename;
-
-my $DEBUG = 1;
-my $OUTPUT_RULES = 1;
-
-#my $data_root = "/Users/hieuhoang/workspace/experiment/data/tm-mt-integration/";
-my $in_file 		= $ARGV[0]; #"$data_root/in/ac-test.input.tc.4";
-my $source_file 	= $ARGV[1]; #"$data_root/in/acquis.truecased.4.en.uniq";
-my $target_file 	= $ARGV[2]; #"$data_root/in/acquis.truecased.4.fr.uniq";
-my $alignment_file	= $ARGV[3]; #"$data_root/in/acquis.truecased.4.align.uniq";
-my $lex_file		= $ARGV[4]; #$data_root/in/lex.4;
-my $pt_file			= $ARGV[5]; #"$data_root/out/pt";
-
-my $cmd;
-
-my $TMPDIR=dirname($pt_file)  ."/tmp.$$";
-$cmd = "mkdir -p $TMPDIR";
-`$cmd`;
-
-my $match_file  = "$TMPDIR/match";
-
-# suffix array creation and extraction
-$cmd = "$RealBin/fuzzy-match --multiple $in_file  $source_file > $match_file";
-print STDERR "$cmd \n";
-`$cmd`;
-
-# make into xml and pt
-my $out_file = "$TMPDIR/ac-test.input.xml.4.uniq.multi.tuning";
-
-my @INPUT = `cat $in_file`; chop(@INPUT);
-my @ALL_SOURCE = `cat $source_file`; chop(@ALL_SOURCE);
-my @ALL_TARGET = `cat $target_file`; chop(@ALL_TARGET);
-my @ALL_ALIGNMENT = `cat $alignment_file`; chop(@ALL_ALIGNMENT);
-
-open(MATCH,$match_file);
-open(FRAME,">$out_file");
-open(RULE,">$out_file.extract") if $OUTPUT_RULES;
-open(RULE_INV,">$out_file.extract.inv") if $OUTPUT_RULES;
-open(INFO,">$out_file.info");
-while( my $match = <MATCH> ) {
-    chop($match);
-    my ($score,$sentence,$path) = split(/ \|\|\| /,$match);
-
-    $score =~ /^(\d+) (.+)/ || die;
-    my ($i,$match_score) = ($1,$2);
-	print STDERR "i=$i match_score=$match_score\n";
-	
-    # construct frame
-    if ($sentence < 1e9 && $sentence >= 0) {
-		my $SOURCE = $ALL_SOURCE[$sentence];
-		my @ALIGNMENT = split(/ \|\|\| /,$ALL_ALIGNMENT[$sentence]);
-		my @TARGET = split(/ \|\|\| /,$ALL_TARGET[$sentence]);
-		
-		for(my $j=0;$j<scalar(@TARGET);$j++) {
-			$TARGET[$j] =~ /^(\d+) (.+)$/ || die;
-			my ($target_count,$target) = ($1,$2);
-			my ($frame,$rule_s,$rule_t,$rule_alignment,$rule_alignment_inv) = 
-			&create_xml($SOURCE,
-					$INPUT[$i],
-					$target,
-					$ALIGNMENT[$j],
-					$path);
-			print FRAME $frame."\n";
-			print RULE "$rule_s [X] ||| $rule_t [X] ||| $rule_alignment ||| $target_count\n" if $OUTPUT_RULES;
-			print RULE_INV "$rule_t [X] ||| $rule_s [X] ||| $rule_alignment_inv ||| $target_count\n" if $OUTPUT_RULES;
-			print INFO "$i ||| $match_score ||| $target_count\n";
-		}
-    }
-}
-close(FRAME);
-close(MATCH);
-close(RULE) if $OUTPUT_RULES;
-close(RULE_INV) if $OUTPUT_RULES;
-
-`LC_ALL=C sort $out_file.extract | gzip -c > $out_file.extract.sorted.gz`;
-`LC_ALL=C sort $out_file.extract.inv | gzip -c > $out_file.extract.inv.sorted.gz`;
-
-if ($OUTPUT_RULES)
-{
-  $cmd = "$RealBin/../../scripts/training/train-model.perl -dont-zip -first-step 6 -last-step 6 -f en -e fr -hierarchical -extract-file $out_file.extract -lexical-file $lex_file -phrase-translation-table $pt_file";
-  print STDERR "Executing: $cmd \n";
-  `$cmd`;
-}
-
-#$cmd = "rm -rf $TMPDIR";
-#`$cmd`;
-
-#######################################################
-sub create_xml {
-    my ($source,$input,$target,$alignment,$path) = @_;
-    
-	print STDERR " HIEU \n $source \n $input \n $target \n $alignment \n $path \n";
-
-    my @INPUT = split(/ /,$input);
-    my @SOURCE = split(/ /,$source);
-    my @TARGET = split(/ /,$target);
-    my %ALIGN = &create_alignment($alignment);
-    
-    my %FRAME_INPUT;
-    my (@NT,@INPUT_BITMAP,@TARGET_BITMAP,%ALIGNMENT_I_TO_S);
-    foreach (@TARGET) { push @TARGET_BITMAP,1 }
-    
-    ### STEP 1: FIND MISMATCHES
-
-    my ($s,$i) = (0,0);
-    my $currently_matching = 0;
-    my ($start_s,$start_i) = (0,0);
-
-    $path .= "X"; # indicate end
-    print STDERR "$input\n$source\n$target\n$path\n";
-    for(my $p=0;$p<length($path);$p++) {
-	my $action = substr($path,$p,1);
-
-	# beginning of a mismatch
-	if ($currently_matching && $action ne "M" && $action ne "X") {
-	    $start_i = $i;
-	    $start_s = $s;
-	    $currently_matching = 0;
-	}
-	
-	# end of a mismatch
-	elsif (!$currently_matching && 
-	       ($action eq "M" || $action eq "X")) {
-	    
-	    # remove use of affected target words
-	    for(my $ss = $start_s; $ss<$s; $ss++) {
-		foreach my $tt (keys %{${$ALIGN{'s'}}[$ss]}) {
-		    $TARGET_BITMAP[$tt] = 0;
-		}
-		
-		# also remove enclosed unaligned words?
-	    }
-	    
-	    # are there input words that need to be inserted ?
-	    print STDERR "($start_i<$i)?\n";
-	    if ($start_i<$i) {
-		
-		# take note of input words to be inserted
-		my $insertion = "";
-		for(my $ii = $start_i; $ii<$i; $ii++) {
-		    $insertion .= $INPUT[$ii]." ";
-		}
-		
-		# find position for inserted input words
-		
-		# find first removed target word
-		my $start_t = 1000;
-		for(my $ss = $start_s; $ss<$s; $ss++) {
-		    foreach my $tt (keys %{${$ALIGN{'s'}}[$ss]}) {
-			$start_t = $tt if $tt < $start_t;
-		    }
-		}
-
-		# end of sentence? add to end
-		if ($start_t == 1000 && $i > $#INPUT) {
-		    $start_t = $#TARGET;
-		}
-		
-		# backtrack to previous words if unaligned
-		if ($start_t == 1000) {
-		    $start_t = -1;
-		    for(my $ss = $s-1; $start_t==-1 && $ss>=0; $ss--) {
-			foreach my $tt (keys %{${$ALIGN{'s'}}[$ss]}) {
-			    $start_t = $tt if $tt > $start_t;
-			}
-		    }
-		}
-		$FRAME_INPUT{$start_t} .= $insertion;
-		my %NT = ("start_t" => $start_t,
-			  "start_i" => $start_i );
-		push @NT,\%NT;		
-	    }	    
-	    $currently_matching = 1;
-	}
-	
-	print STDERR "$action $s $i ($start_s $start_i) $currently_matching";
-	if ($action ne "I") {
-	    print STDERR " ->";
-	    foreach my $tt (keys %{${$ALIGN{'s'}}[$s]}) {
-		print STDERR " ".$tt;
-	    }
-	}
-	print STDERR "\n";
-	$s++ unless $action eq "I";
-	$i++ unless $action eq "D";
-	$ALIGNMENT_I_TO_S{$i} = $s unless $action eq "D";
-	push @INPUT_BITMAP, 1 if $action eq "M";
-	push @INPUT_BITMAP, 0 if $action eq "I" || $action eq "S";
-    }
-    
-
-    print STDERR $target."\n";
-    foreach (@TARGET_BITMAP) { print STDERR $_; } print STDERR "\n";
-    foreach (sort keys %FRAME_INPUT) { 
-	print STDERR "$_: $FRAME_INPUT{$_}\n";
-    }
-
-    ### STEP 2: BUILD RULE AND FRAME
-        
-    # hierarchical rule
-    my $rule_s = "";
-    my $rule_pos_s = 0;
-    my %RULE_ALIGNMENT_S;
-    for(my $i=0;$i<scalar(@INPUT_BITMAP);$i++) {
-		if ($INPUT_BITMAP[$i]) {
-			$rule_s .= $INPUT[$i]." ";
-			$RULE_ALIGNMENT_S{$ALIGNMENT_I_TO_S{$i}} = $rule_pos_s++;
-		}
-		foreach my $NT (@NT) {
-			if ($i == $$NT{"start_i"}) {
-				$rule_s .= "[X][X] ";
-				$$NT{"rule_pos_s"} = $rule_pos_s++;
-			}
-		}
-    }
-
-    my $rule_t = "";
-    my $rule_pos_t = 0;
-    my %RULE_ALIGNMENT_T;
-    for(my $t=-1;$t<scalar(@TARGET_BITMAP);$t++) {
-	if ($t>=0 && $TARGET_BITMAP[$t]) {
-	    $rule_t .= $TARGET[$t]." ";
-	    $RULE_ALIGNMENT_T{$t} = $rule_pos_t++;
-	}
-	foreach my $NT (@NT) {
-	    if ($t == $$NT{"start_t"}) {
-		$rule_t .= "[X][X] ";
-		$$NT{"rule_pos_t"} = $rule_pos_t++;
-	    }
-	}
-    }
-
-    my $rule_alignment = "";
-    foreach my $s (sort { $a <=> $b} keys %RULE_ALIGNMENT_S) {
-	foreach my $t (keys %{$ALIGN{"s"}[$s]}) {
-	    next unless defined($RULE_ALIGNMENT_T{$t});
-	    $rule_alignment .= $RULE_ALIGNMENT_S{$s}."-".$RULE_ALIGNMENT_T{$t}." ";
-	}
-    }
-    foreach my $NT (@NT) {
-	$rule_alignment .= $$NT{"rule_pos_s"}."-".$$NT{"rule_pos_t"}." ";
-    }
-    
-    chop($rule_s);
-    chop($rule_t);
-    chop($rule_alignment);
-
-    my $rule_alignment_inv = "";
-    foreach (split(/ /,$rule_alignment)) {
-	/^(\d+)\-(\d+)$/;
-	$rule_alignment_inv .= "$2-$1 ";
-    }
-    chop($rule_alignment_inv);
-
-    # frame
-    my $frame = "";
-    $frame = $FRAME_INPUT{-1} if defined $FRAME_INPUT{-1};
-
-    my $currently_included = 0;
-    my $start_t = -1;
-    push @TARGET_BITMAP,0; # indicate end
-
-    for(my $t=0;$t<=scalar(@TARGET);$t++) {	    
-	# beginning of tm target inclusion
-	if (!$currently_included && $TARGET_BITMAP[$t]) {
-	    $start_t = $t;
-	    $currently_included = 1;
-	}
-	
-	# end of tm target inclusion (not included word or inserted input)
-	elsif ($currently_included && 
-	       (!$TARGET_BITMAP[$t] || defined($FRAME_INPUT{$t}))) {
-	    # add xml (unless change is at the beginning of the sentence
-	    if ($start_t >= 0) {
-		my $target = "";
-		print STDERR "for(tt=$start_t;tt<$t+$TARGET_BITMAP[$t]);\n";
-		for(my $tt=$start_t;$tt<$t+$TARGET_BITMAP[$t];$tt++) {
-		    $target .= $TARGET[$tt] . " ";
-		}
-		chop($target);
-		$frame .= "<xml translation=\"$target\"> x </xml> ";
-	    }
-	    $currently_included = 0;
-	}
-	
-	$frame .= $FRAME_INPUT{$t} if defined $FRAME_INPUT{$t};
-	print STDERR "$TARGET_BITMAP[$t] $t ($start_t) $currently_included\n";
-    }
-
-    print STDERR $frame."\n-------------------------------------\n";
-    return ($frame,$rule_s,$rule_t,$rule_alignment,$rule_alignment_inv);
-}
-
-sub create_alignment {
-	my ($line) = @_;
-	my (@ALIGNED_TO_S,@ALIGNED_TO_T);
-	foreach my $point (split(/ /,$line)) {
-		my ($s,$t) = split(/\-/,$point);
-		$ALIGNED_TO_S[$s]{$t}++;
-		$ALIGNED_TO_T[$t]{$s}++;
-	}
-	my %ALIGNMENT = ( 's' => \@ALIGNED_TO_S, 't' => \@ALIGNED_TO_T );
-	return %ALIGNMENT;
-}
diff --git a/contrib/fuzzy-match/old/make-pt-from-tm2.perl b/contrib/fuzzy-match/old/make-pt-from-tm2.perl
deleted file mode 100755
index 3a5fa4171..000000000
--- a/contrib/fuzzy-match/old/make-pt-from-tm2.perl
+++ /dev/null
@@ -1,300 +0,0 @@
-#!/usr/bin/perl -w -d 
-
-use strict;
-use FindBin qw($RealBin);
-use File::Basename;
-
-my $DEBUG = 1;
-my $OUTPUT_RULES = 1;
-
-#my $data_root = "/Users/hieuhoang/workspace/experiment/data/tm-mt-integration/";
-my $in_file 		= $ARGV[0]; #"$data_root/in/ac-test.input.tc.4";
-my $source_file 	= $ARGV[1]; #"$data_root/in/acquis.truecased.4.en.uniq";
-my $target_file 	= $ARGV[2]; #"$data_root/in/acquis.truecased.4.fr.uniq";
-my $alignment_file	= $ARGV[3]; #"$data_root/in/acquis.truecased.4.align.uniq";
-my $lex_file		= $ARGV[4]; #$data_root/in/lex.4;
-my $pt_file			= $ARGV[5]; #"$data_root/out/pt";
-
-my $cmd;
-
-my $TMPDIR= "/tmp/tmp.$$";
-$cmd = "mkdir -p $TMPDIR";
-`$cmd`;
-$TMPDIR = "/Users/hieuhoang/workspace/experiment/data/tm-mt-integration/out/tmp.3196";
-
-my $match_file  = "$TMPDIR/match";
-
-# suffix array creation and extraction
-$cmd = "$RealBin/fuzzy-match --multiple $in_file  $source_file > $match_file";
-`$cmd`;
-
-# make into xml and pt
-my $out_file = "$TMPDIR/ac-test.input.xml.4.uniq.multi.tuning";
-
-open(MATCH,$match_file);
-open(FRAME,">$out_file");
-open(RULE,">$out_file.extract") if $OUTPUT_RULES;
-open(RULE_INV,">$out_file.extract.inv") if $OUTPUT_RULES;
-open(INFO,">$out_file.info");
-while( my $match = <MATCH> ) {
-    chop($match);
-    my ($score,$sentence,$path) = split(/ \|\|\| /,$match);
-
-    $score =~ /^(\d+) (.+)/ || die;
-    my ($i,$match_score) = ($1,$2);
-
-    # construct frame
-    if ($sentence < 1e9 && $sentence >= 0) {
-		my $SOURCE = $ALL_SOURCE[$sentence];
-		my @ALIGNMENT = split(/ \|\|\| /,$ALL_ALIGNMENT[$sentence]);
-		my @TARGET = split(/ \|\|\| /,$ALL_TARGET[$sentence]);
-		
-		for(my $j=0;$j<scalar(@TARGET);$j++) {
-			$TARGET[$j] =~ /^(\d+) (.+)$/ || die;
-			my ($target_count,$target) = ($1,$2);
-			my ($frame,$rule_s,$rule_t,$rule_alignment,$rule_alignment_inv) = 
-			&create_xml($SOURCE,
-					$INPUT[$i],
-					$target,
-					$ALIGNMENT[$j],
-					$path);
-			print FRAME $frame."\n";
-			print RULE "$rule_s [X] ||| $rule_t [X] ||| $rule_alignment ||| $target_count\n" if $OUTPUT_RULES;
-			print RULE_INV "$rule_t [X] ||| $rule_s [X] ||| $rule_alignment_inv ||| $target_count\n" if $OUTPUT_RULES;
-			print INFO "$i ||| $match_score ||| $target_count\n";
-		}
-    }
-}
-close(FRAME);
-close(MATCH);
-close(RULE) if $OUTPUT_RULES;
-close(RULE_INV) if $OUTPUT_RULES;
-
-`LC_ALL=C sort $out_file.extract | gzip -c > $out_file.extract.sorted.gz`;
-`LC_ALL=C sort $out_file.extract.inv | gzip -c > $out_file.extract.inv.sorted.gz`;
-
-if ($OUTPUT_RULES)
-{
-  $cmd = "$RealBin/../../scripts/training/train-model.perl -dont-zip -first-step 6 -last-step 6 -f en -e fr -hierarchical -extract-file $out_file.extract -lexical-file $lex_file -phrase-translation-table $pt_file";
-  print STDERR "Executing: $cmd \n";
-  `$cmd`;
-}
-
-#$cmd = "rm -rf $TMPDIR";
-#`$cmd`;
-
-#######################################################
-sub create_xml {
-    my ($source,$input,$target,$alignment,$path) = @_;
-    
-    my @INPUT = split(/ /,$input);
-    my @SOURCE = split(/ /,$source);
-    my @TARGET = split(/ /,$target);
-    my %ALIGN = &create_alignment($alignment);
-    
-    my %FRAME_INPUT;
-    my (@NT,@INPUT_BITMAP,@TARGET_BITMAP,%ALIGNMENT_I_TO_S);
-    foreach (@TARGET) { push @TARGET_BITMAP,1 }
-    
-    ### STEP 1: FIND MISMATCHES
-
-    my ($s,$i) = (0,0);
-    my $currently_matching = 0;
-    my ($start_s,$start_i) = (0,0);
-
-    $path .= "X"; # indicate end
-    print STDERR "$input\n$source\n$target\n$path\n";
-    for(my $p=0;$p<length($path);$p++) {
-	my $action = substr($path,$p,1);
-
-	# beginning of a mismatch
-	if ($currently_matching && $action ne "M" && $action ne "X") {
-	    $start_i = $i;
-	    $start_s = $s;
-	    $currently_matching = 0;
-	}
-	
-	# end of a mismatch
-	elsif (!$currently_matching && 
-	       ($action eq "M" || $action eq "X")) {
-	    
-	    # remove use of affected target words
-	    for(my $ss = $start_s; $ss<$s; $ss++) {
-		foreach my $tt (keys %{${$ALIGN{'s'}}[$ss]}) {
-		    $TARGET_BITMAP[$tt] = 0;
-		}
-		
-		# also remove enclosed unaligned words?
-	    }
-	    
-	    # are there input words that need to be inserted ?
-	    print STDERR "($start_i<$i)?\n";
-	    if ($start_i<$i) {
-		
-		# take note of input words to be inserted
-		my $insertion = "";
-		for(my $ii = $start_i; $ii<$i; $ii++) {
-		    $insertion .= $INPUT[$ii]." ";
-		}
-		
-		# find position for inserted input words
-		
-		# find first removed target word
-		my $start_t = 1000;
-		for(my $ss = $start_s; $ss<$s; $ss++) {
-		    foreach my $tt (keys %{${$ALIGN{'s'}}[$ss]}) {
-			$start_t = $tt if $tt < $start_t;
-		    }
-		}
-
-		# end of sentence? add to end
-		if ($start_t == 1000 && $i > $#INPUT) {
-		    $start_t = $#TARGET;
-		}
-		
-		# backtrack to previous words if unaligned
-		if ($start_t == 1000) {
-		    $start_t = -1;
-		    for(my $ss = $s-1; $start_t==-1 && $ss>=0; $ss--) {
-			foreach my $tt (keys %{${$ALIGN{'s'}}[$ss]}) {
-			    $start_t = $tt if $tt > $start_t;
-			}
-		    }
-		}
-		$FRAME_INPUT{$start_t} .= $insertion;
-		my %NT = ("start_t" => $start_t,
-			  "start_i" => $start_i );
-		push @NT,\%NT;		
-	    }	    
-	    $currently_matching = 1;
-	}
-	
-	print STDERR "$action $s $i ($start_s $start_i) $currently_matching";
-	if ($action ne "I") {
-	    print STDERR " ->";
-	    foreach my $tt (keys %{${$ALIGN{'s'}}[$s]}) {
-		print STDERR " ".$tt;
-	    }
-	}
-	print STDERR "\n";
-	$s++ unless $action eq "I";
-	$i++ unless $action eq "D";
-	$ALIGNMENT_I_TO_S{$i} = $s unless $action eq "D";
-	push @INPUT_BITMAP, 1 if $action eq "M";
-	push @INPUT_BITMAP, 0 if $action eq "I" || $action eq "S";
-    }
-    
-
-    print STDERR $target."\n";
-    foreach (@TARGET_BITMAP) { print STDERR $_; } print STDERR "\n";
-    foreach (sort keys %FRAME_INPUT) { 
-	print STDERR "$_: $FRAME_INPUT{$_}\n";
-    }
-
-    ### STEP 2: BUILD RULE AND FRAME
-        
-    # hierarchical rule
-    my $rule_s = "";
-    my $rule_pos_s = 0;
-    my %RULE_ALIGNMENT_S;
-    for(my $i=0;$i<scalar(@INPUT_BITMAP);$i++) {
-		if ($INPUT_BITMAP[$i]) {
-			$rule_s .= $INPUT[$i]." ";
-			$RULE_ALIGNMENT_S{$ALIGNMENT_I_TO_S{$i}} = $rule_pos_s++;
-		}
-		foreach my $NT (@NT) {
-			if ($i == $$NT{"start_i"}) {
-				$rule_s .= "[X][X] ";
-				$$NT{"rule_pos_s"} = $rule_pos_s++;
-			}
-		}
-    }
-
-    my $rule_t = "";
-    my $rule_pos_t = 0;
-    my %RULE_ALIGNMENT_T;
-    for(my $t=-1;$t<scalar(@TARGET_BITMAP);$t++) {
-	if ($t>=0 && $TARGET_BITMAP[$t]) {
-	    $rule_t .= $TARGET[$t]." ";
-	    $RULE_ALIGNMENT_T{$t} = $rule_pos_t++;
-	}
-	foreach my $NT (@NT) {
-	    if ($t == $$NT{"start_t"}) {
-		$rule_t .= "[X][X] ";
-		$$NT{"rule_pos_t"} = $rule_pos_t++;
-	    }
-	}
-    }
-
-    my $rule_alignment = "";
-    foreach my $s (sort { $a <=> $b} keys %RULE_ALIGNMENT_S) {
-	foreach my $t (keys %{$ALIGN{"s"}[$s]}) {
-	    next unless defined($RULE_ALIGNMENT_T{$t});
-	    $rule_alignment .= $RULE_ALIGNMENT_S{$s}."-".$RULE_ALIGNMENT_T{$t}." ";
-	}
-    }
-    foreach my $NT (@NT) {
-	$rule_alignment .= $$NT{"rule_pos_s"}."-".$$NT{"rule_pos_t"}." ";
-    }
-    
-    chop($rule_s);
-    chop($rule_t);
-    chop($rule_alignment);
-
-    my $rule_alignment_inv = "";
-    foreach (split(/ /,$rule_alignment)) {
-	/^(\d+)\-(\d+)$/;
-	$rule_alignment_inv .= "$2-$1 ";
-    }
-    chop($rule_alignment_inv);
-
-    # frame
-    my $frame = "";
-    $frame = $FRAME_INPUT{-1} if defined $FRAME_INPUT{-1};
-
-    my $currently_included = 0;
-    my $start_t = -1;
-    push @TARGET_BITMAP,0; # indicate end
-
-    for(my $t=0;$t<=scalar(@TARGET);$t++) {	    
-	# beginning of tm target inclusion
-	if (!$currently_included && $TARGET_BITMAP[$t]) {
-	    $start_t = $t;
-	    $currently_included = 1;
-	}
-	
-	# end of tm target inclusion (not included word or inserted input)
-	elsif ($currently_included && 
-	       (!$TARGET_BITMAP[$t] || defined($FRAME_INPUT{$t}))) {
-	    # add xml (unless change is at the beginning of the sentence
-	    if ($start_t >= 0) {
-		my $target = "";
-		print STDERR "for(tt=$start_t;tt<$t+$TARGET_BITMAP[$t]);\n";
-		for(my $tt=$start_t;$tt<$t+$TARGET_BITMAP[$t];$tt++) {
-		    $target .= $TARGET[$tt] . " ";
-		}
-		chop($target);
-		$frame .= "<xml translation=\"$target\"> x </xml> ";
-	    }
-	    $currently_included = 0;
-	}
-	
-	$frame .= $FRAME_INPUT{$t} if defined $FRAME_INPUT{$t};
-	print STDERR "$TARGET_BITMAP[$t] $t ($start_t) $currently_included\n";
-    }
-
-    print STDERR $frame."\n-------------------------------------\n";
-    return ($frame,$rule_s,$rule_t,$rule_alignment,$rule_alignment_inv);
-}
-
-sub create_alignment {
-	my ($line) = @_;
-	my (@ALIGNED_TO_S,@ALIGNED_TO_T);
-	foreach my $point (split(/ /,$line)) {
-		my ($s,$t) = split(/\-/,$point);
-		$ALIGNED_TO_S[$s]{$t}++;
-		$ALIGNED_TO_T[$t]{$s}++;
-	}
-	my %ALIGNMENT = ( 's' => \@ALIGNED_TO_S, 't' => \@ALIGNED_TO_T );
-	return %ALIGNMENT;
-}
diff --git a/contrib/fuzzy-match/old/make-xml-from-match-multiple.perl b/contrib/fuzzy-match/old/make-xml-from-match-multiple.perl
deleted file mode 100755
index e16c9de75..000000000
--- a/contrib/fuzzy-match/old/make-xml-from-match-multiple.perl
+++ /dev/null
@@ -1,288 +0,0 @@
-#!/usr/bin/perl -w 
-
-use strict;
-
-my $DEBUG = 1;
-my $OUTPUT_RULES = 1;
-
-my $scripts_root_dir = "/Users/hieuhoang/workspace/github/hieuhoang/scripts";
-
-my $data_root = "/Users/hieuhoang/workspace/experiment/data/tm-mt-integration/";
-#my $match_file  = "$data_root/in/BEST.acquis-xml-escaped.4.uniq.multi.tuning";
-my $match_file  = "$data_root/out/BEST";
-my $source_file = "$data_root/in/acquis.truecased.4.en.uniq";
-my $target_file = "$data_root/in/acquis.truecased.4.fr.uniq";
-my $alignment_file = "$data_root/in/acquis.truecased.4.align.uniq";
-my $out_file = "$data_root/out/ac-test.input.xml.4.uniq.multi.tuning";
-my $in_file = "$data_root/in/ac-test.input.tc.4";
-
-#my $match_file  = "tm/BEST.acquis-xml-escaped.4.uniq.multi";
-#my $source_file = "data/acquis.truecased.4.en.uniq";
-#my $target_file = "data/acquis.truecased.4.fr.uniq";
-#my $alignment_file = "data/acquis.truecased.4.align.uniq";
-#my $out_file = "data/ac-test.input.xml.4.uniq.multi.xxx";
-#my $in_file = "evaluation/ac-test.input.tc.4";
-
-my @INPUT = `cat $in_file`; chop(@INPUT);
-my @ALL_SOURCE = `cat $source_file`; chop(@ALL_SOURCE);
-my @ALL_TARGET = `cat $target_file`; chop(@ALL_TARGET);
-my @ALL_ALIGNMENT = `cat $alignment_file`; chop(@ALL_ALIGNMENT);
-
-open(MATCH,$match_file);
-open(FRAME,">$out_file");
-open(RULE,">$out_file.extract") if $OUTPUT_RULES;
-open(RULE_INV,">$out_file.extract.inv") if $OUTPUT_RULES;
-open(INFO,">$out_file.info");
-while( my $match = <MATCH> ) {
-    chop($match);
-    my ($score,$sentence,$path) = split(/ \|\|\| /,$match);
-
-    $score =~ /^(\d+) (.+)/ || die;
-    my ($i,$match_score) = ($1,$2);
-
-    # construct frame
-    if ($sentence < 1e9 && $sentence >= 0) {
-		my $SOURCE = $ALL_SOURCE[$sentence];
-		my @ALIGNMENT = split(/ \|\|\| /,$ALL_ALIGNMENT[$sentence]);
-		my @TARGET = split(/ \|\|\| /,$ALL_TARGET[$sentence]);
-		
-		for(my $j=0;$j<scalar(@TARGET);$j++) {
-			$TARGET[$j] =~ /^(\d+) (.+)$/ || die;
-			my ($target_count,$target) = ($1,$2);
-			my ($frame,$rule_s,$rule_t,$rule_alignment,$rule_alignment_inv) = 
-			&create_xml($SOURCE,
-					$INPUT[$i],
-					$target,
-					$ALIGNMENT[$j],
-					$path);
-			print FRAME $frame."\n";
-			print RULE "$rule_s [X] ||| $rule_t [X] ||| $rule_alignment ||| $target_count\n" if $OUTPUT_RULES;
-			print RULE_INV "$rule_t [X] ||| $rule_s [X] ||| $rule_alignment_inv ||| $target_count\n" if $OUTPUT_RULES;
-			print INFO "$i ||| $match_score ||| $target_count\n";
-		}
-    }
-}
-close(FRAME);
-close(MATCH);
-close(RULE) if $OUTPUT_RULES;
-close(RULE_INV) if $OUTPUT_RULES;
-
-`LC_ALL=C sort $out_file.extract | gzip -c > $out_file.extract.sorted.gz`;
-`LC_ALL=C sort $out_file.extract.inv | gzip -c > $out_file.extract.inv.sorted.gz`;
-
-`$scripts_root_dir/training/train-model.perl -dont-zip -first-step 6 -last-step 6 -f en -e fr -hierarchical -extract-file $out_file.extract -lexical-file $data_root/in/lex.4 -phrase-translation-table $out_file.phrase-table` if $OUTPUT_RULES;
-
-sub create_xml {
-    my ($source,$input,$target,$alignment,$path) = @_;
-    
-    my @INPUT = split(/ /,$input);
-    my @SOURCE = split(/ /,$source);
-    my @TARGET = split(/ /,$target);
-    my %ALIGN = &create_alignment($alignment);
-    
-    my %FRAME_INPUT;
-    my (@NT,@INPUT_BITMAP,@TARGET_BITMAP,%ALIGNMENT_I_TO_S);
-    foreach (@TARGET) { push @TARGET_BITMAP,1 }
-    
-    ### STEP 1: FIND MISMATCHES
-
-    my ($s,$i) = (0,0);
-    my $currently_matching = 0;
-    my ($start_s,$start_i) = (0,0);
-
-    $path .= "X"; # indicate end
-    print "$input\n$source\n$target\n$path\n";
-    for(my $p=0;$p<length($path);$p++) {
-	my $action = substr($path,$p,1);
-
-	# beginning of a mismatch
-	if ($currently_matching && $action ne "M" && $action ne "X") {
-	    $start_i = $i;
-	    $start_s = $s;
-	    $currently_matching = 0;
-	}
-	
-	# end of a mismatch
-	elsif (!$currently_matching && 
-	       ($action eq "M" || $action eq "X")) {
-	    
-	    # remove use of affected target words
-	    for(my $ss = $start_s; $ss<$s; $ss++) {
-		foreach my $tt (keys %{${$ALIGN{'s'}}[$ss]}) {
-		    $TARGET_BITMAP[$tt] = 0;
-		}
-		
-		# also remove enclosed unaligned words?
-	    }
-	    
-	    # are there input words that need to be inserted ?
-	    print "($start_i<$i)?\n";
-	    if ($start_i<$i) {
-		
-		# take note of input words to be inserted
-		my $insertion = "";
-		for(my $ii = $start_i; $ii<$i; $ii++) {
-		    $insertion .= $INPUT[$ii]." ";
-		}
-		
-		# find position for inserted input words
-		
-		# find first removed target word
-		my $start_t = 1000;
-		for(my $ss = $start_s; $ss<$s; $ss++) {
-		    foreach my $tt (keys %{${$ALIGN{'s'}}[$ss]}) {
-			$start_t = $tt if $tt < $start_t;
-		    }
-		}
-
-		# end of sentence? add to end
-		if ($start_t == 1000 && $i > $#INPUT) {
-		    $start_t = $#TARGET;
-		}
-		
-		# backtrack to previous words if unaligned
-		if ($start_t == 1000) {
-		    $start_t = -1;
-		    for(my $ss = $s-1; $start_t==-1 && $ss>=0; $ss--) {
-			foreach my $tt (keys %{${$ALIGN{'s'}}[$ss]}) {
-			    $start_t = $tt if $tt > $start_t;
-			}
-		    }
-		}
-		$FRAME_INPUT{$start_t} .= $insertion;
-		my %NT = ("start_t" => $start_t,
-			  "start_i" => $start_i );
-		push @NT,\%NT;		
-	    }	    
-	    $currently_matching = 1;
-	}
-	
-	print "$action $s $i ($start_s $start_i) $currently_matching";
-	if ($action ne "I") {
-	    print " ->";
-	    foreach my $tt (keys %{${$ALIGN{'s'}}[$s]}) {
-		print " ".$tt;
-	    }
-	}
-	print "\n";
-	$s++ unless $action eq "I";
-	$i++ unless $action eq "D";
-	$ALIGNMENT_I_TO_S{$i} = $s unless $action eq "D";
-	push @INPUT_BITMAP, 1 if $action eq "M";
-	push @INPUT_BITMAP, 0 if $action eq "I" || $action eq "S";
-    }
-    
-
-    print $target."\n";
-    foreach (@TARGET_BITMAP) { print $_; } print "\n";
-    foreach (sort keys %FRAME_INPUT) { 
-	print "$_: $FRAME_INPUT{$_}\n";
-    }
-
-    ### STEP 2: BUILD RULE AND FRAME
-        
-    # hierarchical rule
-    my $rule_s = "";
-    my $rule_pos_s = 0;
-    my %RULE_ALIGNMENT_S;
-    for(my $i=0;$i<scalar(@INPUT_BITMAP);$i++) {
-	if ($INPUT_BITMAP[$i]) {
-	    $rule_s .= $INPUT[$i]." ";
-	    $RULE_ALIGNMENT_S{$ALIGNMENT_I_TO_S{$i}} = $rule_pos_s++;
-	}
-	foreach my $NT (@NT) {
-	    if ($i == $$NT{"start_i"}) {
-		$rule_s .= "[X][X] ";
-		$$NT{"rule_pos_s"} = $rule_pos_s++;
-	    }
-	}
-    }
-
-    my $rule_t = "";
-    my $rule_pos_t = 0;
-    my %RULE_ALIGNMENT_T;
-    for(my $t=-1;$t<scalar(@TARGET_BITMAP);$t++) {
-	if ($t>=0 && $TARGET_BITMAP[$t]) {
-	    $rule_t .= $TARGET[$t]." ";
-	    $RULE_ALIGNMENT_T{$t} = $rule_pos_t++;
-	}
-	foreach my $NT (@NT) {
-	    if ($t == $$NT{"start_t"}) {
-		$rule_t .= "[X][X] ";
-		$$NT{"rule_pos_t"} = $rule_pos_t++;
-	    }
-	}
-    }
-
-    my $rule_alignment = "";
-    foreach my $s (sort { $a <=> $b} keys %RULE_ALIGNMENT_S) {
-	foreach my $t (keys %{$ALIGN{"s"}[$s]}) {
-	    next unless defined($RULE_ALIGNMENT_T{$t});
-	    $rule_alignment .= $RULE_ALIGNMENT_S{$s}."-".$RULE_ALIGNMENT_T{$t}." ";
-	}
-    }
-    foreach my $NT (@NT) {
-	$rule_alignment .= $$NT{"rule_pos_s"}."-".$$NT{"rule_pos_t"}." ";
-    }
-    
-    chop($rule_s);
-    chop($rule_t);
-    chop($rule_alignment);
-
-    my $rule_alignment_inv = "";
-    foreach (split(/ /,$rule_alignment)) {
-	/^(\d+)\-(\d+)$/;
-	$rule_alignment_inv .= "$2-$1 ";
-    }
-    chop($rule_alignment_inv);
-
-    # frame
-    my $frame = "";
-    $frame = $FRAME_INPUT{-1} if defined $FRAME_INPUT{-1};
-
-    my $currently_included = 0;
-    my $start_t = -1;
-    push @TARGET_BITMAP,0; # indicate end
-
-    for(my $t=0;$t<=scalar(@TARGET);$t++) {	    
-	# beginning of tm target inclusion
-	if (!$currently_included && $TARGET_BITMAP[$t]) {
-	    $start_t = $t;
-	    $currently_included = 1;
-	}
-	
-	# end of tm target inclusion (not included word or inserted input)
-	elsif ($currently_included && 
-	       (!$TARGET_BITMAP[$t] || defined($FRAME_INPUT{$t}))) {
-	    # add xml (unless change is at the beginning of the sentence
-	    if ($start_t >= 0) {
-		my $target = "";
-		print "for(tt=$start_t;tt<$t+$TARGET_BITMAP[$t]);\n";
-		for(my $tt=$start_t;$tt<$t+$TARGET_BITMAP[$t];$tt++) {
-		    $target .= $TARGET[$tt] . " ";
-		}
-		chop($target);
-		$frame .= "<xml translation=\"$target\"> x </xml> ";
-	    }
-	    $currently_included = 0;
-	}
-	
-	$frame .= $FRAME_INPUT{$t} if defined $FRAME_INPUT{$t};
-	print "$TARGET_BITMAP[$t] $t ($start_t) $currently_included\n";
-    }
-
-    print $frame."\n-------------------------------------\n";
-    return ($frame,$rule_s,$rule_t,$rule_alignment,$rule_alignment_inv);
-}
-
-sub create_alignment {
-	my ($line) = @_;
-	my (@ALIGNED_TO_S,@ALIGNED_TO_T);
-	foreach my $point (split(/ /,$line)) {
-		my ($s,$t) = split(/\-/,$point);
-		$ALIGNED_TO_S[$s]{$t}++;
-		$ALIGNED_TO_T[$t]{$s}++;
-	}
-	my %ALIGNMENT = ( 's' => \@ALIGNED_TO_S, 't' => \@ALIGNED_TO_T );
-	return %ALIGNMENT;
-}
diff --git a/contrib/fuzzy-match/suffix-test.cpp b/contrib/fuzzy-match/suffix-test.cpp
deleted file mode 100644
index 01b722fb4..000000000
--- a/contrib/fuzzy-match/suffix-test.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-#include "SuffixArray.h"
-
-using namespace std;
-
-int main(int argc, char* argv[]) 
-{
-	SuffixArray suffixArray( "/home/pkoehn/syntax/grammars/wmt09-de-en/corpus.1k.de" );
-	//suffixArray.List(10,20);
-	vector< string > der;
-	der.push_back("der");
-	vector< string > inDer;
-	inDer.push_back("in");
-	inDer.push_back("der");
-	vector< string > zzz;
-	zzz.push_back("zzz");
-	vector< string > derDer;
-	derDer.push_back("der");
-	derDer.push_back("der");
-
-	cout << "count of 'der' " << suffixArray.Count( der ) << endl;
-	cout << "limited count of 'der' " << suffixArray.MinCount( der, 2 ) << endl;
-	cout << "count of 'in der' " << suffixArray.Count( inDer ) << endl;
-	cout << "count of 'der der' " << suffixArray.Count( derDer ) << endl;
-	cout << "limited count of 'der der' " << suffixArray.MinCount( derDer, 1 ) << endl;
-	// cout << "count of 'zzz' " << suffixArray.Count( zzz ) << endl;
-	// cout << "limited count of 'zzz' " << suffixArray.LimitedCount( zzz, 1 ) << endl;
-}
diff --git a/contrib/other-builds/OnDiskPt/.cproject b/contrib/other-builds/OnDiskPt/.cproject
index 472888f48..9b25f636f 100644
--- a/contrib/other-builds/OnDiskPt/.cproject
+++ b/contrib/other-builds/OnDiskPt/.cproject
@@ -47,6 +47,7 @@
 								</option>
 								<option id="gnu.cpp.compiler.option.preprocessor.def.1052680347" name="Defined symbols (-D)" superClass="gnu.cpp.compiler.option.preprocessor.def" valueType="definedSymbols">
 									<listOptionValue builtIn="false" value="TRACE_ENABLE"/>
+									<listOptionValue builtIn="false" value="WITH_THREADS"/>
 								</option>
 								<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1930757481" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
 							</tool>
diff --git a/contrib/other-builds/lm.xcodeproj/project.pbxproj b/contrib/other-builds/lm.xcodeproj/project.pbxproj
index 2488f1439..2598ae994 100644
--- a/contrib/other-builds/lm.xcodeproj/project.pbxproj
+++ b/contrib/other-builds/lm.xcodeproj/project.pbxproj
@@ -83,6 +83,8 @@
 		1EBA459F14B97E92003CC0EA /* string_piece.hh in Headers */ = {isa = PBXBuildFile; fileRef = 1EBA455914B97E92003CC0EA /* string_piece.hh */; };
 		1EBA45A014B97E92003CC0EA /* tokenize_piece_test.cc in Sources */ = {isa = PBXBuildFile; fileRef = 1EBA455A14B97E92003CC0EA /* tokenize_piece_test.cc */; };
 		1EBA45A114B97E92003CC0EA /* tokenize_piece.hh in Headers */ = {isa = PBXBuildFile; fileRef = 1EBA455B14B97E92003CC0EA /* tokenize_piece.hh */; };
+		1EC2B30916233A8C00614D71 /* usage.cc in Sources */ = {isa = PBXBuildFile; fileRef = 1EC2B30716233A8C00614D71 /* usage.cc */; };
+		1EC2B30A16233A8C00614D71 /* usage.hh in Headers */ = {isa = PBXBuildFile; fileRef = 1EC2B30816233A8C00614D71 /* usage.hh */; };
 /* End PBXBuildFile section */
 
 /* Begin PBXContainerItemProxy section */
@@ -185,6 +187,8 @@
 		1EBA455A14B97E92003CC0EA /* tokenize_piece_test.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = tokenize_piece_test.cc; path = ../../util/tokenize_piece_test.cc; sourceTree = "<group>"; };
 		1EBA455B14B97E92003CC0EA /* tokenize_piece.hh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = tokenize_piece.hh; path = ../../util/tokenize_piece.hh; sourceTree = "<group>"; };
 		1EBA455C14B97E92003CC0EA /* util.xcodeproj */ = {isa = PBXFileReference; lastKnownFileType = "wrapper.pb-project"; name = util.xcodeproj; path = ../../util/util.xcodeproj; sourceTree = "<group>"; };
+		1EC2B30716233A8C00614D71 /* usage.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = usage.cc; path = ../../util/usage.cc; sourceTree = "<group>"; };
+		1EC2B30816233A8C00614D71 /* usage.hh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = usage.hh; path = ../../util/usage.hh; sourceTree = "<group>"; };
 		1EE8C2E91476A48E002496F2 /* liblm.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = liblm.a; sourceTree = BUILT_PRODUCTS_DIR; };
 /* End PBXFileReference section */
 
@@ -261,6 +265,8 @@
 		1EBA44FC14B97E81003CC0EA /* util */ = {
 			isa = PBXGroup;
 			children = (
+				1EC2B30716233A8C00614D71 /* usage.cc */,
+				1EC2B30816233A8C00614D71 /* usage.hh */,
 				1EBA453614B97E92003CC0EA /* bit_packing_test.cc */,
 				1EBA453714B97E92003CC0EA /* bit_packing.cc */,
 				1EBA453814B97E92003CC0EA /* bit_packing.hh */,
@@ -377,6 +383,7 @@
 				1EBA45A114B97E92003CC0EA /* tokenize_piece.hh in Headers */,
 				1E890C72159D1B260031F9F3 /* value_build.hh in Headers */,
 				1E890C73159D1B260031F9F3 /* value.hh in Headers */,
+				1EC2B30A16233A8C00614D71 /* usage.hh in Headers */,
 			);
 			runOnlyForDeploymentPostprocessing = 0;
 		};
@@ -479,6 +486,7 @@
 				1EBA459D14B97E92003CC0EA /* sorted_uniform_test.cc in Sources */,
 				1EBA45A014B97E92003CC0EA /* tokenize_piece_test.cc in Sources */,
 				1E890C71159D1B260031F9F3 /* value_build.cc in Sources */,
+				1EC2B30916233A8C00614D71 /* usage.cc in Sources */,
 			);
 			runOnlyForDeploymentPostprocessing = 0;
 		};
diff --git a/contrib/other-builds/moses-chart-cmd/.cproject b/contrib/other-builds/moses-chart-cmd/.cproject
index dfebdd577..a620366e4 100644
--- a/contrib/other-builds/moses-chart-cmd/.cproject
+++ b/contrib/other-builds/moses-chart-cmd/.cproject
@@ -29,6 +29,9 @@
 									<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../moses/src&quot;"/>
 									<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../..&quot;"/>
 								</option>
+								<option id="gnu.cpp.compiler.option.preprocessor.def.1785368241" name="Defined symbols (-D)" superClass="gnu.cpp.compiler.option.preprocessor.def" valueType="definedSymbols">
+									<listOptionValue builtIn="false" value="WITH_THREADS"/>
+								</option>
 								<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1402496521" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
 							</tool>
 							<tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.debug.827478809" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.debug">
@@ -52,6 +55,7 @@
 									<listOptionValue builtIn="false" value="z"/>
 									<listOptionValue builtIn="false" value="rt"/>
 									<listOptionValue builtIn="false" value="boost_system"/>
+									<listOptionValue builtIn="false" value="boost_thread"/>
 									<listOptionValue builtIn="false" value="boost_filesystem"/>
 									<listOptionValue builtIn="false" value="lm"/>
 									<listOptionValue builtIn="false" value="util"/>
diff --git a/contrib/other-builds/moses-cmd/.cproject b/contrib/other-builds/moses-cmd/.cproject
index 1428de199..7fa7b6911 100644
--- a/contrib/other-builds/moses-cmd/.cproject
+++ b/contrib/other-builds/moses-cmd/.cproject
@@ -29,6 +29,9 @@
 									<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../moses/src&quot;"/>
 									<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../..&quot;"/>
 								</option>
+								<option id="gnu.cpp.compiler.option.preprocessor.def.849384962" name="Defined symbols (-D)" superClass="gnu.cpp.compiler.option.preprocessor.def" valueType="definedSymbols">
+									<listOptionValue builtIn="false" value="WITH_THREADS"/>
+								</option>
 								<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.363379373" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
 							</tool>
 							<tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.debug.504208780" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.debug">
@@ -52,6 +55,7 @@
 									<listOptionValue builtIn="false" value="z"/>
 									<listOptionValue builtIn="false" value="rt"/>
 									<listOptionValue builtIn="false" value="boost_system"/>
+									<listOptionValue builtIn="false" value="boost_thread"/>
 									<listOptionValue builtIn="false" value="lm"/>
 									<listOptionValue builtIn="false" value="util"/>
 								</option>
diff --git a/contrib/other-builds/moses/.cproject b/contrib/other-builds/moses/.cproject
index 05eb2df40..bbf020b98 100644
--- a/contrib/other-builds/moses/.cproject
+++ b/contrib/other-builds/moses/.cproject
@@ -37,6 +37,8 @@
 									<listOptionValue builtIn="false" value="${workspace_loc}/../../"/>
 								</option>
 								<option id="gnu.cpp.compiler.option.preprocessor.def.752586397" name="Defined symbols (-D)" superClass="gnu.cpp.compiler.option.preprocessor.def" valueType="definedSymbols">
+									<listOptionValue builtIn="false" value="IS_ECLIPSE"/>
+									<listOptionValue builtIn="false" value="WITH_THREADS"/>
 									<listOptionValue builtIn="false" value="KENLM_MAX_ORDER=7"/>
 									<listOptionValue builtIn="false" value="TRACE_ENABLE"/>
 									<listOptionValue builtIn="false" value="LM_IRST"/>
diff --git a/contrib/other-builds/util/.cproject b/contrib/other-builds/util/.cproject
index 8ea5ab73b..0893d942f 100644
--- a/contrib/other-builds/util/.cproject
+++ b/contrib/other-builds/util/.cproject
@@ -46,6 +46,7 @@
 								</option>
 								<option id="gnu.cpp.compiler.option.preprocessor.def.1952961175" name="Defined symbols (-D)" superClass="gnu.cpp.compiler.option.preprocessor.def" valueType="definedSymbols">
 									<listOptionValue builtIn="false" value="TRACE_ENABLE"/>
+									<listOptionValue builtIn="false" value="WITH_THREADS"/>
 								</option>
 								<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1420621104" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
 							</tool>
author	Barry Haddow <barry.haddow@gmail.com>	2012-10-14 17:18:03 +0400
committer	Barry Haddow <barry.haddow@gmail.com>	2012-10-14 17:18:03 +0400
commit	61ae24aa5d1db2c117c1aa90ca65ef4fe41490fa (patch)
tree	60d5814cfcabe7325ff7b0a8dfbe8c0e3d1e1a7f /contrib
parent	732b299ba3612094af67f4e336011fe337089e8a (diff)
parent	0d9095983d70a032fb596366e2445e338301b212 (diff)