beautify

author: Hieu Hoang <hieu@hoang.co.uk> 2013-05-29 21:16:15 +0400
committer: Hieu Hoang <hieu@hoang.co.uk> 2013-05-29 21:16:15 +0400
commit: 6249432407af8730c10bccc7894c0725fcaf5e47 (patch)
tree: 3ac1f094b9fdc199b04bc5ef209ce00e3596e37d /biconcor
parent: 59bd7deb4b6b9c4f7b3b7dbb055783528fbc31ca (diff)
10 files changed, 416 insertions, 413 deletions
diff --git a/biconcor/Alignment.cpp b/biconcor/Alignment.cpp
index e73e18840..814802531 100644
--- a/biconcor/Alignment.cpp
+++ b/biconcor/Alignment.cpp
@@ -5,7 +5,8 @@
 #include <stdlib.h>
 #include <cstring>
 
-namespace {
+namespace
+{
 
 const int LINE_MAX_LENGTH = 10000;
 
@@ -84,10 +85,10 @@ void Alignment::Create(const string& fileName)
 }
 
 Alignment::Alignment()
-    : m_array(NULL),
-      m_sentenceEnd(NULL),
-      m_size(0),
-      m_sentenceCount(0) {}
+  : m_array(NULL),
+    m_sentenceEnd(NULL),
+    m_size(0),
+    m_sentenceCount(0) {}
 
 Alignment::~Alignment()
 {
diff --git a/biconcor/Mismatch.cpp b/biconcor/Mismatch.cpp
index 31140b200..c3afec781 100644
--- a/biconcor/Mismatch.cpp
+++ b/biconcor/Mismatch.cpp
@@ -23,16 +23,16 @@ enum {
 };
 
 Mismatch::Mismatch( SuffixArray *sa, TargetCorpus *tc, Alignment *a, INDEX sentence_id, INDEX position, int source_length, int target_length, int source_start, int source_end )
-    :m_suffixArray(sa)
-    ,m_targetCorpus(tc)
-    ,m_alignment(a)
-    ,m_sentence_id(sentence_id)
-    ,m_source_length(source_length)
-    ,m_target_length(target_length)
-    ,m_source_position(position)
-    ,m_source_start(source_start)
-    ,m_source_end(source_end)
-    ,m_unaligned(true)
+  :m_suffixArray(sa)
+  ,m_targetCorpus(tc)
+  ,m_alignment(a)
+  ,m_sentence_id(sentence_id)
+  ,m_source_length(source_length)
+  ,m_target_length(target_length)
+  ,m_source_position(position)
+  ,m_source_start(source_start)
+  ,m_source_end(source_end)
+  ,m_unaligned(true)
 {
   // initialize unaligned indexes
   for (int i = 0; i < m_source_length; i++) {
@@ -42,7 +42,7 @@ Mismatch::Mismatch( SuffixArray *sa, TargetCorpus *tc, Alignment *a, INDEX sente
     m_target_unaligned[i] = true;
   }
   m_num_alignment_points =
-      m_alignment->GetNumberOfAlignmentPoints( sentence_id );
+    m_alignment->GetNumberOfAlignmentPoints( sentence_id );
   for(INDEX ap=0; ap<m_num_alignment_points; ap++) {
     m_source_unaligned[ (int)m_alignment->GetSourceWord( sentence_id, ap ) ] = false;
     m_target_unaligned[ (int)m_alignment->GetTargetWord( sentence_id, ap ) ] = false;
@@ -58,234 +58,235 @@ Mismatch::~Mismatch () {}
 
 void Mismatch::PrintClippedHTML( ostream* out, int width )
 {
-    int source_annotation[256], target_annotation[256];
-    vector< string > label_class;
-	label_class.push_back( "" );
-	label_class.push_back( "mismatch_pre_aligned" );
-	label_class.push_back( "mismatch_post_aligned" );
-	label_class.push_back( "null_aligned" );
-	label_class.push_back( "mismatch_misaligned" );
-	label_class.push_back( "mismatch_aligned" );
+  int source_annotation[256], target_annotation[256];
+  vector< string > label_class;
+  label_class.push_back( "" );
+  label_class.push_back( "mismatch_pre_aligned" );
+  label_class.push_back( "mismatch_post_aligned" );
+  label_class.push_back( "null_aligned" );
+  label_class.push_back( "mismatch_misaligned" );
+  label_class.push_back( "mismatch_aligned" );
 
-	for(int i=0; i<m_source_length;i++) source_annotation[i] = UNANNOTATED;
-	for(int i=0; i<m_target_length;i++) target_annotation[i] = UNANNOTATED;
-	
-	if (m_unaligned) {
-		// find alignment points for prior and next word(s) and
-		// center target phrase around those.
-		bool found_aligned = false;
-		for(int i=1; i<m_source_length && !found_aligned; i++) {
-			if (m_source_start-i >= 0) {
-				int word_id =  m_source_start-i;
-				source_annotation[ word_id ] = UNALIGNED;
-				if (!m_source_unaligned[ word_id ]) {
-					found_aligned = true;
-					LabelSourceMatches( source_annotation, target_annotation, word_id, PRE_ALIGNED );
-				}
-			}
+  for(int i=0; i<m_source_length; i++) source_annotation[i] = UNANNOTATED;
+  for(int i=0; i<m_target_length; i++) target_annotation[i] = UNANNOTATED;
 
-			if (m_source_end+i < m_source_length) {
-				int word_id = m_source_end+i;
-				source_annotation[ word_id ] = UNALIGNED;
-				if (!m_source_unaligned[ word_id ]) {
-					found_aligned = true;
-					LabelSourceMatches( source_annotation, target_annotation, word_id, POST_ALIGNED );
-				}
-			}
-		}
-		
-	}
-	// misalignment
-	else {
-		// label aligned output words
-		for(int i=m_source_start; i<=m_source_end; i++)
-			LabelSourceMatches( source_annotation, target_annotation, i, ALIGNED );
+  if (m_unaligned) {
+    // find alignment points for prior and next word(s) and
+    // center target phrase around those.
+    bool found_aligned = false;
+    for(int i=1; i<m_source_length && !found_aligned; i++) {
+      if (m_source_start-i >= 0) {
+        int word_id =  m_source_start-i;
+        source_annotation[ word_id ] = UNALIGNED;
+        if (!m_source_unaligned[ word_id ]) {
+          found_aligned = true;
+          LabelSourceMatches( source_annotation, target_annotation, word_id, PRE_ALIGNED );
+        }
+      }
 
-		// find first and last
-		int target_start = -1;
-		int target_end;
-		for(int i=0; i<m_target_length; i++)
-			if (target_annotation[i] == ALIGNED) {
-				if (target_start == -1)
-					target_start = i;
-				target_end = i;
-			}
-		// go over all enclosed target words
-		for(int i=target_start; i<=target_end; i++) {
-			// label other target words as unaligned or misaligned
-			if (m_target_unaligned[ i ])
-				target_annotation[ i ] = UNALIGNED;
-			else {
-				if (target_annotation[ i ] != ALIGNED)
-					target_annotation[ i ] = MISALIGNED;
-				// loop over aligned source words
-				for(INDEX ap=0; ap<m_num_alignment_points; ap++) {
-					if (m_alignment->GetTargetWord( m_sentence_id, ap ) == i) {
-						int source_word = m_alignment->GetSourceWord( m_sentence_id, ap );
-						// if not part of the source phrase -> also misaligned
-						if (source_word < m_source_start || source_word > m_source_end)
-							source_annotation[ source_word ] = MISALIGNED;
-					}
-				}
-			}						
-		}
-		// closure
-		bool change = true;
-		while(change) {
-			change = false;
-			for(INDEX ap=0; ap<m_num_alignment_points; ap++) {
-				int source_word = m_alignment->GetSourceWord( m_sentence_id, ap );
-				int target_word = m_alignment->GetTargetWord( m_sentence_id, ap );
-				if (source_annotation[source_word] != UNANNOTATED &&
-						target_annotation[target_word] == UNANNOTATED) {
-					target_annotation[target_word] = MISALIGNED;
-					change = true;
-				}
-				if (source_annotation[source_word] == UNANNOTATED &&
-						target_annotation[target_word] != UNANNOTATED) {
-					source_annotation[source_word] = MISALIGNED;
-					change = true;
-				}
-			}
-		}
-	}
-	
-	// print source
-	// shorten source context if too long
+      if (m_source_end+i < m_source_length) {
+        int word_id = m_source_end+i;
+        source_annotation[ word_id ] = UNALIGNED;
+        if (!m_source_unaligned[ word_id ]) {
+          found_aligned = true;
+          LabelSourceMatches( source_annotation, target_annotation, word_id, POST_ALIGNED );
+        }
+      }
+    }
+
+  }
+  // misalignment
+  else {
+    // label aligned output words
+    for(int i=m_source_start; i<=m_source_end; i++)
+      LabelSourceMatches( source_annotation, target_annotation, i, ALIGNED );
+
+    // find first and last
+    int target_start = -1;
+    int target_end;
+    for(int i=0; i<m_target_length; i++)
+      if (target_annotation[i] == ALIGNED) {
+        if (target_start == -1)
+          target_start = i;
+        target_end = i;
+      }
+    // go over all enclosed target words
+    for(int i=target_start; i<=target_end; i++) {
+      // label other target words as unaligned or misaligned
+      if (m_target_unaligned[ i ])
+        target_annotation[ i ] = UNALIGNED;
+      else {
+        if (target_annotation[ i ] != ALIGNED)
+          target_annotation[ i ] = MISALIGNED;
+        // loop over aligned source words
+        for(INDEX ap=0; ap<m_num_alignment_points; ap++) {
+          if (m_alignment->GetTargetWord( m_sentence_id, ap ) == i) {
+            int source_word = m_alignment->GetSourceWord( m_sentence_id, ap );
+            // if not part of the source phrase -> also misaligned
+            if (source_word < m_source_start || source_word > m_source_end)
+              source_annotation[ source_word ] = MISALIGNED;
+          }
+        }
+      }
+    }
+    // closure
+    bool change = true;
+    while(change) {
+      change = false;
+      for(INDEX ap=0; ap<m_num_alignment_points; ap++) {
+        int source_word = m_alignment->GetSourceWord( m_sentence_id, ap );
+        int target_word = m_alignment->GetTargetWord( m_sentence_id, ap );
+        if (source_annotation[source_word] != UNANNOTATED &&
+            target_annotation[target_word] == UNANNOTATED) {
+          target_annotation[target_word] = MISALIGNED;
+          change = true;
+        }
+        if (source_annotation[source_word] == UNANNOTATED &&
+            target_annotation[target_word] != UNANNOTATED) {
+          source_annotation[source_word] = MISALIGNED;
+          change = true;
+        }
+      }
+    }
+  }
+
+  // print source
+  // shorten source context if too long
   int sentence_start = m_source_position - m_source_start;
-	int context_space = width/2;
-	for(int i=m_source_start;i<=m_source_end;i++)
-		context_space -= m_suffixArray->GetWord( sentence_start + i ).size() + 1;
-	context_space /= 2;
+  int context_space = width/2;
+  for(int i=m_source_start; i<=m_source_end; i++)
+    context_space -= m_suffixArray->GetWord( sentence_start + i ).size() + 1;
+  context_space /= 2;
 
-	int remaining = context_space;
-	int start_word = m_source_start;
-	for(;start_word>0 && remaining>0; start_word--)
-		remaining -= m_suffixArray->GetWord( sentence_start + start_word-1 ).size() + 1;
-	if (remaining<0 || start_word == -1) start_word++;
+  int remaining = context_space;
+  int start_word = m_source_start;
+  for(; start_word>0 && remaining>0; start_word--)
+    remaining -= m_suffixArray->GetWord( sentence_start + start_word-1 ).size() + 1;
+  if (remaining<0 || start_word == -1) start_word++;
 
-	remaining = context_space;
-	int end_word = m_source_end;
-	for(;end_word<m_source_length && remaining>0; end_word++)
-		remaining -= m_suffixArray->GetWord( sentence_start + end_word ).size() + 1;
-	end_word--;
+  remaining = context_space;
+  int end_word = m_source_end;
+  for(; end_word<m_source_length && remaining>0; end_word++)
+    remaining -= m_suffixArray->GetWord( sentence_start + end_word ).size() + 1;
+  end_word--;
 
-	// output with markup
-	*out << "<tr><td class=\"pp_source_left\">";
-	char current_label = UNANNOTATED;
-	if (start_word>0) {
-		current_label = source_annotation[start_word-1];
-		*out << "... ";
-	}
-	for(int i=start_word; i<=end_word; i++) {
-		// change to phrase block
-		if (i == m_source_start) {
-			if (current_label != UNANNOTATED && i!=start_word) 
-				*out << "</span>";				
-			*out << "</td><td class=\"pp_source\">";
-			current_label = UNANNOTATED;
-		}
+  // output with markup
+  *out << "<tr><td class=\"pp_source_left\">";
+  char current_label = UNANNOTATED;
+  if (start_word>0) {
+    current_label = source_annotation[start_word-1];
+    *out << "... ";
+  }
+  for(int i=start_word; i<=end_word; i++) {
+    // change to phrase block
+    if (i == m_source_start) {
+      if (current_label != UNANNOTATED && i!=start_word)
+        *out << "</span>";
+      *out << "</td><td class=\"pp_source\">";
+      current_label = UNANNOTATED;
+    }
 
-		// change to labeled word 
-		else if (source_annotation[i] != current_label &&
-						 source_annotation[i] != ALIGNED) {
-			if (current_label != UNANNOTATED && i!=start_word) 
-				*out << "</span>";
-			if (source_annotation[i] != UNANNOTATED)
-				*out << "<span class=\""
-						 << label_class[ source_annotation[i] ]
-						 << "\">";
-			current_label = source_annotation[i];
-		}
+    // change to labeled word
+    else if (source_annotation[i] != current_label &&
+             source_annotation[i] != ALIGNED) {
+      if (current_label != UNANNOTATED && i!=start_word)
+        *out << "</span>";
+      if (source_annotation[i] != UNANNOTATED)
+        *out << "<span class=\""
+             << label_class[ source_annotation[i] ]
+             << "\">";
+      current_label = source_annotation[i];
+    }
 
-		// output word
-		*out << m_suffixArray->GetWord( sentence_start + i ) << " ";
+    // output word
+    *out << m_suffixArray->GetWord( sentence_start + i ) << " ";
 
-		// change to right context block
-		if (i == m_source_end) {
-			*out << "</td><td class=\"pp_source_right\">";
-			current_label = UNANNOTATED;
-		}
-	}
+    // change to right context block
+    if (i == m_source_end) {
+      *out << "</td><td class=\"pp_source_right\">";
+      current_label = UNANNOTATED;
+    }
+  }
 
-	if (current_label != UNANNOTATED && end_word>m_source_end)
-		*out << "</span>";
-	if (end_word<m_source_length-1)
-		*out << "... ";
+  if (current_label != UNANNOTATED && end_word>m_source_end)
+    *out << "</span>";
+  if (end_word<m_source_length-1)
+    *out << "... ";
 
-	// print target
-	// shorten target context if too long
-	int target_start = -1;
-	int target_end;
-	for(int i=0; i<m_target_length; i++)
-		if (target_annotation[i] != UNANNOTATED) {
-			if (target_start == -1) 
-				target_start = i;
-			target_end = i;
-		}
+  // print target
+  // shorten target context if too long
+  int target_start = -1;
+  int target_end;
+  for(int i=0; i<m_target_length; i++)
+    if (target_annotation[i] != UNANNOTATED) {
+      if (target_start == -1)
+        target_start = i;
+      target_end = i;
+    }
 
-	context_space = width/2;
-	for(int i=target_start;i<=target_end;i++)
-		context_space -= m_targetCorpus->GetWord( m_sentence_id, i ).size() + 1;
-	while (context_space < 0) { // shorten matched part, if too long
-		context_space += 
-			m_targetCorpus->GetWord( m_sentence_id, target_start ).size() +
-			m_targetCorpus->GetWord( m_sentence_id, target_end ).size() + 2;
-		target_start++;
-		target_end--;
-	}
-	context_space /= 2;
+  context_space = width/2;
+  for(int i=target_start; i<=target_end; i++)
+    context_space -= m_targetCorpus->GetWord( m_sentence_id, i ).size() + 1;
+  while (context_space < 0) { // shorten matched part, if too long
+    context_space +=
+      m_targetCorpus->GetWord( m_sentence_id, target_start ).size() +
+      m_targetCorpus->GetWord( m_sentence_id, target_end ).size() + 2;
+    target_start++;
+    target_end--;
+  }
+  context_space /= 2;
 
-	remaining = context_space;
-	start_word = target_start;
-	for(;start_word>0 && remaining>0; start_word--) {
-		//cerr << "remaining: " << remaining << ", start_word: " << start_word << endl;
-		remaining -= m_targetCorpus->GetWord( m_sentence_id, start_word-1 ).size() + 1;
-	}
-	if (remaining<0 || start_word == -1) start_word++;
+  remaining = context_space;
+  start_word = target_start;
+  for(; start_word>0 && remaining>0; start_word--) {
+    //cerr << "remaining: " << remaining << ", start_word: " << start_word << endl;
+    remaining -= m_targetCorpus->GetWord( m_sentence_id, start_word-1 ).size() + 1;
+  }
+  if (remaining<0 || start_word == -1) start_word++;
 
-	remaining = context_space;
-	end_word = target_end;
-	for(;end_word<m_target_length && remaining>0; end_word++) {
-		//cerr << "remaining: " << remaining << ", end_word: " << end_word << endl;
-		remaining -= m_targetCorpus->GetWord( m_sentence_id, end_word ).size() + 1;
-	}
-	end_word--;
+  remaining = context_space;
+  end_word = target_end;
+  for(; end_word<m_target_length && remaining>0; end_word++) {
+    //cerr << "remaining: " << remaining << ", end_word: " << end_word << endl;
+    remaining -= m_targetCorpus->GetWord( m_sentence_id, end_word ).size() + 1;
+  }
+  end_word--;
 
-	// output with markup
-	*out << "</td><td class=\"mismatch_target\">";
-	current_label = UNANNOTATED;
-	if (start_word>0) {
-		current_label = target_annotation[start_word-1];
-		*out << "... ";
-	}
-	for(int i=start_word; i<=end_word; i++) {
-		if (target_annotation[i] != current_label) {
-			if (current_label != UNANNOTATED && i!=start_word) 
-				*out << "</span>";
-			if (target_annotation[i] != UNANNOTATED)
-				*out << "<span class=\""
-						 << label_class[ target_annotation[i] ]
-						 << "\">";
-			current_label = target_annotation[i];
-		}
+  // output with markup
+  *out << "</td><td class=\"mismatch_target\">";
+  current_label = UNANNOTATED;
+  if (start_word>0) {
+    current_label = target_annotation[start_word-1];
+    *out << "... ";
+  }
+  for(int i=start_word; i<=end_word; i++) {
+    if (target_annotation[i] != current_label) {
+      if (current_label != UNANNOTATED && i!=start_word)
+        *out << "</span>";
+      if (target_annotation[i] != UNANNOTATED)
+        *out << "<span class=\""
+             << label_class[ target_annotation[i] ]
+             << "\">";
+      current_label = target_annotation[i];
+    }
 
-		// output word
-		*out << m_targetCorpus->GetWord( m_sentence_id, i ) << " ";
-	}
+    // output word
+    *out << m_targetCorpus->GetWord( m_sentence_id, i ) << " ";
+  }
 
-	if (current_label != UNANNOTATED && end_word>target_end)
-		*out << "</span>";
-	if (end_word<m_target_length-1)
-		*out << "... ";
-	*out << "</td></tr>";
+  if (current_label != UNANNOTATED && end_word>target_end)
+    *out << "</span>";
+  if (end_word<m_target_length-1)
+    *out << "... ";
+  *out << "</td></tr>";
 }
 
-void Mismatch::LabelSourceMatches(int *source_annotation, int *target_annotation, int source_id, int label ) {
-	for(INDEX ap=0; ap<m_num_alignment_points; ap++) {
-		if (m_alignment->GetSourceWord( m_sentence_id, ap ) == source_id) {
-			source_annotation[ source_id ] = label;
-			target_annotation[ m_alignment->GetTargetWord( m_sentence_id, ap ) ] = label;
-		}
-	}
+void Mismatch::LabelSourceMatches(int *source_annotation, int *target_annotation, int source_id, int label )
+{
+  for(INDEX ap=0; ap<m_num_alignment_points; ap++) {
+    if (m_alignment->GetSourceWord( m_sentence_id, ap ) == source_id) {
+      source_annotation[ source_id ] = label;
+      target_annotation[ m_alignment->GetTargetWord( m_sentence_id, ap ) ] = label;
+    }
+  }
 }
diff --git a/biconcor/Mismatch.h b/biconcor/Mismatch.h
index c0063d049..1277ed95a 100644
--- a/biconcor/Mismatch.h
+++ b/biconcor/Mismatch.h
@@ -34,7 +34,9 @@ public:
   Mismatch( SuffixArray *sa, TargetCorpus *tc, Alignment *a, INDEX sentence_id, INDEX position, int source_length, int target_length, int source_start, int source_end );
   ~Mismatch();
 
-  bool Unaligned() const { return m_unaligned; }
+  bool Unaligned() const {
+    return m_unaligned;
+  }
   void PrintClippedHTML(std::ostream* out, int width );
   void LabelSourceMatches(int *source_annotation, int *target_annotation, int source_id, int label );
 };
diff --git a/biconcor/PhrasePair.cpp b/biconcor/PhrasePair.cpp
index 038fa3a31..b6409258b 100644
--- a/biconcor/PhrasePair.cpp
+++ b/biconcor/PhrasePair.cpp
@@ -37,7 +37,7 @@ void PhrasePair::Print( ostream* out ) const
   INDEX ap_points = m_alignment->GetNumberOfAlignmentPoints( m_sentence_id );
   for( INDEX i=0; i<ap_points; i++) {
     *out << " " << m_alignment->GetSourceWord( m_sentence_id, i )
-	 << "-" << m_alignment->GetTargetWord( m_sentence_id, i );
+         << "-" << m_alignment->GetTargetWord( m_sentence_id, i );
   }
 
   *out << endl;
@@ -185,27 +185,27 @@ void PhrasePair::PrintClippedHTML( ostream* out, int width ) const
   size_t source_pre_width = (source_width-source.size())/2;
   size_t source_post_width = (source_width-source.size()+1)/2;
 
-	// if phrase is too long, don't show any context
+  // if phrase is too long, don't show any context
   if (source.size() > (size_t)width) {
     source_pre_width = 0;
     source_post_width = 0;
   }
-	// too long -> truncate and add "..."
+  // too long -> truncate and add "..."
   if (source_pre.size() > source_pre_width) {
-		// first skip up to a space
-		while(source_pre_width>0 &&
-					source_pre.substr(source_pre.size()-source_pre_width,1) != " ") {
-			source_pre_width--;
-		}
+    // first skip up to a space
+    while(source_pre_width>0 &&
+          source_pre.substr(source_pre.size()-source_pre_width,1) != " ") {
+      source_pre_width--;
+    }
     source_pre = "..." + source_pre.substr( source_pre.size()-source_pre_width, source_pre_width );
-	}
+  }
   if (source_post.size() > source_post_width) {
-		while(source_post_width>0 &&
-					source_post.substr(source_post_width-1,1) != " ") {
-			source_post_width--;
-		}
+    while(source_post_width>0 &&
+          source_post.substr(source_post_width-1,1) != " ") {
+      source_post_width--;
+    }
     source_post = source_post.substr( 0, source_post_width ) + "...";
-	}
+  }
 
   *out << "<tr><td class=\"pp_source_left\">"
        << source_pre
@@ -220,13 +220,13 @@ void PhrasePair::PrintClippedHTML( ostream* out, int width ) const
   string target_pre = "";
   string target = "";
   string target_post = "";
-	size_t target_pre_null_width = 0;
-	size_t target_post_null_width = 0;
+  size_t target_pre_null_width = 0;
+  size_t target_post_null_width = 0;
   for( char i=0; i<m_target_start; i++ ) {
-		WORD word = m_targetCorpus->GetWord( m_sentence_id, i);
+    WORD word = m_targetCorpus->GetWord( m_sentence_id, i);
     target_pre += " " + word;
-		if (i >= m_target_start-m_pre_null)
-			target_pre_null_width += word.size() + 1;
+    if (i >= m_target_start-m_pre_null)
+      target_pre_null_width += word.size() + 1;
   }
   for( char i=m_target_start; i<=m_target_end; i++ ) {
     if (i>m_target_start) target += " ";
@@ -234,11 +234,11 @@ void PhrasePair::PrintClippedHTML( ostream* out, int width ) const
   }
   for( char i=m_target_end+1; i<m_target_length; i++ ) {
     if (i>m_target_end+1) target_post += " ";
-		WORD word = m_targetCorpus->GetWord( m_sentence_id, i);
+    WORD word = m_targetCorpus->GetWord( m_sentence_id, i);
     target_post += word;
-		if (i-(m_target_end+1) < m_post_null) {
-			target_post_null_width += word.size() + 1;
-		}
+    if (i-(m_target_end+1) < m_post_null) {
+      target_post_null_width += word.size() + 1;
+    }
   }
 
   size_t target_pre_width = (target_width-target.size())/2;
@@ -249,46 +249,45 @@ void PhrasePair::PrintClippedHTML( ostream* out, int width ) const
     target_post_width = 0;
   }
 
-  if (target_pre.size() < target_pre_width) 
-		target_pre_width = target_pre.size();
-	else {
-		while(target_pre_width>0 &&
-					target_pre.substr(target_pre.size()-target_pre_width,1) != " ") {
-			target_pre_width--;
-		}
+  if (target_pre.size() < target_pre_width)
+    target_pre_width = target_pre.size();
+  else {
+    while(target_pre_width>0 &&
+          target_pre.substr(target_pre.size()-target_pre_width,1) != " ") {
+      target_pre_width--;
+    }
     target_pre = "..." + target_pre.substr( target_pre.size()-target_pre_width, target_pre_width );
-	}
-
-	if (target_post.size() < target_post_width) {
-		target_post_width = target_post.size();
-	}
-	else {
-		while(target_post_width>0 &&
-					target_post.substr(target_post_width-1,1) != " ") {
-			target_post_width--;
-		}
-		target_post = target_post.substr( 0, target_post_width ) + "...";
-	}
-
-	if (m_pre_null) {
-		//cerr << endl << "target_pre_width=" << target_pre_width << ", target_pre_null_width=" << target_pre_null_width << ", target_pre.size()=" << target_pre.size() << endl;
-		if (target_pre_width < target_pre.size())
-			target_pre_null_width -= target_pre.size()-target_pre_width;
-		target_pre = target_pre.substr(0,target_pre_width-target_pre_null_width) 
-			+ "<span class=\"null_aligned\">"
-			+ target_pre.substr(target_pre_width-target_pre_null_width)
-			+ "</span>";
-	}
-	if (m_post_null) {
-		//cerr << endl << "target_post_width=" << target_post_width << ", target_post_null_width=" << target_post_null_width << ", target_post.size()=" << target_post.size() << endl;
-		if (target_post_null_width > target_post.size()) {
-			target_post_null_width = target_post.size();
-		}
-		target_post = "<span class=\"null_aligned\">"
-			+ target_post.substr(0,target_post_null_width) 
-			+ "</span>"
-			+ target_post.substr(target_post_null_width);
-	}
+  }
+
+  if (target_post.size() < target_post_width) {
+    target_post_width = target_post.size();
+  } else {
+    while(target_post_width>0 &&
+          target_post.substr(target_post_width-1,1) != " ") {
+      target_post_width--;
+    }
+    target_post = target_post.substr( 0, target_post_width ) + "...";
+  }
+
+  if (m_pre_null) {
+    //cerr << endl << "target_pre_width=" << target_pre_width << ", target_pre_null_width=" << target_pre_null_width << ", target_pre.size()=" << target_pre.size() << endl;
+    if (target_pre_width < target_pre.size())
+      target_pre_null_width -= target_pre.size()-target_pre_width;
+    target_pre = target_pre.substr(0,target_pre_width-target_pre_null_width)
+                 + "<span class=\"null_aligned\">"
+                 + target_pre.substr(target_pre_width-target_pre_null_width)
+                 + "</span>";
+  }
+  if (m_post_null) {
+    //cerr << endl << "target_post_width=" << target_post_width << ", target_post_null_width=" << target_post_null_width << ", target_post.size()=" << target_post.size() << endl;
+    if (target_post_null_width > target_post.size()) {
+      target_post_null_width = target_post.size();
+    }
+    target_post = "<span class=\"null_aligned\">"
+                  + target_post.substr(0,target_post_null_width)
+                  + "</span>"
+                  + target_post.substr(target_post_null_width);
+  }
 
   *out << "<td class=\"pp_target_left\">"
        << target_pre
diff --git a/biconcor/PhrasePairCollection.cpp b/biconcor/PhrasePairCollection.cpp
index 7497b2af8..dd21faad3 100644
--- a/biconcor/PhrasePairCollection.cpp
+++ b/biconcor/PhrasePairCollection.cpp
@@ -47,15 +47,15 @@ int PhrasePairCollection::GetCollection( const vector< string >& sourceString )
     int sentence_length = m_suffixArray->GetSentenceLength( sentence_id );
     int target_length = m_targetCorpus->GetSentenceLength( sentence_id );
     //cerr << "match " << (i-first_match)
-         //<< " in sentence " << sentence_id
-         //<< ", starting at word " << source_start
-         //<< " of " << sentence_length
-         //<< ". target sentence has " << target_length << " words.";
+    //<< " in sentence " << sentence_id
+    //<< ", starting at word " << source_start
+    //<< " of " << sentence_length
+    //<< ". target sentence has " << target_length << " words.";
     int target_start, target_end, pre_null, post_null;
     if (m_alignment->PhraseAlignment( sentence_id, target_length, source_start, source_end, target_start, target_end, pre_null, post_null)) {
       //cerr << " aligned to [" << (int)target_start << "," << (int)target_end << "]";
       //cerr << " +(" << (int)pre_null << "," << (int)post_null << ")";
-			bool null_boundary_words = false;
+      bool null_boundary_words = false;
       for (int pre = 0; pre <= pre_null && (pre == 0 || null_boundary_words); pre++ ) {
         for (int post = 0; post <= post_null && (post == 0 || null_boundary_words); post++ ) {
           vector< WORD_ID > targetString;
@@ -75,19 +75,18 @@ int PhrasePairCollection::GetCollection( const vector< string >& sourceString )
           m_size++;
         }
       }
+    } else {
+      //cerr << "mismatch " << (i-first_match)
+      //		 << " in sentence " << sentence_id
+      //		 << ", starting at word " << source_start
+      //		 << " of " << sentence_length
+      //		 << ". target sentence has " << target_length << " words.";
+      Mismatch *mismatch = new Mismatch( m_suffixArray, m_targetCorpus, m_alignment, sentence_id, position, sentence_length, target_length, source_start, source_end );
+      if (mismatch->Unaligned())
+        m_unaligned.push_back( mismatch );
+      else
+        m_mismatch.push_back( mismatch );
     }
-		else {
-			//cerr << "mismatch " << (i-first_match)
-			//		 << " in sentence " << sentence_id
-			//		 << ", starting at word " << source_start
-			//		 << " of " << sentence_length
-			//		 << ". target sentence has " << target_length << " words.";
-			Mismatch *mismatch = new Mismatch( m_suffixArray, m_targetCorpus, m_alignment, sentence_id, position, sentence_length, target_length, source_start, source_end );
-			if (mismatch->Unaligned())
-				m_unaligned.push_back( mismatch );
-			else
-				m_mismatch.push_back( mismatch );
-		}
     //cerr << endl;
 
     if (found > (INDEX)m_max_lookup) {
@@ -111,8 +110,7 @@ void PhrasePairCollection::Print(bool pretty) const
     for(int j=0; j<ppWithSameTarget->size() && j<m_max_example; j++, p++ ) {
       if (pretty) {
         (*p)->PrintPretty( &cout, 100 );
-      }
-      else {
+      } else {
         (*p)->Print( &cout );
       }
       if (ppWithSameTarget->size() > m_max_example) {
@@ -125,33 +123,32 @@ void PhrasePairCollection::Print(bool pretty) const
 void PhrasePairCollection::PrintHTML() const
 {
   int pp_target = 0;
-	bool singleton = false;
-	// loop over all translations
+  bool singleton = false;
+  // loop over all translations
   vector< vector<PhrasePair*> >::const_iterator ppWithSameTarget;
   for( ppWithSameTarget = m_collection.begin(); ppWithSameTarget != m_collection.end() && pp_target<m_max_translation; ppWithSameTarget++, pp_target++ ) {
 
-		int count = ppWithSameTarget->size();
-		if (!singleton) {
-			if (count == 1) {
-				singleton = true;
-				cout << "<p class=\"pp_singleton_header\">singleton"
-						 << (m_collection.end() - ppWithSameTarget==1?"":"s") << " ("
-						 << (m_collection.end() - ppWithSameTarget)
-						 << "/" << m_size << ")</p>";
-			}
-			else {
-				cout << "<p class=\"pp_target_header\">";
-				(*(ppWithSameTarget->begin()))->PrintTarget( &cout );
-				cout << " (" << count << "/" << m_size << ")" << endl;
-				cout << "<p><div id=\"pp_" << pp_target << "\">";
-			}
-			cout << "<table align=\"center\">";
-		}
+    int count = ppWithSameTarget->size();
+    if (!singleton) {
+      if (count == 1) {
+        singleton = true;
+        cout << "<p class=\"pp_singleton_header\">singleton"
+             << (m_collection.end() - ppWithSameTarget==1?"":"s") << " ("
+             << (m_collection.end() - ppWithSameTarget)
+             << "/" << m_size << ")</p>";
+      } else {
+        cout << "<p class=\"pp_target_header\">";
+        (*(ppWithSameTarget->begin()))->PrintTarget( &cout );
+        cout << " (" << count << "/" << m_size << ")" << endl;
+        cout << "<p><div id=\"pp_" << pp_target << "\">";
+      }
+      cout << "<table align=\"center\">";
+    }
 
     vector< PhrasePair* >::const_iterator p;
-		// loop over all sentences where translation occurs
+    // loop over all sentences where translation occurs
     int pp=0;
-		int i=0;
+    int i=0;
     for(p = ppWithSameTarget->begin(); i<10 && pp<count && p != ppWithSameTarget->end(); p++, pp++, i++ ) {
       (*p)->PrintClippedHTML( &cout, 160 );
       if (count > m_max_example) {
@@ -159,54 +156,54 @@ void PhrasePairCollection::PrintHTML() const
         pp += count/m_max_example-1;
       }
     }
-		if (i == 10 && pp < count) {			
-			// extended table
-			cout << "<tr><td colspan=7 align=center class=\"pp_more\" onclick=\"javascript:document.getElementById('pp_" << pp_target << "').style.display = 'none'; document.getElementById('pp_ext_" << pp_target << "').style.display = 'block';\">(more)</td></tr></table></div>";
-			cout << "<div id=\"pp_ext_" << pp_target << "\" style=\"display:none;\";\">";
-			cout << "<table align=\"center\">";
-			for(i=0, pp=0, p = ppWithSameTarget->begin(); i<m_max_example && pp<count && p != ppWithSameTarget->end(); p++, pp++, i++ ) {
-				(*p)->PrintClippedHTML( &cout, 160 );
-				if (count > m_max_example) {
-					p += count/m_max_example-1;
-					pp += count/m_max_example-1;
-				}
-			}
-		}
-		if (!singleton) cout << "</table></div>\n";
-		
-		if (!singleton && pp_target == 9) {
-			cout << "<div id=\"pp_toggle\" onclick=\"javascript:document.getElementById('pp_toggle').style.display = 'none'; document.getElementById('pp_additional').style.display = 'block';\">";
-			cout << "<p class=\"pp_target_header\">(more)</p></div>";
-			cout << "<div id=\"pp_additional\" style=\"display:none;\";\">";
-		}
+    if (i == 10 && pp < count) {
+      // extended table
+      cout << "<tr><td colspan=7 align=center class=\"pp_more\" onclick=\"javascript:document.getElementById('pp_" << pp_target << "').style.display = 'none'; document.getElementById('pp_ext_" << pp_target << "').style.display = 'block';\">(more)</td></tr></table></div>";
+      cout << "<div id=\"pp_ext_" << pp_target << "\" style=\"display:none;\";\">";
+      cout << "<table align=\"center\">";
+      for(i=0, pp=0, p = ppWithSameTarget->begin(); i<m_max_example && pp<count && p != ppWithSameTarget->end(); p++, pp++, i++ ) {
+        (*p)->PrintClippedHTML( &cout, 160 );
+        if (count > m_max_example) {
+          p += count/m_max_example-1;
+          pp += count/m_max_example-1;
+        }
+      }
+    }
+    if (!singleton) cout << "</table></div>\n";
+
+    if (!singleton && pp_target == 9) {
+      cout << "<div id=\"pp_toggle\" onclick=\"javascript:document.getElementById('pp_toggle').style.display = 'none'; document.getElementById('pp_additional').style.display = 'block';\">";
+      cout << "<p class=\"pp_target_header\">(more)</p></div>";
+      cout << "<div id=\"pp_additional\" style=\"display:none;\";\">";
+    }
+  }
+  if (singleton) cout << "</table></div>\n";
+  else if (pp_target > 9)	cout << "</div>";
+
+  size_t max_mismatch = m_max_example/3;
+  // unaligned phrases
+  if (m_unaligned.size() > 0) {
+    cout << "<p class=\"pp_singleton_header\">unaligned"
+         << " (" << (m_unaligned.size()) << ")</p>";
+    cout << "<table align=\"center\">";
+    int step_size = 1;
+    if (m_unaligned.size() > max_mismatch)
+      step_size = (m_unaligned.size()+max_mismatch-1) / max_mismatch;
+    for(size_t i=0; i<m_unaligned.size(); i+=step_size)
+      m_unaligned[i]->PrintClippedHTML( &cout, 160 );
+    cout << "</table>";
+  }
+
+  // mismatched phrases
+  if (m_mismatch.size() > 0) {
+    cout << "<p class=\"pp_singleton_header\">mismatched"
+         << " (" << (m_mismatch.size()) << ")</p>";
+    cout << "<table align=\"center\">";
+    int step_size = 1;
+    if (m_mismatch.size() > max_mismatch)
+      step_size = (m_mismatch.size()+max_mismatch-1) / max_mismatch;
+    for(size_t i=0; i<m_mismatch.size(); i+=step_size)
+      m_mismatch[i]->PrintClippedHTML( &cout, 160 );
+    cout << "</table>";
   }
-	if (singleton) cout << "</table></div>\n";
-	else if (pp_target > 9)	cout << "</div>";
-
-	size_t max_mismatch = m_max_example/3;
-	// unaligned phrases
-	if (m_unaligned.size() > 0) {
-		cout << "<p class=\"pp_singleton_header\">unaligned" 
-				 << " (" << (m_unaligned.size()) << ")</p>";
-		cout << "<table align=\"center\">";
-		int step_size = 1;
-		if (m_unaligned.size() > max_mismatch)
-			step_size = (m_unaligned.size()+max_mismatch-1) / max_mismatch;
-		for(size_t i=0;i<m_unaligned.size();i+=step_size)
-			m_unaligned[i]->PrintClippedHTML( &cout, 160 );
-		cout << "</table>";
-	}
-
-	// mismatched phrases
-	if (m_mismatch.size() > 0) {
-		cout << "<p class=\"pp_singleton_header\">mismatched" 
-				 << " (" << (m_mismatch.size()) << ")</p>";
-		cout << "<table align=\"center\">";
-		int step_size = 1;
-		if (m_mismatch.size() > max_mismatch)
-			step_size = (m_mismatch.size()+max_mismatch-1) / max_mismatch;
-		for(size_t i=0;i<m_mismatch.size();i+=step_size)
-			m_mismatch[i]->PrintClippedHTML( &cout, 160 );
-		cout << "</table>";
-	}	
 }
diff --git a/biconcor/SuffixArray.cpp b/biconcor/SuffixArray.cpp
index 15e6b47b0..f4122a2d8 100644
--- a/biconcor/SuffixArray.cpp
+++ b/biconcor/SuffixArray.cpp
@@ -5,7 +5,8 @@
 #include <stdlib.h>
 #include <cstring>
 
-namespace {
+namespace
+{
 
 const int LINE_MAX_LENGTH = 10000;
 
@@ -14,15 +15,15 @@ const int LINE_MAX_LENGTH = 10000;
 using namespace std;
 
 SuffixArray::SuffixArray()
-    : m_array(NULL),
-      m_index(NULL),
-      m_buffer(NULL),
-      m_wordInSentence(NULL),
-      m_sentence(NULL),
-      m_sentenceLength(NULL),
-      m_vcb(),
-      m_size(0),
-      m_sentenceCount(0) { }
+  : m_array(NULL),
+    m_index(NULL),
+    m_buffer(NULL),
+    m_wordInSentence(NULL),
+    m_sentence(NULL),
+    m_sentenceLength(NULL),
+    m_vcb(),
+    m_size(0),
+    m_sentenceCount(0) { }
 
 SuffixArray::~SuffixArray()
 {
diff --git a/biconcor/TargetCorpus.cpp b/biconcor/TargetCorpus.cpp
index d331a548a..06468007f 100644
--- a/biconcor/TargetCorpus.cpp
+++ b/biconcor/TargetCorpus.cpp
@@ -5,7 +5,8 @@
 #include <stdlib.h>
 #include <cstring>
 
-namespace {
+namespace
+{
 
 const int LINE_MAX_LENGTH = 10000;
 
@@ -14,11 +15,11 @@ const int LINE_MAX_LENGTH = 10000;
 using namespace std;
 
 TargetCorpus::TargetCorpus()
-    : m_array(NULL),
-      m_sentenceEnd(NULL),
-      m_vcb(),
-      m_size(0),
-      m_sentenceCount(0) {}
+  : m_array(NULL),
+    m_sentenceEnd(NULL),
+    m_vcb(),
+    m_size(0),
+    m_sentenceCount(0) {}
 
 TargetCorpus::~TargetCorpus()
 {
diff --git a/biconcor/Vocabulary.cpp b/biconcor/Vocabulary.cpp
index 9c35b3feb..9d52ee44e 100644
--- a/biconcor/Vocabulary.cpp
+++ b/biconcor/Vocabulary.cpp
@@ -2,7 +2,8 @@
 #include "Vocabulary.h"
 #include <fstream>
 
-namespace {
+namespace
+{
 
 const int MAX_LENGTH = 10000;
 
diff --git a/biconcor/base64.cpp b/biconcor/base64.cpp
index 2a863d161..8032399b5 100644
--- a/biconcor/base64.cpp
+++ b/biconcor/base64.cpp
@@ -1,4 +1,4 @@
-/* 
+/*
    base64.cpp and base64.h
 
    Copyright (C) 2004-2008 René Nyffenegger
@@ -28,17 +28,19 @@
 #include "base64.h"
 #include <iostream>
 
-static const std::string base64_chars = 
-             "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
-             "abcdefghijklmnopqrstuvwxyz"
-             "0123456789+/";
+static const std::string base64_chars =
+  "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+  "abcdefghijklmnopqrstuvwxyz"
+  "0123456789+/";
 
 
-static inline bool is_base64(unsigned char c) {
+static inline bool is_base64(unsigned char c)
+{
   return (isalnum(c) || (c == '+') || (c == '/'));
 }
 
-std::string base64_encode(unsigned char const* bytes_to_encode, unsigned int in_len) {
+std::string base64_encode(unsigned char const* bytes_to_encode, unsigned int in_len)
+{
   std::string ret;
   int i = 0;
   int j = 0;
@@ -59,8 +61,7 @@ std::string base64_encode(unsigned char const* bytes_to_encode, unsigned int in_
     }
   }
 
-  if (i)
-  {
+  if (i) {
     for(j = i; j < 3; j++)
       char_array_3[j] = '\0';
 
@@ -81,7 +82,8 @@ std::string base64_encode(unsigned char const* bytes_to_encode, unsigned int in_
 
 }
 
-std::string base64_decode(std::string const& encoded_string) {
+std::string base64_decode(std::string const& encoded_string)
+{
   int in_len = encoded_string.size();
   int i = 0;
   int j = 0;
@@ -90,7 +92,8 @@ std::string base64_decode(std::string const& encoded_string) {
   std::string ret;
 
   while (in_len-- && ( encoded_string[in_] != '=') && is_base64(encoded_string[in_])) {
-    char_array_4[i++] = encoded_string[in_]; in_++;
+    char_array_4[i++] = encoded_string[in_];
+    in_++;
     if (i ==4) {
       for (i = 0; i <4; i++)
         char_array_4[i] = base64_chars.find(char_array_4[i]);
diff --git a/biconcor/biconcor.cpp b/biconcor/biconcor.cpp
index f4e7c03fb..cb63e855d 100644
--- a/biconcor/biconcor.cpp
+++ b/biconcor/biconcor.cpp
@@ -150,22 +150,19 @@ int main(int argc, char* argv[])
       cout << "TOTAL: " << total << endl;
       if (htmlFlag) {
         ppCollection.PrintHTML();
-      }
-      else {
-	ppCollection.Print(prettyFlag);
+      } else {
+        ppCollection.Print(prettyFlag);
       }
       cout << "-|||- BICONCOR END -|||-" << endl << flush;
     }
-  }
-  else if (queryFlag) {
+  } else if (queryFlag) {
     cerr << "query is " << query << endl;
     vector< string > queryString = alignment.Tokenize( query.c_str() );
     PhrasePairCollection ppCollection( &suffixArray, &targetCorpus, &alignment, max_translation, max_example );
     ppCollection.GetCollection( queryString );
     if (htmlFlag) {
       ppCollection.PrintHTML();
-    }
-    else {
+    } else {
       ppCollection.Print(prettyFlag);
     }
   }
author	Hieu Hoang <hieu@hoang.co.uk>	2013-05-29 21:16:15 +0400
committer	Hieu Hoang <hieu@hoang.co.uk>	2013-05-29 21:16:15 +0400
commit	6249432407af8730c10bccc7894c0725fcaf5e47 (patch)
tree	3ac1f094b9fdc199b04bc5ef209ce00e3596e37d /biconcor
parent	59bd7deb4b6b9c4f7b3b7dbb055783528fbc31ca (diff)