Welcome to the mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Hieu Hoang <hieu@hoang.co.uk>, 2013-05-29 21:16:15 +0400
committer: Hieu Hoang <hieu@hoang.co.uk>, 2013-05-29 21:16:15 +0400
commit: 6249432407af8730c10bccc7894c0725fcaf5e47 (patch)
tree: 3ac1f094b9fdc199b04bc5ef209ce00e3596e37d /biconcor/Mismatch.cpp
parent: 59bd7deb4b6b9c4f7b3b7dbb055783528fbc31ca (diff)
beautify
Diffstat (limited to 'biconcor/Mismatch.cpp')
-rw-r--r-- biconcor/Mismatch.cpp 443
1 file changed, 222 insertions, 221 deletions
diff --git a/biconcor/Mismatch.cpp b/biconcor/Mismatch.cpp
index 31140b200..c3afec781 100644
--- a/biconcor/Mismatch.cpp
+++ b/biconcor/Mismatch.cpp
@@ -23,16 +23,16 @@ enum {
};
Mismatch::Mismatch( SuffixArray *sa, TargetCorpus *tc, Alignment *a, INDEX sentence_id, INDEX position, int source_length, int target_length, int source_start, int source_end )
- :m_suffixArray(sa)
- ,m_targetCorpus(tc)
- ,m_alignment(a)
- ,m_sentence_id(sentence_id)
- ,m_source_length(source_length)
- ,m_target_length(target_length)
- ,m_source_position(position)
- ,m_source_start(source_start)
- ,m_source_end(source_end)
- ,m_unaligned(true)
+ :m_suffixArray(sa)
+ ,m_targetCorpus(tc)
+ ,m_alignment(a)
+ ,m_sentence_id(sentence_id)
+ ,m_source_length(source_length)
+ ,m_target_length(target_length)
+ ,m_source_position(position)
+ ,m_source_start(source_start)
+ ,m_source_end(source_end)
+ ,m_unaligned(true)
{
// initialize unaligned indexes
for (int i = 0; i < m_source_length; i++) {
@@ -42,7 +42,7 @@ Mismatch::Mismatch( SuffixArray *sa, TargetCorpus *tc, Alignment *a, INDEX sente
m_target_unaligned[i] = true;
}
m_num_alignment_points =
- m_alignment->GetNumberOfAlignmentPoints( sentence_id );
+ m_alignment->GetNumberOfAlignmentPoints( sentence_id );
for(INDEX ap=0; ap<m_num_alignment_points; ap++) {
m_source_unaligned[ (int)m_alignment->GetSourceWord( sentence_id, ap ) ] = false;
m_target_unaligned[ (int)m_alignment->GetTargetWord( sentence_id, ap ) ] = false;
@@ -58,234 +58,235 @@ Mismatch::~Mismatch () {}
void Mismatch::PrintClippedHTML( ostream* out, int width )
{
- int source_annotation[256], target_annotation[256];
- vector< string > label_class;
- label_class.push_back( "" );
- label_class.push_back( "mismatch_pre_aligned" );
- label_class.push_back( "mismatch_post_aligned" );
- label_class.push_back( "null_aligned" );
- label_class.push_back( "mismatch_misaligned" );
- label_class.push_back( "mismatch_aligned" );
+ int source_annotation[256], target_annotation[256];
+ vector< string > label_class;
+ label_class.push_back( "" );
+ label_class.push_back( "mismatch_pre_aligned" );
+ label_class.push_back( "mismatch_post_aligned" );
+ label_class.push_back( "null_aligned" );
+ label_class.push_back( "mismatch_misaligned" );
+ label_class.push_back( "mismatch_aligned" );
- for(int i=0; i<m_source_length;i++) source_annotation[i] = UNANNOTATED;
- for(int i=0; i<m_target_length;i++) target_annotation[i] = UNANNOTATED;
-
- if (m_unaligned) {
- // find alignment points for prior and next word(s) and
- // center target phrase around those.
- bool found_aligned = false;
- for(int i=1; i<m_source_length && !found_aligned; i++) {
- if (m_source_start-i >= 0) {
- int word_id = m_source_start-i;
- source_annotation[ word_id ] = UNALIGNED;
- if (!m_source_unaligned[ word_id ]) {
- found_aligned = true;
- LabelSourceMatches( source_annotation, target_annotation, word_id, PRE_ALIGNED );
- }
- }
+ for(int i=0; i<m_source_length; i++) source_annotation[i] = UNANNOTATED;
+ for(int i=0; i<m_target_length; i++) target_annotation[i] = UNANNOTATED;
- if (m_source_end+i < m_source_length) {
- int word_id = m_source_end+i;
- source_annotation[ word_id ] = UNALIGNED;
- if (!m_source_unaligned[ word_id ]) {
- found_aligned = true;
- LabelSourceMatches( source_annotation, target_annotation, word_id, POST_ALIGNED );
- }
- }
- }
-
- }
- // misalignment
- else {
- // label aligned output words
- for(int i=m_source_start; i<=m_source_end; i++)
- LabelSourceMatches( source_annotation, target_annotation, i, ALIGNED );
+ if (m_unaligned) {
+ // find alignment points for prior and next word(s) and
+ // center target phrase around those.
+ bool found_aligned = false;
+ for(int i=1; i<m_source_length && !found_aligned; i++) {
+ if (m_source_start-i >= 0) {
+ int word_id = m_source_start-i;
+ source_annotation[ word_id ] = UNALIGNED;
+ if (!m_source_unaligned[ word_id ]) {
+ found_aligned = true;
+ LabelSourceMatches( source_annotation, target_annotation, word_id, PRE_ALIGNED );
+ }
+ }
- // find first and last
- int target_start = -1;
- int target_end;
- for(int i=0; i<m_target_length; i++)
- if (target_annotation[i] == ALIGNED) {
- if (target_start == -1)
- target_start = i;
- target_end = i;
- }
- // go over all enclosed target words
- for(int i=target_start; i<=target_end; i++) {
- // label other target words as unaligned or misaligned
- if (m_target_unaligned[ i ])
- target_annotation[ i ] = UNALIGNED;
- else {
- if (target_annotation[ i ] != ALIGNED)
- target_annotation[ i ] = MISALIGNED;
- // loop over aligned source words
- for(INDEX ap=0; ap<m_num_alignment_points; ap++) {
- if (m_alignment->GetTargetWord( m_sentence_id, ap ) == i) {
- int source_word = m_alignment->GetSourceWord( m_sentence_id, ap );
- // if not part of the source phrase -> also misaligned
- if (source_word < m_source_start || source_word > m_source_end)
- source_annotation[ source_word ] = MISALIGNED;
- }
- }
- }
- }
- // closure
- bool change = true;
- while(change) {
- change = false;
- for(INDEX ap=0; ap<m_num_alignment_points; ap++) {
- int source_word = m_alignment->GetSourceWord( m_sentence_id, ap );
- int target_word = m_alignment->GetTargetWord( m_sentence_id, ap );
- if (source_annotation[source_word] != UNANNOTATED &&
- target_annotation[target_word] == UNANNOTATED) {
- target_annotation[target_word] = MISALIGNED;
- change = true;
- }
- if (source_annotation[source_word] == UNANNOTATED &&
- target_annotation[target_word] != UNANNOTATED) {
- source_annotation[source_word] = MISALIGNED;
- change = true;
- }
- }
- }
- }
-
- // print source
- // shorten source context if too long
+ if (m_source_end+i < m_source_length) {
+ int word_id = m_source_end+i;
+ source_annotation[ word_id ] = UNALIGNED;
+ if (!m_source_unaligned[ word_id ]) {
+ found_aligned = true;
+ LabelSourceMatches( source_annotation, target_annotation, word_id, POST_ALIGNED );
+ }
+ }
+ }
+
+ }
+ // misalignment
+ else {
+ // label aligned output words
+ for(int i=m_source_start; i<=m_source_end; i++)
+ LabelSourceMatches( source_annotation, target_annotation, i, ALIGNED );
+
+ // find first and last
+ int target_start = -1;
+ int target_end;
+ for(int i=0; i<m_target_length; i++)
+ if (target_annotation[i] == ALIGNED) {
+ if (target_start == -1)
+ target_start = i;
+ target_end = i;
+ }
+ // go over all enclosed target words
+ for(int i=target_start; i<=target_end; i++) {
+ // label other target words as unaligned or misaligned
+ if (m_target_unaligned[ i ])
+ target_annotation[ i ] = UNALIGNED;
+ else {
+ if (target_annotation[ i ] != ALIGNED)
+ target_annotation[ i ] = MISALIGNED;
+ // loop over aligned source words
+ for(INDEX ap=0; ap<m_num_alignment_points; ap++) {
+ if (m_alignment->GetTargetWord( m_sentence_id, ap ) == i) {
+ int source_word = m_alignment->GetSourceWord( m_sentence_id, ap );
+ // if not part of the source phrase -> also misaligned
+ if (source_word < m_source_start || source_word > m_source_end)
+ source_annotation[ source_word ] = MISALIGNED;
+ }
+ }
+ }
+ }
+ // closure
+ bool change = true;
+ while(change) {
+ change = false;
+ for(INDEX ap=0; ap<m_num_alignment_points; ap++) {
+ int source_word = m_alignment->GetSourceWord( m_sentence_id, ap );
+ int target_word = m_alignment->GetTargetWord( m_sentence_id, ap );
+ if (source_annotation[source_word] != UNANNOTATED &&
+ target_annotation[target_word] == UNANNOTATED) {
+ target_annotation[target_word] = MISALIGNED;
+ change = true;
+ }
+ if (source_annotation[source_word] == UNANNOTATED &&
+ target_annotation[target_word] != UNANNOTATED) {
+ source_annotation[source_word] = MISALIGNED;
+ change = true;
+ }
+ }
+ }
+ }
+
+ // print source
+ // shorten source context if too long
int sentence_start = m_source_position - m_source_start;
- int context_space = width/2;
- for(int i=m_source_start;i<=m_source_end;i++)
- context_space -= m_suffixArray->GetWord( sentence_start + i ).size() + 1;
- context_space /= 2;
+ int context_space = width/2;
+ for(int i=m_source_start; i<=m_source_end; i++)
+ context_space -= m_suffixArray->GetWord( sentence_start + i ).size() + 1;
+ context_space /= 2;
- int remaining = context_space;
- int start_word = m_source_start;
- for(;start_word>0 && remaining>0; start_word--)
- remaining -= m_suffixArray->GetWord( sentence_start + start_word-1 ).size() + 1;
- if (remaining<0 || start_word == -1) start_word++;
+ int remaining = context_space;
+ int start_word = m_source_start;
+ for(; start_word>0 && remaining>0; start_word--)
+ remaining -= m_suffixArray->GetWord( sentence_start + start_word-1 ).size() + 1;
+ if (remaining<0 || start_word == -1) start_word++;
- remaining = context_space;
- int end_word = m_source_end;
- for(;end_word<m_source_length && remaining>0; end_word++)
- remaining -= m_suffixArray->GetWord( sentence_start + end_word ).size() + 1;
- end_word--;
+ remaining = context_space;
+ int end_word = m_source_end;
+ for(; end_word<m_source_length && remaining>0; end_word++)
+ remaining -= m_suffixArray->GetWord( sentence_start + end_word ).size() + 1;
+ end_word--;
- // output with markup
- *out << "<tr><td class=\"pp_source_left\">";
- char current_label = UNANNOTATED;
- if (start_word>0) {
- current_label = source_annotation[start_word-1];
- *out << "... ";
- }
- for(int i=start_word; i<=end_word; i++) {
- // change to phrase block
- if (i == m_source_start) {
- if (current_label != UNANNOTATED && i!=start_word)
- *out << "</span>";
- *out << "</td><td class=\"pp_source\">";
- current_label = UNANNOTATED;
- }
+ // output with markup
+ *out << "<tr><td class=\"pp_source_left\">";
+ char current_label = UNANNOTATED;
+ if (start_word>0) {
+ current_label = source_annotation[start_word-1];
+ *out << "... ";
+ }
+ for(int i=start_word; i<=end_word; i++) {
+ // change to phrase block
+ if (i == m_source_start) {
+ if (current_label != UNANNOTATED && i!=start_word)
+ *out << "</span>";
+ *out << "</td><td class=\"pp_source\">";
+ current_label = UNANNOTATED;
+ }
- // change to labeled word
- else if (source_annotation[i] != current_label &&
- source_annotation[i] != ALIGNED) {
- if (current_label != UNANNOTATED && i!=start_word)
- *out << "</span>";
- if (source_annotation[i] != UNANNOTATED)
- *out << "<span class=\""
- << label_class[ source_annotation[i] ]
- << "\">";
- current_label = source_annotation[i];
- }
+ // change to labeled word
+ else if (source_annotation[i] != current_label &&
+ source_annotation[i] != ALIGNED) {
+ if (current_label != UNANNOTATED && i!=start_word)
+ *out << "</span>";
+ if (source_annotation[i] != UNANNOTATED)
+ *out << "<span class=\""
+ << label_class[ source_annotation[i] ]
+ << "\">";
+ current_label = source_annotation[i];
+ }
- // output word
- *out << m_suffixArray->GetWord( sentence_start + i ) << " ";
+ // output word
+ *out << m_suffixArray->GetWord( sentence_start + i ) << " ";
- // change to right context block
- if (i == m_source_end) {
- *out << "</td><td class=\"pp_source_right\">";
- current_label = UNANNOTATED;
- }
- }
+ // change to right context block
+ if (i == m_source_end) {
+ *out << "</td><td class=\"pp_source_right\">";
+ current_label = UNANNOTATED;
+ }
+ }
- if (current_label != UNANNOTATED && end_word>m_source_end)
- *out << "</span>";
- if (end_word<m_source_length-1)
- *out << "... ";
+ if (current_label != UNANNOTATED && end_word>m_source_end)
+ *out << "</span>";
+ if (end_word<m_source_length-1)
+ *out << "... ";
- // print target
- // shorten target context if too long
- int target_start = -1;
- int target_end;
- for(int i=0; i<m_target_length; i++)
- if (target_annotation[i] != UNANNOTATED) {
- if (target_start == -1)
- target_start = i;
- target_end = i;
- }
+ // print target
+ // shorten target context if too long
+ int target_start = -1;
+ int target_end;
+ for(int i=0; i<m_target_length; i++)
+ if (target_annotation[i] != UNANNOTATED) {
+ if (target_start == -1)
+ target_start = i;
+ target_end = i;
+ }
- context_space = width/2;
- for(int i=target_start;i<=target_end;i++)
- context_space -= m_targetCorpus->GetWord( m_sentence_id, i ).size() + 1;
- while (context_space < 0) { // shorten matched part, if too long
- context_space +=
- m_targetCorpus->GetWord( m_sentence_id, target_start ).size() +
- m_targetCorpus->GetWord( m_sentence_id, target_end ).size() + 2;
- target_start++;
- target_end--;
- }
- context_space /= 2;
+ context_space = width/2;
+ for(int i=target_start; i<=target_end; i++)
+ context_space -= m_targetCorpus->GetWord( m_sentence_id, i ).size() + 1;
+ while (context_space < 0) { // shorten matched part, if too long
+ context_space +=
+ m_targetCorpus->GetWord( m_sentence_id, target_start ).size() +
+ m_targetCorpus->GetWord( m_sentence_id, target_end ).size() + 2;
+ target_start++;
+ target_end--;
+ }
+ context_space /= 2;
- remaining = context_space;
- start_word = target_start;
- for(;start_word>0 && remaining>0; start_word--) {
- //cerr << "remaining: " << remaining << ", start_word: " << start_word << endl;
- remaining -= m_targetCorpus->GetWord( m_sentence_id, start_word-1 ).size() + 1;
- }
- if (remaining<0 || start_word == -1) start_word++;
+ remaining = context_space;
+ start_word = target_start;
+ for(; start_word>0 && remaining>0; start_word--) {
+ //cerr << "remaining: " << remaining << ", start_word: " << start_word << endl;
+ remaining -= m_targetCorpus->GetWord( m_sentence_id, start_word-1 ).size() + 1;
+ }
+ if (remaining<0 || start_word == -1) start_word++;
- remaining = context_space;
- end_word = target_end;
- for(;end_word<m_target_length && remaining>0; end_word++) {
- //cerr << "remaining: " << remaining << ", end_word: " << end_word << endl;
- remaining -= m_targetCorpus->GetWord( m_sentence_id, end_word ).size() + 1;
- }
- end_word--;
+ remaining = context_space;
+ end_word = target_end;
+ for(; end_word<m_target_length && remaining>0; end_word++) {
+ //cerr << "remaining: " << remaining << ", end_word: " << end_word << endl;
+ remaining -= m_targetCorpus->GetWord( m_sentence_id, end_word ).size() + 1;
+ }
+ end_word--;
- // output with markup
- *out << "</td><td class=\"mismatch_target\">";
- current_label = UNANNOTATED;
- if (start_word>0) {
- current_label = target_annotation[start_word-1];
- *out << "... ";
- }
- for(int i=start_word; i<=end_word; i++) {
- if (target_annotation[i] != current_label) {
- if (current_label != UNANNOTATED && i!=start_word)
- *out << "</span>";
- if (target_annotation[i] != UNANNOTATED)
- *out << "<span class=\""
- << label_class[ target_annotation[i] ]
- << "\">";
- current_label = target_annotation[i];
- }
+ // output with markup
+ *out << "</td><td class=\"mismatch_target\">";
+ current_label = UNANNOTATED;
+ if (start_word>0) {
+ current_label = target_annotation[start_word-1];
+ *out << "... ";
+ }
+ for(int i=start_word; i<=end_word; i++) {
+ if (target_annotation[i] != current_label) {
+ if (current_label != UNANNOTATED && i!=start_word)
+ *out << "</span>";
+ if (target_annotation[i] != UNANNOTATED)
+ *out << "<span class=\""
+ << label_class[ target_annotation[i] ]
+ << "\">";
+ current_label = target_annotation[i];
+ }
- // output word
- *out << m_targetCorpus->GetWord( m_sentence_id, i ) << " ";
- }
+ // output word
+ *out << m_targetCorpus->GetWord( m_sentence_id, i ) << " ";
+ }
- if (current_label != UNANNOTATED && end_word>target_end)
- *out << "</span>";
- if (end_word<m_target_length-1)
- *out << "... ";
- *out << "</td></tr>";
+ if (current_label != UNANNOTATED && end_word>target_end)
+ *out << "</span>";
+ if (end_word<m_target_length-1)
+ *out << "... ";
+ *out << "</td></tr>";
}
-void Mismatch::LabelSourceMatches(int *source_annotation, int *target_annotation, int source_id, int label ) {
- for(INDEX ap=0; ap<m_num_alignment_points; ap++) {
- if (m_alignment->GetSourceWord( m_sentence_id, ap ) == source_id) {
- source_annotation[ source_id ] = label;
- target_annotation[ m_alignment->GetTargetWord( m_sentence_id, ap ) ] = label;
- }
- }
+void Mismatch::LabelSourceMatches(int *source_annotation, int *target_annotation, int source_id, int label )
+{
+ for(INDEX ap=0; ap<m_num_alignment_points; ap++) {
+ if (m_alignment->GetSourceWord( m_sentence_id, ap ) == source_id) {
+ source_annotation[ source_id ] = label;
+ target_annotation[ m_alignment->GetTargetWord( m_sentence_id, ap ) ] = label;
+ }
+ }
}