Welcome to mirror list, hosted at ThFree Co, Russian Federation.

Mismatch.h « biconcor « ems « scripts - github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: bfcbf4fd8741def6eb8b35421423a9ba85bbaf1c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
#include <string>
#include <stdlib.h>
#include <cstring>
#include <fstream>
#include <sstream>
#include <iostream>
#include "SuffixArray.h"
#include "TargetCorpus.h"
#include "Alignment.h"
#pragma once

using namespace std;

/**
 * Describes the source span [source_start, source_end] of one sentence and
 * which words of that sentence pair have no alignment point.
 *
 * The constructor stores the corpus pointers it is given (the destructor does
 * not delete them), builds the per-word "unaligned" flags from the Alignment
 * object, and determines whether the whole source span is unaligned.
 * PrintClippedHTML() and LabelSourceMatches() are only declared here; their
 * definitions live elsewhere.
 */
class Mismatch
{
public:
  typedef unsigned int INDEX;

private:
  // Number of entries in each of the per-word flag arrays below.
  enum { MAX_WORDS = 256 };

  SuffixArray *m_suffixArray;
  TargetCorpus *m_targetCorpus;
  Alignment *m_alignment;
  INDEX m_sentence_id;
  INDEX m_num_alignment_points;
  char m_source_length;
  char m_target_length;
  SuffixArray::INDEX m_source_position;
  char m_source_start, m_source_end;
  // m_source_unaligned[i] / m_target_unaligned[i] are non-zero when no
  // alignment point of the sentence refers to source / target word i.
  // Only the first m_source_length / m_target_length entries are set up.
  char m_source_unaligned[ MAX_WORDS ];
  char m_target_unaligned[ MAX_WORDS ];
  // Non-zero when every source word in [m_source_start, m_source_end] is
  // unaligned.
  char m_unaligned;

public:
  /**
   * @param sa              suffix array, stored as-is
   * @param tc              target corpus, stored as-is
   * @param a               alignment, queried here for the alignment points
   *                        of sentence_id and stored as-is
   * @param sentence_id     sentence whose alignment points are examined
   * @param position        stored as the source position
   * @param source_length   number of source-word flags to initialise
   * @param target_length   number of target-word flags to initialise
   * @param source_start    first source word of the span (inclusive)
   * @param source_end      last source word of the span (inclusive)
   */
  Mismatch( SuffixArray *sa, TargetCorpus *tc, Alignment *a, INDEX sentence_id, INDEX position, char source_length, char target_length, char source_start, char source_end )
    // Listed in declaration order, which is the order the language actually
    // uses for initialisation.
    :m_suffixArray(sa)
    ,m_targetCorpus(tc)
    ,m_alignment(a)
    ,m_sentence_id(sentence_id)
    ,m_num_alignment_points(a->GetNumberOfAlignmentPoints( sentence_id ))
    ,m_source_length(source_length)
    ,m_target_length(target_length)
    ,m_source_position(position)
    ,m_source_start(source_start)
    ,m_source_end(source_end)
    ,m_unaligned(true)
  {
    // Start with every word marked as unaligned. The counters are int, not
    // char, so they cannot wrap around when a bound equals CHAR_MAX.
    for (int i = 0; i < m_source_length; ++i) {
      m_source_unaligned[i] = true;
    }
    for (int i = 0; i < m_target_length; ++i) {
      m_target_unaligned[i] = true;
    }

    // Clear the flag of every word that takes part in an alignment point.
    // Indexes outside the flag arrays are ignored instead of being written
    // out of bounds.
    for (INDEX ap = 0; ap < m_num_alignment_points; ++ap) {
      const int source_word = static_cast<int>( m_alignment->GetSourceWord( sentence_id, ap ) );
      if (source_word >= 0 && source_word < MAX_WORDS) {
        m_source_unaligned[ source_word ] = false;
      }
      const int target_word = static_cast<int>( m_alignment->GetTargetWord( sentence_id, ap ) );
      if (target_word >= 0 && target_word < MAX_WORDS) {
        m_target_unaligned[ target_word ] = false;
      }
    }

    // The span counts as unaligned only if none of its words is aligned;
    // one aligned word settles the answer, so stop there.
    for (int i = source_start; i <= source_end; ++i) {
      if (!m_source_unaligned[ i ]) {
        m_unaligned = false;
        break;
      }
    }
  }
  ~Mismatch () {}

  /** @return true when no word of the source span has an alignment point. */
  bool Unaligned() const { return m_unaligned; }
  void PrintClippedHTML( std::ostream* out, int width );
  void LabelSourceMatches( char *source_annotation, char *target_annotation, char source_id, char label );
};