Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'moses/TranslationModel')
-rw-r--r--moses/TranslationModel/CompactPT/BlockHashIndex.h4
-rw-r--r--moses/TranslationModel/CompactPT/MurmurHash3.cpp850
-rw-r--r--moses/TranslationModel/CompactPT/MurmurHash3.h74
-rw-r--r--moses/TranslationModel/PhraseDictionary.h10
-rw-r--r--moses/TranslationModel/ProbingPT/hash.hh2
-rw-r--r--moses/TranslationModel/ProbingPT/storing.hh2
-rw-r--r--moses/TranslationModel/UG/TargetPhraseCollectionCache.cc52
-rw-r--r--moses/TranslationModel/UG/TargetPhraseCollectionCache.h18
-rw-r--r--moses/TranslationModel/UG/bitext-find.cc36
-rw-r--r--moses/TranslationModel/UG/count-ptable-features.cc4
-rw-r--r--moses/TranslationModel/UG/generic/file_io/ug_stream.cpp8
-rw-r--r--moses/TranslationModel/UG/generic/file_io/ug_stream.h2
-rw-r--r--moses/TranslationModel/UG/generic/program_options/ug_get_options.cpp8
-rw-r--r--moses/TranslationModel/UG/generic/program_options/ug_get_options.h12
-rw-r--r--moses/TranslationModel/UG/generic/program_options/ug_splice_arglist.cc12
-rw-r--r--moses/TranslationModel/UG/generic/program_options/ug_splice_arglist.h8
-rw-r--r--moses/TranslationModel/UG/generic/sorting/VectorIndexSorter.h22
-rw-r--r--moses/TranslationModel/UG/generic/stringdist/ug_stringdist.cc78
-rw-r--r--moses/TranslationModel/UG/generic/stringdist/ug_stringdist.h16
-rw-r--r--moses/TranslationModel/UG/generic/threading/ug_thread_safe_counter.cc16
-rw-r--r--moses/TranslationModel/UG/mm/calc-coverage.cc2
-rw-r--r--moses/TranslationModel/UG/mm/custom-pt.cc34
-rw-r--r--moses/TranslationModel/UG/mm/mam2symal.cc14
-rw-r--r--moses/TranslationModel/UG/mm/mam_verify.cc12
-rw-r--r--moses/TranslationModel/UG/mm/mmlex-build.cc50
-rw-r--r--moses/TranslationModel/UG/mm/mmlex-lookup.cc30
-rw-r--r--moses/TranslationModel/UG/mm/mtt-build.cc102
-rw-r--r--moses/TranslationModel/UG/mm/mtt-count-words.cc6
-rw-r--r--moses/TranslationModel/UG/mm/mtt-demo1.cc10
-rw-r--r--moses/TranslationModel/UG/mm/mtt-dump.cc30
-rw-r--r--moses/TranslationModel/UG/mm/mtt.count.cc8
-rw-r--r--moses/TranslationModel/UG/mm/num_read_write.cc30
-rw-r--r--moses/TranslationModel/UG/mm/num_read_write.h8
-rw-r--r--moses/TranslationModel/UG/mm/obsolete/ug_bitext_base.h36
-rw-r--r--moses/TranslationModel/UG/mm/symal2mam.cc48
-rw-r--r--moses/TranslationModel/UG/mm/tpt_pickler.cc110
-rw-r--r--moses/TranslationModel/UG/mm/tpt_pickler.h36
-rw-r--r--moses/TranslationModel/UG/mm/tpt_tightindex.cc212
-rw-r--r--moses/TranslationModel/UG/mm/tpt_tightindex.h36
-rw-r--r--moses/TranslationModel/UG/mm/tpt_tokenindex.cc90
-rw-r--r--moses/TranslationModel/UG/mm/tpt_tokenindex.h30
-rw-r--r--moses/TranslationModel/UG/mm/tpt_typedefs.h2
-rw-r--r--moses/TranslationModel/UG/mm/ug_bitext.cc46
-rw-r--r--moses/TranslationModel/UG/mm/ug_bitext.h222
-rw-r--r--moses/TranslationModel/UG/mm/ug_bitext_agenda.h46
-rw-r--r--moses/TranslationModel/UG/mm/ug_bitext_agenda_job.h98
-rw-r--r--moses/TranslationModel/UG/mm/ug_bitext_agenda_worker.h16
-rw-r--r--moses/TranslationModel/UG/mm/ug_bitext_jstats.cc20
-rw-r--r--moses/TranslationModel/UG/mm/ug_bitext_jstats.h14
-rw-r--r--moses/TranslationModel/UG/mm/ug_bitext_pstats.cc22
-rw-r--r--moses/TranslationModel/UG/mm/ug_bitext_pstats.h16
-rw-r--r--moses/TranslationModel/UG/mm/ug_conll_bottom_up_token.h10
-rw-r--r--moses/TranslationModel/UG/mm/ug_conll_record.h16
-rw-r--r--moses/TranslationModel/UG/mm/ug_corpus_token.cc6
-rw-r--r--moses/TranslationModel/UG/mm/ug_corpus_token.h28
-rw-r--r--moses/TranslationModel/UG/mm/ug_deptree.cc68
-rw-r--r--moses/TranslationModel/UG/mm/ug_deptree.h26
-rw-r--r--moses/TranslationModel/UG/mm/ug_im_bitext.cc16
-rw-r--r--moses/TranslationModel/UG/mm/ug_im_bitext.h38
-rw-r--r--moses/TranslationModel/UG/mm/ug_im_tsa.h116
-rw-r--r--moses/TranslationModel/UG/mm/ug_im_ttrack.h56
-rw-r--r--moses/TranslationModel/UG/mm/ug_lexical_phrase_scorer1.h30
-rw-r--r--moses/TranslationModel/UG/mm/ug_lexical_phrase_scorer2.h42
-rw-r--r--moses/TranslationModel/UG/mm/ug_lexical_reordering.cc26
-rw-r--r--moses/TranslationModel/UG/mm/ug_lexical_reordering.h6
-rw-r--r--moses/TranslationModel/UG/mm/ug_load_primer.h4
-rw-r--r--moses/TranslationModel/UG/mm/ug_lru_cache.h22
-rw-r--r--moses/TranslationModel/UG/mm/ug_mm_2d_table.h26
-rw-r--r--moses/TranslationModel/UG/mm/ug_mm_bitext.h20
-rw-r--r--moses/TranslationModel/UG/mm/ug_mm_tsa.h34
-rw-r--r--moses/TranslationModel/UG/mm/ug_mm_ttrack.h44
-rw-r--r--moses/TranslationModel/UG/mm/ug_mmbitext.cc66
-rw-r--r--moses/TranslationModel/UG/mm/ug_mmbitext.h46
-rw-r--r--moses/TranslationModel/UG/mm/ug_phrasepair.cc16
-rw-r--r--moses/TranslationModel/UG/mm/ug_phrasepair.h122
-rw-r--r--moses/TranslationModel/UG/mm/ug_sampling_bias.cc82
-rw-r--r--moses/TranslationModel/UG/mm/ug_sampling_bias.h40
-rw-r--r--moses/TranslationModel/UG/mm/ug_tsa_array_entry.h12
-rw-r--r--moses/TranslationModel/UG/mm/ug_tsa_base.h220
-rw-r--r--moses/TranslationModel/UG/mm/ug_tsa_bitset_cache.h12
-rw-r--r--moses/TranslationModel/UG/mm/ug_tsa_tree_iterator.h192
-rw-r--r--moses/TranslationModel/UG/mm/ug_ttrack_base.cc4
-rw-r--r--moses/TranslationModel/UG/mm/ug_ttrack_base.h116
-rw-r--r--moses/TranslationModel/UG/mm/ug_ttrack_position.h32
-rw-r--r--moses/TranslationModel/UG/mm/ug_typedefs.h2
-rw-r--r--moses/TranslationModel/UG/mmsapt.cpp186
-rw-r--r--moses/TranslationModel/UG/mmsapt.h84
-rw-r--r--moses/TranslationModel/UG/mmsapt_align.cc44
-rw-r--r--moses/TranslationModel/UG/ptable-describe-features.cc6
-rw-r--r--moses/TranslationModel/UG/ptable-lookup.cc24
-rw-r--r--moses/TranslationModel/UG/sapt_phrase_key.h2
-rw-r--r--moses/TranslationModel/UG/sapt_phrase_scorers.h2
-rw-r--r--moses/TranslationModel/UG/sapt_pscore_base.h52
-rw-r--r--moses/TranslationModel/UG/sapt_pscore_coherence.h14
-rw-r--r--moses/TranslationModel/UG/sapt_pscore_lex1.h38
-rw-r--r--moses/TranslationModel/UG/sapt_pscore_logcnt.h26
-rw-r--r--moses/TranslationModel/UG/sapt_pscore_pbwd.h18
-rw-r--r--moses/TranslationModel/UG/sapt_pscore_pfwd.h32
-rw-r--r--moses/TranslationModel/UG/sapt_pscore_phrasecount.h10
-rw-r--r--moses/TranslationModel/UG/sapt_pscore_provenance.h18
-rw-r--r--moses/TranslationModel/UG/sapt_pscore_rareness.h14
-rw-r--r--moses/TranslationModel/UG/sapt_pscore_unaligned.h24
-rw-r--r--moses/TranslationModel/UG/sapt_pscore_wordcount.h10
-rw-r--r--moses/TranslationModel/UG/sim-pe.cc18
-rw-r--r--moses/TranslationModel/UG/spe-check-coverage.cc42
-rw-r--r--moses/TranslationModel/UG/spe-check-coverage2.cc14
-rw-r--r--moses/TranslationModel/UG/spe-check-coverage3.cc46
-rw-r--r--moses/TranslationModel/UG/try-align.cc134
-rw-r--r--moses/TranslationModel/UG/try-align2.cc174
-rw-r--r--moses/TranslationModel/UG/util/ibm1-align.cc32
-rw-r--r--moses/TranslationModel/UG/util/tokenindex.dump.cc2
-rw-r--r--moses/TranslationModel/fuzzy-match/Vocabulary.cpp142
112 files changed, 2737 insertions, 2737 deletions
diff --git a/moses/TranslationModel/CompactPT/BlockHashIndex.h b/moses/TranslationModel/CompactPT/BlockHashIndex.h
index b3f5e6f4b..130dd89fc 100644
--- a/moses/TranslationModel/CompactPT/BlockHashIndex.h
+++ b/moses/TranslationModel/CompactPT/BlockHashIndex.h
@@ -161,8 +161,8 @@ public:
}
#ifdef WITH_THREADS
-
- boost::shared_ptr<HashTask<Keys> >
+
+ boost::shared_ptr<HashTask<Keys> >
ht(new HashTask<Keys>(current, *this, keys));
m_threadPool.Submit(ht);
#else
diff --git a/moses/TranslationModel/CompactPT/MurmurHash3.cpp b/moses/TranslationModel/CompactPT/MurmurHash3.cpp
index fb6946bbc..dfde88708 100644
--- a/moses/TranslationModel/CompactPT/MurmurHash3.cpp
+++ b/moses/TranslationModel/CompactPT/MurmurHash3.cpp
@@ -1,425 +1,425 @@
-//-----------------------------------------------------------------------------
-// MurmurHash3 was written by Austin Appleby, and is placed in the public
-// domain. The author hereby disclaims copyright to this source code.
-
-// Note - The x86 and x64 versions do _not_ produce the same results, as the
-// algorithms are optimized for their respective platforms. You can still
-// compile and run any of them on any platform, but your performance with the
-// non-native version will be less than optimal.
-
-#include "MurmurHash3.h"
-
-//-----------------------------------------------------------------------------
-// Platform-specific functions and macros
-
-// Microsoft Visual Studio
-
-#if defined(_MSC_VER)
-
-#define FORCE_INLINE __forceinline
-
-#include <cstdlib>
-
-#define ROTL32(x,y) _rotl(x,y)
-#define ROTL64(x,y) _rotl64(x,y)
-
-#define BIG_CONSTANT(x) (x)
-
-// Other compilers
-
-#else // defined(_MSC_VER)
-
-#define FORCE_INLINE inline __attribute__((always_inline))
-
-inline uint32_t rotl32 ( uint32_t x, int8_t r )
-{
- return (x << r) | (x >> (32 - r));
-}
-
-inline uint64_t rotl64 ( uint64_t x, int8_t r )
-{
- return (x << r) | (x >> (64 - r));
-}
-
-#define ROTL32(x,y) rotl32(x,y)
-#define ROTL64(x,y) rotl64(x,y)
-
-#define BIG_CONSTANT(x) (x##LLU)
-
-#endif // !defined(_MSC_VER)
-
-//-----------------------------------------------------------------------------
-// Block read - if your platform needs to do endian-swapping or can only
-// handle aligned reads, do the conversion here
-
-FORCE_INLINE uint32_t getblock ( const uint32_t * p, int i )
-{
- return p[i];
-}
-
-FORCE_INLINE uint64_t getblock ( const uint64_t * p, int i )
-{
- return p[i];
-}
-
-//-----------------------------------------------------------------------------
-// Finalization mix - force all bits of a hash block to avalanche
-
-FORCE_INLINE uint32_t fmix ( uint32_t h )
-{
- h ^= h >> 16;
- h *= 0x85ebca6b;
- h ^= h >> 13;
- h *= 0xc2b2ae35;
- h ^= h >> 16;
-
- return h;
-}
-
-//----------
-
-FORCE_INLINE uint64_t fmix ( uint64_t k )
-{
- k ^= k >> 33;
- k *= BIG_CONSTANT(0xff51afd7ed558ccd);
- k ^= k >> 33;
- k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53);
- k ^= k >> 33;
-
- return k;
-}
-
-//-----------------------------------------------------------------------------
-
-void MurmurHash3_x86_32 ( const void * key, int len,
- uint32_t seed, void * out )
-{
- const uint8_t * data = (const uint8_t*)key;
- const int nblocks = len / 4;
-
- uint32_t h1 = seed;
-
- uint32_t c1 = 0xcc9e2d51;
- uint32_t c2 = 0x1b873593;
-
- //----------
- // body
-
- const uint32_t * blocks = (const uint32_t *)(data + nblocks*4);
-
- for(int i = -nblocks; i; i++) {
- uint32_t k1 = getblock(blocks,i);
-
- k1 *= c1;
- k1 = ROTL32(k1,15);
- k1 *= c2;
-
- h1 ^= k1;
- h1 = ROTL32(h1,13);
- h1 = h1*5+0xe6546b64;
- }
-
- //----------
- // tail
-
- const uint8_t * tail = (const uint8_t*)(data + nblocks*4);
-
- uint32_t k1 = 0;
-
- switch(len & 3) {
- case 3:
- k1 ^= tail[2] << 16;
- case 2:
- k1 ^= tail[1] << 8;
- case 1:
- k1 ^= tail[0];
- k1 *= c1;
- k1 = ROTL32(k1,15);
- k1 *= c2;
- h1 ^= k1;
- };
-
- //----------
- // finalization
-
- h1 ^= len;
-
- h1 = fmix(h1);
-
- *(uint32_t*)out = h1;
-}
-
-//-----------------------------------------------------------------------------
-
-void MurmurHash3_x86_128 ( const void * key, const int len,
- uint32_t seed, void * out )
-{
- const uint8_t * data = (const uint8_t*)key;
- const int nblocks = len / 16;
-
- uint32_t h1 = seed;
- uint32_t h2 = seed;
- uint32_t h3 = seed;
- uint32_t h4 = seed;
-
- uint32_t c1 = 0x239b961b;
- uint32_t c2 = 0xab0e9789;
- uint32_t c3 = 0x38b34ae5;
- uint32_t c4 = 0xa1e38b93;
-
- //----------
- // body
-
- const uint32_t * blocks = (const uint32_t *)(data + nblocks*16);
-
- for(int i = -nblocks; i; i++) {
- uint32_t k1 = getblock(blocks,i*4+0);
- uint32_t k2 = getblock(blocks,i*4+1);
- uint32_t k3 = getblock(blocks,i*4+2);
- uint32_t k4 = getblock(blocks,i*4+3);
-
- k1 *= c1;
- k1 = ROTL32(k1,15);
- k1 *= c2;
- h1 ^= k1;
-
- h1 = ROTL32(h1,19);
- h1 += h2;
- h1 = h1*5+0x561ccd1b;
-
- k2 *= c2;
- k2 = ROTL32(k2,16);
- k2 *= c3;
- h2 ^= k2;
-
- h2 = ROTL32(h2,17);
- h2 += h3;
- h2 = h2*5+0x0bcaa747;
-
- k3 *= c3;
- k3 = ROTL32(k3,17);
- k3 *= c4;
- h3 ^= k3;
-
- h3 = ROTL32(h3,15);
- h3 += h4;
- h3 = h3*5+0x96cd1c35;
-
- k4 *= c4;
- k4 = ROTL32(k4,18);
- k4 *= c1;
- h4 ^= k4;
-
- h4 = ROTL32(h4,13);
- h4 += h1;
- h4 = h4*5+0x32ac3b17;
- }
-
- //----------
- // tail
-
- const uint8_t * tail = (const uint8_t*)(data + nblocks*16);
-
- uint32_t k1 = 0;
- uint32_t k2 = 0;
- uint32_t k3 = 0;
- uint32_t k4 = 0;
-
- switch(len & 15) {
- case 15:
- k4 ^= tail[14] << 16;
- case 14:
- k4 ^= tail[13] << 8;
- case 13:
- k4 ^= tail[12] << 0;
- k4 *= c4;
- k4 = ROTL32(k4,18);
- k4 *= c1;
- h4 ^= k4;
-
- case 12:
- k3 ^= tail[11] << 24;
- case 11:
- k3 ^= tail[10] << 16;
- case 10:
- k3 ^= tail[ 9] << 8;
- case 9:
- k3 ^= tail[ 8] << 0;
- k3 *= c3;
- k3 = ROTL32(k3,17);
- k3 *= c4;
- h3 ^= k3;
-
- case 8:
- k2 ^= tail[ 7] << 24;
- case 7:
- k2 ^= tail[ 6] << 16;
- case 6:
- k2 ^= tail[ 5] << 8;
- case 5:
- k2 ^= tail[ 4] << 0;
- k2 *= c2;
- k2 = ROTL32(k2,16);
- k2 *= c3;
- h2 ^= k2;
-
- case 4:
- k1 ^= tail[ 3] << 24;
- case 3:
- k1 ^= tail[ 2] << 16;
- case 2:
- k1 ^= tail[ 1] << 8;
- case 1:
- k1 ^= tail[ 0] << 0;
- k1 *= c1;
- k1 = ROTL32(k1,15);
- k1 *= c2;
- h1 ^= k1;
- };
-
- //----------
- // finalization
-
- h1 ^= len;
- h2 ^= len;
- h3 ^= len;
- h4 ^= len;
-
- h1 += h2;
- h1 += h3;
- h1 += h4;
- h2 += h1;
- h3 += h1;
- h4 += h1;
-
- h1 = fmix(h1);
- h2 = fmix(h2);
- h3 = fmix(h3);
- h4 = fmix(h4);
-
- h1 += h2;
- h1 += h3;
- h1 += h4;
- h2 += h1;
- h3 += h1;
- h4 += h1;
-
- ((uint32_t*)out)[0] = h1;
- ((uint32_t*)out)[1] = h2;
- ((uint32_t*)out)[2] = h3;
- ((uint32_t*)out)[3] = h4;
-}
-
-//-----------------------------------------------------------------------------
-
-void MurmurHash3_x64_128 ( const void * key, const int len,
- const uint32_t seed, void * out )
-{
- const uint8_t * data = (const uint8_t*)key;
- const int nblocks = len / 16;
-
- uint64_t h1 = seed;
- uint64_t h2 = seed;
-
- uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5);
- uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f);
-
- //----------
- // body
-
- const uint64_t * blocks = (const uint64_t *)(data);
-
- for(int i = 0; i < nblocks; i++) {
- uint64_t k1 = getblock(blocks,i*2+0);
- uint64_t k2 = getblock(blocks,i*2+1);
-
- k1 *= c1;
- k1 = ROTL64(k1,31);
- k1 *= c2;
- h1 ^= k1;
-
- h1 = ROTL64(h1,27);
- h1 += h2;
- h1 = h1*5+0x52dce729;
-
- k2 *= c2;
- k2 = ROTL64(k2,33);
- k2 *= c1;
- h2 ^= k2;
-
- h2 = ROTL64(h2,31);
- h2 += h1;
- h2 = h2*5+0x38495ab5;
- }
-
- //----------
- // tail
-
- const uint8_t * tail = (const uint8_t*)(data + nblocks*16);
-
- uint64_t k1 = 0;
- uint64_t k2 = 0;
-
- switch(len & 15) {
- case 15:
- k2 ^= uint64_t(tail[14]) << 48;
- case 14:
- k2 ^= uint64_t(tail[13]) << 40;
- case 13:
- k2 ^= uint64_t(tail[12]) << 32;
- case 12:
- k2 ^= uint64_t(tail[11]) << 24;
- case 11:
- k2 ^= uint64_t(tail[10]) << 16;
- case 10:
- k2 ^= uint64_t(tail[ 9]) << 8;
- case 9:
- k2 ^= uint64_t(tail[ 8]) << 0;
- k2 *= c2;
- k2 = ROTL64(k2,33);
- k2 *= c1;
- h2 ^= k2;
-
- case 8:
- k1 ^= uint64_t(tail[ 7]) << 56;
- case 7:
- k1 ^= uint64_t(tail[ 6]) << 48;
- case 6:
- k1 ^= uint64_t(tail[ 5]) << 40;
- case 5:
- k1 ^= uint64_t(tail[ 4]) << 32;
- case 4:
- k1 ^= uint64_t(tail[ 3]) << 24;
- case 3:
- k1 ^= uint64_t(tail[ 2]) << 16;
- case 2:
- k1 ^= uint64_t(tail[ 1]) << 8;
- case 1:
- k1 ^= uint64_t(tail[ 0]) << 0;
- k1 *= c1;
- k1 = ROTL64(k1,31);
- k1 *= c2;
- h1 ^= k1;
- };
-
- //----------
- // finalization
-
- h1 ^= len;
- h2 ^= len;
-
- h1 += h2;
- h2 += h1;
-
- h1 = fmix(h1);
- h2 = fmix(h2);
-
- h1 += h2;
- h2 += h1;
-
- ((uint64_t*)out)[0] = h1;
- ((uint64_t*)out)[1] = h2;
-}
-
-//-----------------------------------------------------------------------------
-
+//-----------------------------------------------------------------------------
+// MurmurHash3 was written by Austin Appleby, and is placed in the public
+// domain. The author hereby disclaims copyright to this source code.
+
+// Note - The x86 and x64 versions do _not_ produce the same results, as the
+// algorithms are optimized for their respective platforms. You can still
+// compile and run any of them on any platform, but your performance with the
+// non-native version will be less than optimal.
+
+#include "MurmurHash3.h"
+
+//-----------------------------------------------------------------------------
+// Platform-specific functions and macros
+
+// Microsoft Visual Studio
+
+#if defined(_MSC_VER)
+
+#define FORCE_INLINE __forceinline
+
+#include <cstdlib>
+
+#define ROTL32(x,y) _rotl(x,y)
+#define ROTL64(x,y) _rotl64(x,y)
+
+#define BIG_CONSTANT(x) (x)
+
+// Other compilers
+
+#else // defined(_MSC_VER)
+
+#define FORCE_INLINE inline __attribute__((always_inline))
+
+inline uint32_t rotl32 ( uint32_t x, int8_t r )
+{
+ return (x << r) | (x >> (32 - r));
+}
+
+inline uint64_t rotl64 ( uint64_t x, int8_t r )
+{
+ return (x << r) | (x >> (64 - r));
+}
+
+#define ROTL32(x,y) rotl32(x,y)
+#define ROTL64(x,y) rotl64(x,y)
+
+#define BIG_CONSTANT(x) (x##LLU)
+
+#endif // !defined(_MSC_VER)
+
+//-----------------------------------------------------------------------------
+// Block read - if your platform needs to do endian-swapping or can only
+// handle aligned reads, do the conversion here
+
+FORCE_INLINE uint32_t getblock ( const uint32_t * p, int i )
+{
+ return p[i];
+}
+
+FORCE_INLINE uint64_t getblock ( const uint64_t * p, int i )
+{
+ return p[i];
+}
+
+//-----------------------------------------------------------------------------
+// Finalization mix - force all bits of a hash block to avalanche
+
+FORCE_INLINE uint32_t fmix ( uint32_t h )
+{
+ h ^= h >> 16;
+ h *= 0x85ebca6b;
+ h ^= h >> 13;
+ h *= 0xc2b2ae35;
+ h ^= h >> 16;
+
+ return h;
+}
+
+//----------
+
+FORCE_INLINE uint64_t fmix ( uint64_t k )
+{
+ k ^= k >> 33;
+ k *= BIG_CONSTANT(0xff51afd7ed558ccd);
+ k ^= k >> 33;
+ k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53);
+ k ^= k >> 33;
+
+ return k;
+}
+
+//-----------------------------------------------------------------------------
+
+void MurmurHash3_x86_32 ( const void * key, int len,
+ uint32_t seed, void * out )
+{
+ const uint8_t * data = (const uint8_t*)key;
+ const int nblocks = len / 4;
+
+ uint32_t h1 = seed;
+
+ uint32_t c1 = 0xcc9e2d51;
+ uint32_t c2 = 0x1b873593;
+
+ //----------
+ // body
+
+ const uint32_t * blocks = (const uint32_t *)(data + nblocks*4);
+
+ for(int i = -nblocks; i; i++) {
+ uint32_t k1 = getblock(blocks,i);
+
+ k1 *= c1;
+ k1 = ROTL32(k1,15);
+ k1 *= c2;
+
+ h1 ^= k1;
+ h1 = ROTL32(h1,13);
+ h1 = h1*5+0xe6546b64;
+ }
+
+ //----------
+ // tail
+
+ const uint8_t * tail = (const uint8_t*)(data + nblocks*4);
+
+ uint32_t k1 = 0;
+
+ switch(len & 3) {
+ case 3:
+ k1 ^= tail[2] << 16;
+ case 2:
+ k1 ^= tail[1] << 8;
+ case 1:
+ k1 ^= tail[0];
+ k1 *= c1;
+ k1 = ROTL32(k1,15);
+ k1 *= c2;
+ h1 ^= k1;
+ };
+
+ //----------
+ // finalization
+
+ h1 ^= len;
+
+ h1 = fmix(h1);
+
+ *(uint32_t*)out = h1;
+}
+
+//-----------------------------------------------------------------------------
+
+void MurmurHash3_x86_128 ( const void * key, const int len,
+ uint32_t seed, void * out )
+{
+ const uint8_t * data = (const uint8_t*)key;
+ const int nblocks = len / 16;
+
+ uint32_t h1 = seed;
+ uint32_t h2 = seed;
+ uint32_t h3 = seed;
+ uint32_t h4 = seed;
+
+ uint32_t c1 = 0x239b961b;
+ uint32_t c2 = 0xab0e9789;
+ uint32_t c3 = 0x38b34ae5;
+ uint32_t c4 = 0xa1e38b93;
+
+ //----------
+ // body
+
+ const uint32_t * blocks = (const uint32_t *)(data + nblocks*16);
+
+ for(int i = -nblocks; i; i++) {
+ uint32_t k1 = getblock(blocks,i*4+0);
+ uint32_t k2 = getblock(blocks,i*4+1);
+ uint32_t k3 = getblock(blocks,i*4+2);
+ uint32_t k4 = getblock(blocks,i*4+3);
+
+ k1 *= c1;
+ k1 = ROTL32(k1,15);
+ k1 *= c2;
+ h1 ^= k1;
+
+ h1 = ROTL32(h1,19);
+ h1 += h2;
+ h1 = h1*5+0x561ccd1b;
+
+ k2 *= c2;
+ k2 = ROTL32(k2,16);
+ k2 *= c3;
+ h2 ^= k2;
+
+ h2 = ROTL32(h2,17);
+ h2 += h3;
+ h2 = h2*5+0x0bcaa747;
+
+ k3 *= c3;
+ k3 = ROTL32(k3,17);
+ k3 *= c4;
+ h3 ^= k3;
+
+ h3 = ROTL32(h3,15);
+ h3 += h4;
+ h3 = h3*5+0x96cd1c35;
+
+ k4 *= c4;
+ k4 = ROTL32(k4,18);
+ k4 *= c1;
+ h4 ^= k4;
+
+ h4 = ROTL32(h4,13);
+ h4 += h1;
+ h4 = h4*5+0x32ac3b17;
+ }
+
+ //----------
+ // tail
+
+ const uint8_t * tail = (const uint8_t*)(data + nblocks*16);
+
+ uint32_t k1 = 0;
+ uint32_t k2 = 0;
+ uint32_t k3 = 0;
+ uint32_t k4 = 0;
+
+ switch(len & 15) {
+ case 15:
+ k4 ^= tail[14] << 16;
+ case 14:
+ k4 ^= tail[13] << 8;
+ case 13:
+ k4 ^= tail[12] << 0;
+ k4 *= c4;
+ k4 = ROTL32(k4,18);
+ k4 *= c1;
+ h4 ^= k4;
+
+ case 12:
+ k3 ^= tail[11] << 24;
+ case 11:
+ k3 ^= tail[10] << 16;
+ case 10:
+ k3 ^= tail[ 9] << 8;
+ case 9:
+ k3 ^= tail[ 8] << 0;
+ k3 *= c3;
+ k3 = ROTL32(k3,17);
+ k3 *= c4;
+ h3 ^= k3;
+
+ case 8:
+ k2 ^= tail[ 7] << 24;
+ case 7:
+ k2 ^= tail[ 6] << 16;
+ case 6:
+ k2 ^= tail[ 5] << 8;
+ case 5:
+ k2 ^= tail[ 4] << 0;
+ k2 *= c2;
+ k2 = ROTL32(k2,16);
+ k2 *= c3;
+ h2 ^= k2;
+
+ case 4:
+ k1 ^= tail[ 3] << 24;
+ case 3:
+ k1 ^= tail[ 2] << 16;
+ case 2:
+ k1 ^= tail[ 1] << 8;
+ case 1:
+ k1 ^= tail[ 0] << 0;
+ k1 *= c1;
+ k1 = ROTL32(k1,15);
+ k1 *= c2;
+ h1 ^= k1;
+ };
+
+ //----------
+ // finalization
+
+ h1 ^= len;
+ h2 ^= len;
+ h3 ^= len;
+ h4 ^= len;
+
+ h1 += h2;
+ h1 += h3;
+ h1 += h4;
+ h2 += h1;
+ h3 += h1;
+ h4 += h1;
+
+ h1 = fmix(h1);
+ h2 = fmix(h2);
+ h3 = fmix(h3);
+ h4 = fmix(h4);
+
+ h1 += h2;
+ h1 += h3;
+ h1 += h4;
+ h2 += h1;
+ h3 += h1;
+ h4 += h1;
+
+ ((uint32_t*)out)[0] = h1;
+ ((uint32_t*)out)[1] = h2;
+ ((uint32_t*)out)[2] = h3;
+ ((uint32_t*)out)[3] = h4;
+}
+
+//-----------------------------------------------------------------------------
+
+void MurmurHash3_x64_128 ( const void * key, const int len,
+ const uint32_t seed, void * out )
+{
+ const uint8_t * data = (const uint8_t*)key;
+ const int nblocks = len / 16;
+
+ uint64_t h1 = seed;
+ uint64_t h2 = seed;
+
+ uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5);
+ uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f);
+
+ //----------
+ // body
+
+ const uint64_t * blocks = (const uint64_t *)(data);
+
+ for(int i = 0; i < nblocks; i++) {
+ uint64_t k1 = getblock(blocks,i*2+0);
+ uint64_t k2 = getblock(blocks,i*2+1);
+
+ k1 *= c1;
+ k1 = ROTL64(k1,31);
+ k1 *= c2;
+ h1 ^= k1;
+
+ h1 = ROTL64(h1,27);
+ h1 += h2;
+ h1 = h1*5+0x52dce729;
+
+ k2 *= c2;
+ k2 = ROTL64(k2,33);
+ k2 *= c1;
+ h2 ^= k2;
+
+ h2 = ROTL64(h2,31);
+ h2 += h1;
+ h2 = h2*5+0x38495ab5;
+ }
+
+ //----------
+ // tail
+
+ const uint8_t * tail = (const uint8_t*)(data + nblocks*16);
+
+ uint64_t k1 = 0;
+ uint64_t k2 = 0;
+
+ switch(len & 15) {
+ case 15:
+ k2 ^= uint64_t(tail[14]) << 48;
+ case 14:
+ k2 ^= uint64_t(tail[13]) << 40;
+ case 13:
+ k2 ^= uint64_t(tail[12]) << 32;
+ case 12:
+ k2 ^= uint64_t(tail[11]) << 24;
+ case 11:
+ k2 ^= uint64_t(tail[10]) << 16;
+ case 10:
+ k2 ^= uint64_t(tail[ 9]) << 8;
+ case 9:
+ k2 ^= uint64_t(tail[ 8]) << 0;
+ k2 *= c2;
+ k2 = ROTL64(k2,33);
+ k2 *= c1;
+ h2 ^= k2;
+
+ case 8:
+ k1 ^= uint64_t(tail[ 7]) << 56;
+ case 7:
+ k1 ^= uint64_t(tail[ 6]) << 48;
+ case 6:
+ k1 ^= uint64_t(tail[ 5]) << 40;
+ case 5:
+ k1 ^= uint64_t(tail[ 4]) << 32;
+ case 4:
+ k1 ^= uint64_t(tail[ 3]) << 24;
+ case 3:
+ k1 ^= uint64_t(tail[ 2]) << 16;
+ case 2:
+ k1 ^= uint64_t(tail[ 1]) << 8;
+ case 1:
+ k1 ^= uint64_t(tail[ 0]) << 0;
+ k1 *= c1;
+ k1 = ROTL64(k1,31);
+ k1 *= c2;
+ h1 ^= k1;
+ };
+
+ //----------
+ // finalization
+
+ h1 ^= len;
+ h2 ^= len;
+
+ h1 += h2;
+ h2 += h1;
+
+ h1 = fmix(h1);
+ h2 = fmix(h2);
+
+ h1 += h2;
+ h2 += h1;
+
+ ((uint64_t*)out)[0] = h1;
+ ((uint64_t*)out)[1] = h2;
+}
+
+//-----------------------------------------------------------------------------
+
diff --git a/moses/TranslationModel/CompactPT/MurmurHash3.h b/moses/TranslationModel/CompactPT/MurmurHash3.h
index 58e98204d..54e9d3f9e 100644
--- a/moses/TranslationModel/CompactPT/MurmurHash3.h
+++ b/moses/TranslationModel/CompactPT/MurmurHash3.h
@@ -1,37 +1,37 @@
-//-----------------------------------------------------------------------------
-// MurmurHash3 was written by Austin Appleby, and is placed in the public
-// domain. The author hereby disclaims copyright to this source code.
-
-#ifndef _MURMURHASH3_H_
-#define _MURMURHASH3_H_
-
-//-----------------------------------------------------------------------------
-// Platform-specific functions and macros
-
-// Microsoft Visual Studio
-
-#if defined(_MSC_VER)
-
-typedef unsigned char uint8_t;
-typedef unsigned long uint32_t;
-typedef unsigned __int64 uint64_t;
-
-// Other compilers
-
-#else // defined(_MSC_VER)
-
-#include <stdint.h>
-
-#endif // !defined(_MSC_VER)
-
-//-----------------------------------------------------------------------------
-
-void MurmurHash3_x86_32 ( const void * key, int len, uint32_t seed, void * out );
-
-void MurmurHash3_x86_128 ( const void * key, int len, uint32_t seed, void * out );
-
-void MurmurHash3_x64_128 ( const void * key, int len, uint32_t seed, void * out );
-
-//-----------------------------------------------------------------------------
-
-#endif // _MURMURHASH3_H_
+//-----------------------------------------------------------------------------
+// MurmurHash3 was written by Austin Appleby, and is placed in the public
+// domain. The author hereby disclaims copyright to this source code.
+
+#ifndef _MURMURHASH3_H_
+#define _MURMURHASH3_H_
+
+//-----------------------------------------------------------------------------
+// Platform-specific functions and macros
+
+// Microsoft Visual Studio
+
+#if defined(_MSC_VER)
+
+typedef unsigned char uint8_t;
+typedef unsigned long uint32_t;
+typedef unsigned __int64 uint64_t;
+
+// Other compilers
+
+#else // defined(_MSC_VER)
+
+#include <stdint.h>
+
+#endif // !defined(_MSC_VER)
+
+//-----------------------------------------------------------------------------
+
+void MurmurHash3_x86_32 ( const void * key, int len, uint32_t seed, void * out );
+
+void MurmurHash3_x86_128 ( const void * key, int len, uint32_t seed, void * out );
+
+void MurmurHash3_x64_128 ( const void * key, int len, uint32_t seed, void * out );
+
+//-----------------------------------------------------------------------------
+
+#endif // _MURMURHASH3_H_
diff --git a/moses/TranslationModel/PhraseDictionary.h b/moses/TranslationModel/PhraseDictionary.h
index 1e27e3ff0..2c1f1f39e 100644
--- a/moses/TranslationModel/PhraseDictionary.h
+++ b/moses/TranslationModel/PhraseDictionary.h
@@ -70,9 +70,9 @@ public:
**/
class PhraseDictionary : public DecodeFeature
{
- friend class PhraseDictionaryMultiModelCounts;
- // why is this necessary? that's a derived class, so it should have
- // access to the
+ friend class PhraseDictionaryMultiModelCounts;
+ // why is this necessary? that's a derived class, so it should have
+ // access to the
public:
virtual bool ProvidesPrefixCheck() const;
@@ -104,7 +104,7 @@ public:
virtual
bool
PrefixExists(ttasksptr const& ttask, Phrase const& phrase) const;
-
+
// LEGACY!
// The preferred method is to override GetTargetPhraseCollectionBatch().
// See class PhraseDictionaryMemory or PhraseDictionaryOnDisk for details
@@ -119,7 +119,7 @@ public:
TargetPhraseCollection const *
GetTargetPhraseCollectionLEGACY(ttasksptr const& ttask, const Phrase& src)
{
- return GetTargetPhraseCollectionLEGACY(src);
+ return GetTargetPhraseCollectionLEGACY(src);
}
virtual
diff --git a/moses/TranslationModel/ProbingPT/hash.hh b/moses/TranslationModel/ProbingPT/hash.hh
index a4fcd6330..607238ae1 100644
--- a/moses/TranslationModel/ProbingPT/hash.hh
+++ b/moses/TranslationModel/ProbingPT/hash.hh
@@ -7,7 +7,7 @@
#include <vector>
//Gets the MurmurmurHash for give string
-uint64_t getHash(StringPiece text);
+uint64_t getHash(StringPiece text);
std::vector<uint64_t> getVocabIDs(StringPiece textin);
diff --git a/moses/TranslationModel/ProbingPT/storing.hh b/moses/TranslationModel/ProbingPT/storing.hh
index eb3b1ea53..e1be3bc87 100644
--- a/moses/TranslationModel/ProbingPT/storing.hh
+++ b/moses/TranslationModel/ProbingPT/storing.hh
@@ -2,7 +2,7 @@
#include <cstdio>
#include <fstream>
-#include <iostream>
+#include <iostream>
#include "hash.hh" //Includes line_splitter
#include "probing_hash_utils.hh"
diff --git a/moses/TranslationModel/UG/TargetPhraseCollectionCache.cc b/moses/TranslationModel/UG/TargetPhraseCollectionCache.cc
index bf449247e..1217b9711 100644
--- a/moses/TranslationModel/UG/TargetPhraseCollectionCache.cc
+++ b/moses/TranslationModel/UG/TargetPhraseCollectionCache.cc
@@ -16,7 +16,7 @@ namespace Moses
if (a.tv_sec != b.tv_sec) return a.tv_sec > b.tv_sec;
return (a.tv_nsec >= b.tv_nsec);
}
-#endif
+#endif
bool operator<(timeval const& a, timeval const& b)
{
@@ -30,10 +30,10 @@ namespace Moses
return (a.tv_usec >= b.tv_usec);
}
- void
+ void
bubble_up(std::vector<TPCollWrapper*>& v, size_t k)
{
- if (k >= v.size()) return;
+ if (k >= v.size()) return;
for (;k && (v[k]->tstamp < v[k/2]->tstamp); k /=2)
{
std::swap(v[k],v[k/2]);
@@ -41,7 +41,7 @@ namespace Moses
}
}
- void
+ void
bubble_down(std::vector<TPCollWrapper*>& v, size_t k)
{
for (size_t j = 2*(k+1); j <= v.size(); j = 2*((k=j)+1))
@@ -62,7 +62,7 @@ namespace Moses
TPCollWrapper*
TPCollCache
- ::encache(TPCollWrapper* const& ptr)
+ ::encache(TPCollWrapper* const& ptr)
{
using namespace boost;
// update time stamp:
@@ -76,7 +76,7 @@ namespace Moses
{
vector<TPCollWrapper*>& v = m_history;
if (ptr->idx >= 0) // ptr is already in history
- {
+ {
assert(ptr == v[ptr->idx]);
size_t k = 2 * (ptr->idx + 1);
if (k < v.size()) bubble_up(v,k--);
@@ -88,7 +88,7 @@ namespace Moses
v.push_back(ptr);
bubble_up(v,k);
}
- else // someone else needs to go
+ else // someone else needs to go
{
v[0]->idx = -1;
release(v[0]);
@@ -98,28 +98,28 @@ namespace Moses
}
return ptr;
} // TPCollCache::encache(...)
-
- TPCollWrapper*
+
+ TPCollWrapper*
TPCollCache
- ::get(uint64_t key, size_t revision)
+ ::get(uint64_t key, size_t revision)
{
using namespace boost;
cache_t::iterator m;
- {
+ {
shared_lock<shared_mutex> lock(m_cache_lock);
m = m_cache.find(key);
- if (m == m_cache.end() || m->second->revision != revision)
+ if (m == m_cache.end() || m->second->revision != revision)
return NULL;
++m->second->refCount;
}
-
+
encache(m->second);
return NULL;
} // TPCollCache::get(...)
-
+
void
TPCollCache
- ::add(uint64_t key, TPCollWrapper* ptr)
+ ::add(uint64_t key, TPCollWrapper* ptr)
{
{
boost::unique_lock<boost::shared_mutex> lock(m_cache_lock);
@@ -129,7 +129,7 @@ namespace Moses
}
encache(ptr);
} // TPCollCache::add(...)
-
+
void
TPCollCache
::release(TPCollWrapper*& ptr)
@@ -137,25 +137,25 @@ namespace Moses
if (!ptr) return;
if (--ptr->refCount || ptr->idx >= 0) // tpc is still in use
- {
- ptr = NULL;
- return;
+ {
+ ptr = NULL;
+ return;
}
-
+
#if 0
timespec t; clock_gettime(CLOCK_MONOTONIC,&t);
timespec r; clock_getres(CLOCK_MONOTONIC,&r);
float delta = t.tv_sec - ptr->tstamp.tv_sec;
cerr << "deleting old cache entry after " << delta << " seconds."
- << " clock resolution is " << r.tv_sec << ":" << r.tv_nsec
+ << " clock resolution is " << r.tv_sec << ":" << r.tv_nsec
<< " at " << __FILE__ << ":" << __LINE__ << endl;
#endif
-
+
boost::upgrade_lock<boost::shared_mutex> lock(m_cache_lock);
cache_t::iterator m = m_cache.find(ptr->key);
if (m != m_cache.end() && m->second == ptr)
- { // the cache could have been updated with a new pointer
- // for the same phrase already, so we need to check
+ { // the cache could have been updated with a new pointer
+ // for the same phrase already, so we need to check
// if the pointer we cound is the one we want to get rid of,
// hence the second check
boost::upgrade_to_unique_lock<boost::shared_mutex> xlock(lock);
@@ -163,7 +163,7 @@ namespace Moses
}
delete ptr;
ptr = NULL;
- } // TPCollCache::release(...)
+ } // TPCollCache::release(...)
TPCollWrapper::
TPCollWrapper(size_t r, uint64_t k)
@@ -175,5 +175,5 @@ namespace Moses
{
assert(this->refCount == 0);
}
-
+
} // namespace
diff --git a/moses/TranslationModel/UG/TargetPhraseCollectionCache.h b/moses/TranslationModel/UG/TargetPhraseCollectionCache.h
index fc9ce8921..269200647 100644
--- a/moses/TranslationModel/UG/TargetPhraseCollectionCache.h
+++ b/moses/TranslationModel/UG/TargetPhraseCollectionCache.h
@@ -5,15 +5,15 @@
namespace Moses
{
- class TPCollWrapper
+ class TPCollWrapper
// wrapper around TargetPhraseCollection that includes reference counts
// and a time stamp for least-recently-used caching of TargetPhraseCollection-s
: public TargetPhraseCollection
{
public:
- size_t const revision;
+ size_t const revision;
// revison; gets changed when the underlying corpus in Mmsapt is updated
-
+
uint64_t const key; // phrase key
uint32_t refCount; // reference count
#if defined(timespec) // timespec is better, but not available everywhere
@@ -32,12 +32,12 @@ namespace Moses
typedef std::vector<TPCollWrapper*> history_t;
cache_t m_cache; // maps from phrase ids to target phrase collections
mutable history_t m_history; // heap of live items, least recently used one on top
-
+
mutable boost::shared_mutex m_cache_lock; // locks m_cache
mutable boost::shared_mutex m_history_lock; // locks m_history
#if 0
- // mutable size_t m_tpc_ctr;
+ // mutable size_t m_tpc_ctr;
// counter of all live item, for debugging. probably obsolete; was used
// to track memory leaks
#endif
@@ -47,14 +47,14 @@ namespace Moses
public:
TPCollCache(size_t capacity=1000);
-
- TPCollWrapper*
+
+ TPCollWrapper*
get(uint64_t key, size_t revision);
- void
+ void
add(uint64_t key, TPCollWrapper* ptr);
- void
+ void
release(TPCollWrapper*& tpc);
};
diff --git a/moses/TranslationModel/UG/bitext-find.cc b/moses/TranslationModel/UG/bitext-find.cc
index 46978d16e..18cc6e0fa 100644
--- a/moses/TranslationModel/UG/bitext-find.cc
+++ b/moses/TranslationModel/UG/bitext-find.cc
@@ -30,15 +30,15 @@ write_sentence
}
}
-bool
-fill(string const& query, TSA<Token> const& tsa,
+bool
+fill(string const& query, TSA<Token> const& tsa,
TokenIndex const& V, bitvector& v)
{
v.resize(tsa.getCorpus()->size());
Bitext<Token>::iter m(&tsa);
- istringstream buf(query); string w;
- while (buf >> w)
- if (!m.extend(V[w]))
+ istringstream buf(query); string w;
+ while (buf >> w)
+ if (!m.extend(V[w]))
return false;
m.markSentences(v);
return true;
@@ -51,7 +51,7 @@ int main(int argc, char* argv[])
{
interpret_args(argc, argv);
if (Q1.empty() && Q2.empty()) exit(0);
-
+
mmbitext B; string w;
B.open(bname, L1, L2);
@@ -64,13 +64,13 @@ int main(int argc, char* argv[])
bitvector check(B.T1->size());
if (Q1.size() == 0 || Q2.size() == 0) check.set();
else (m2.markSentences(check));
-
+
Bitext<Token>::iter& m = m1.size() ? m1 : m2;
char const* x = m.lower_bound(-1);
char const* stop = m.upper_bound(-1);
uint64_t sid;
ushort off;
- boost::taus88 rnd;
+ boost::taus88 rnd;
size_t N = m.approxOccurrenceCount();
maxhits = min(N, maxhits);
size_t k = 0; // selected
@@ -80,7 +80,7 @@ int main(int argc, char* argv[])
x = m.root->readOffset(x,stop,off);
if (!check[sid]) continue;
- size_t r = (N - i) * rnd()/(rnd.max()+1.) + k;
+ size_t r = (N - i) * rnd()/(rnd.max()+1.) + k;
if (maxhits != N && r >= maxhits) continue;
++k;
@@ -94,20 +94,20 @@ int main(int argc, char* argv[])
// cout << "alignment failure" << endl;
}
- cout << sid << " " << B.docname(sid)
+ cout << sid << " " << B.docname(sid)
<< " dfwd=" << po_fwd << " dbwd=" << po_bwd
<< "\n";
write_sentence(*B.T1, sid, *B.V1, cout); cout << "\n";
write_sentence(*B.T2, sid, *B.V2, cout); cout << "\n";
- B.write_yawat_alignment(sid,
- m1.size() ? &m1 : NULL,
- m2.size() ? &m2 : NULL, cout);
+ B.write_yawat_alignment(sid,
+ m1.size() ? &m1 : NULL,
+ m2.size() ? &m2 : NULL, cout);
cout << endl;
-
+
}
}
-void
+void
interpret_args(int ac, char* av[])
{
po::variables_map vm;
@@ -120,7 +120,7 @@ interpret_args(int ac, char* av[])
("q1", po::value<string>(&Q1), "query in L1")
("q2", po::value<string>(&Q2), "query in L2")
;
-
+
po::options_description h("Hidden Options");
h.add_options()
("bname", po::value<string>(&bname), "base name of corpus")
@@ -133,7 +133,7 @@ interpret_args(int ac, char* av[])
a.add("bname",1);
a.add("L1",1);
a.add("L2",1);
-
+
po::store(po::command_line_parser(ac,av)
.options(h)
.positional(a)
@@ -141,7 +141,7 @@ interpret_args(int ac, char* av[])
po::notify(vm);
if (vm.count("help"))
{
- cout << "\nusage:\n\t" << av[0]
+ cout << "\nusage:\n\t" << av[0]
<< " [options] [--q1=<L1string>] [--q2=<L2string>]" << endl;
cout << o << endl;
exit(0);
diff --git a/moses/TranslationModel/UG/count-ptable-features.cc b/moses/TranslationModel/UG/count-ptable-features.cc
index b4d2cb4dd..4c9022075 100644
--- a/moses/TranslationModel/UG/count-ptable-features.cc
+++ b/moses/TranslationModel/UG/count-ptable-features.cc
@@ -21,6 +21,6 @@ int main()
cout << PT.GetFeatureNames().size() << endl;
exit(0);
}
-
-
+
+
diff --git a/moses/TranslationModel/UG/generic/file_io/ug_stream.cpp b/moses/TranslationModel/UG/generic/file_io/ug_stream.cpp
index 073b64dfc..b87aa1d0c 100644
--- a/moses/TranslationModel/UG/generic/file_io/ug_stream.cpp
+++ b/moses/TranslationModel/UG/generic/file_io/ug_stream.cpp
@@ -11,7 +11,7 @@ namespace ugdiss
using namespace boost::algorithm;
using namespace boost::iostreams;
- filtering_istream*
+ filtering_istream*
open_input_stream(string fname)
{
filtering_istream* ret = new filtering_istream();
@@ -19,7 +19,7 @@ namespace ugdiss
return ret;
}
- filtering_ostream*
+ filtering_ostream*
open_output_stream(string fname)
{
filtering_ostream* ret = new filtering_ostream();
@@ -27,7 +27,7 @@ namespace ugdiss
return ret;
}
- void
+ void
open_input_stream(string fname, filtering_istream& in)
{
if (ends_with(fname, ".gz"))
@@ -41,7 +41,7 @@ namespace ugdiss
in.push(file_source(fname.c_str()));
}
- void
+ void
open_output_stream(string fname, filtering_ostream& out)
{
if (ends_with(fname, ".gz") || ends_with(fname, ".gz_"))
diff --git a/moses/TranslationModel/UG/generic/file_io/ug_stream.h b/moses/TranslationModel/UG/generic/file_io/ug_stream.h
index e2c9e4764..5555e36f8 100644
--- a/moses/TranslationModel/UG/generic/file_io/ug_stream.h
+++ b/moses/TranslationModel/UG/generic/file_io/ug_stream.h
@@ -23,7 +23,7 @@ using namespace boost::iostreams;
/** open input file that is possibly compressed
* decompression filters are automatically added based on the file name
- * gzip for .gz; bzip2 for bz2.
+ * gzip for .gz; bzip2 for bz2.
*/
filtering_istream* open_input_stream(string fname);
void open_input_stream(string fname, filtering_istream& in);
diff --git a/moses/TranslationModel/UG/generic/program_options/ug_get_options.cpp b/moses/TranslationModel/UG/generic/program_options/ug_get_options.cpp
index 31927ac84..6c1644837 100644
--- a/moses/TranslationModel/UG/generic/program_options/ug_get_options.cpp
+++ b/moses/TranslationModel/UG/generic/program_options/ug_get_options.cpp
@@ -11,7 +11,7 @@ namespace ugdiss
{
using namespace std;
- void
+ void
get_options(int ac, char* av[], progopts& o, posopts& a, optsmap& vm,
char const* cfgFileParam)
{
@@ -30,17 +30,17 @@ namespace ugdiss
}
else
{
- cerr << "Error: cannot find config file '"
+ cerr << "Error: cannot find config file '"
<< cfgFile << "'!" << endl;
exit(1);
}
}
}
-
+
// process positional args, ignoring those set in the config file
if (a.max_total_count())
po::store(po::command_line_parser(ac,av)
- .options(o).positional(a).run(),vm);
+ .options(o).positional(a).run(),vm);
po::notify(vm); // IMPORTANT
}
}
diff --git a/moses/TranslationModel/UG/generic/program_options/ug_get_options.h b/moses/TranslationModel/UG/generic/program_options/ug_get_options.h
index 79b626ef5..636b11302 100644
--- a/moses/TranslationModel/UG/generic/program_options/ug_get_options.h
+++ b/moses/TranslationModel/UG/generic/program_options/ug_get_options.h
@@ -6,18 +6,18 @@
#include <boost/program_options.hpp>
-namespace ugdiss
+namespace ugdiss
{
namespace po=boost::program_options;
typedef po::options_description progopts;
typedef po::positional_options_description posopts;
typedef po::variables_map optsmap;
- void
- get_options(int ac, char* av[],
- progopts & o,
- posopts & a,
- optsmap & vm,
+ void
+ get_options(int ac, char* av[],
+ progopts & o,
+ posopts & a,
+ optsmap & vm,
char const* cfgFileParam=NULL);
}
diff --git a/moses/TranslationModel/UG/generic/program_options/ug_splice_arglist.cc b/moses/TranslationModel/UG/generic/program_options/ug_splice_arglist.cc
index 7dc2cd18f..f30d91acc 100644
--- a/moses/TranslationModel/UG/generic/program_options/ug_splice_arglist.cc
+++ b/moses/TranslationModel/UG/generic/program_options/ug_splice_arglist.cc
@@ -5,15 +5,15 @@
#include <boost/foreach.hpp>
namespace Moses {
-
- void
+
+ void
filter_arguments(int const argc_in, char const* const* const argv_in,
- int & argc_moses, char*** argv_moses,
+ int & argc_moses, char*** argv_moses,
int & argc_other, char*** argv_other,
vector<pair<string,int> > const& filter)
{
*argv_moses = new char*[argc_in];
- *argv_other = new char*[argc_in];
+ *argv_other = new char*[argc_in];
(*argv_moses)[0] = new char[strlen(argv_in[0])+1];
strcpy((*argv_moses)[0], argv_in[0]);
argc_moses = 1;
@@ -30,7 +30,7 @@ namespace Moses {
strcpy((*argv_other)[argc_other++],argv_in[i]);
for (int k = 0; k < o.second; ++k)
{
- UTIL_THROW_IF2(++i >= argc_in || argv_in[i][0] == '-',
+ UTIL_THROW_IF2(++i >= argc_in || argv_in[i][0] == '-',
"[" << HERE << "] Missing argument for "
<< "parameter " << o.first << "!");
(*argv_other)[argc_other] = new char[strlen(argv_in[i])+1];
@@ -44,7 +44,7 @@ namespace Moses {
strcpy((*argv_moses)[argc_moses++], argv_in[i++]);
}
}
-
+
} // namespace Moses
diff --git a/moses/TranslationModel/UG/generic/program_options/ug_splice_arglist.h b/moses/TranslationModel/UG/generic/program_options/ug_splice_arglist.h
index e56585e8a..605acee6c 100644
--- a/moses/TranslationModel/UG/generic/program_options/ug_splice_arglist.h
+++ b/moses/TranslationModel/UG/generic/program_options/ug_splice_arglist.h
@@ -5,12 +5,12 @@
namespace Moses {
using namespace std;
- // Function to splice the argument list (e.g. before handing it over to
+ // Function to splice the argument list (e.g. before handing it over to
// Moses LoadParam() function. /filter/ is a vector of argument names
- // and the number of arguments after each of them
- void
+ // and the number of arguments after each of them
+ void
filter_arguments(int const argc_in, char const* const* const argv_in,
- int & argc_moses, char*** argv_moses,
+ int & argc_moses, char*** argv_moses,
int & argc_other, char*** argv_other,
vector<pair<string,int> > const& filter);
diff --git a/moses/TranslationModel/UG/generic/sorting/VectorIndexSorter.h b/moses/TranslationModel/UG/generic/sorting/VectorIndexSorter.h
index f26e28c52..31132c63c 100644
--- a/moses/TranslationModel/UG/generic/sorting/VectorIndexSorter.h
+++ b/moses/TranslationModel/UG/generic/sorting/VectorIndexSorter.h
@@ -17,40 +17,40 @@
namespace Moses
{
using namespace std;
- template<typename VAL,
+ template<typename VAL,
typename COMP = greater<VAL>,
typename IDX_T=size_t>
class
- VectorIndexSorter
+ VectorIndexSorter
: public binary_function<IDX_T const&, IDX_T const&, bool>
{
vector<VAL> const& m_vecref;
boost::shared_ptr<COMP> m_comp;
public:
-
+
COMP const& Compare;
VectorIndexSorter(vector<VAL> const& v, COMP const& comp)
: m_vecref(v), Compare(comp) {
}
-
+
VectorIndexSorter(vector<VAL> const& v)
: m_vecref(v), m_comp(new COMP()), Compare(*m_comp) {
}
-
+
bool operator()(IDX_T const & a, IDX_T const & b) const {
bool fwd = Compare(m_vecref.at(a) ,m_vecref.at(b));
bool bwd = Compare(m_vecref[b], m_vecref[a]);
return (fwd == bwd ? a < b : fwd);
}
-
+
boost::shared_ptr<vector<IDX_T> >
GetOrder() const;
-
+
void
GetOrder(vector<IDX_T> & order) const;
-
+
};
-
+
template<typename VAL, typename COMP, typename IDX_T>
boost::shared_ptr<vector<IDX_T> >
VectorIndexSorter<VAL,COMP,IDX_T>::
@@ -60,7 +60,7 @@ namespace Moses
get_order(*ret);
return ret;
}
-
+
template<typename VAL, typename COMP, typename IDX_T>
void
VectorIndexSorter<VAL,COMP,IDX_T>::
@@ -70,6 +70,6 @@ namespace Moses
for (IDX_T i = 0; i < IDX_T(m_vecref.size()); ++i) order[i] = i;
sort(order.begin(), order.end(), *this);
}
-
+
}
#endif
diff --git a/moses/TranslationModel/UG/generic/stringdist/ug_stringdist.cc b/moses/TranslationModel/UG/generic/stringdist/ug_stringdist.cc
index 4b61ecd60..877b7a816 100644
--- a/moses/TranslationModel/UG/generic/stringdist/ug_stringdist.cc
+++ b/moses/TranslationModel/UG/generic/stringdist/ug_stringdist.cc
@@ -6,14 +6,14 @@
// string distance measures
// Code by Ulrich Germann
-namespace stringdist
+namespace stringdist
{
- UErrorCode strip_accents(UnicodeString & trg)
+ UErrorCode strip_accents(UnicodeString & trg)
{
UErrorCode status = U_ZERO_ERROR;
- static Transliterator *stripper
- = Transliterator::createInstance("NFD; [:M:] Remove; NFC",
+ static Transliterator *stripper
+ = Transliterator::createInstance("NFD; [:M:] Remove; NFC",
UTRANS_FORWARD, status);
stripper->transliterate(trg);
return status;
@@ -22,9 +22,9 @@ namespace stringdist
char const*
StringDiff::
Segment::
- elabel[] = { "same", "cap", "flip", "permutation",
- "accent", "duplication",
- "insertion", "deletion",
+ elabel[] = { "same", "cap", "flip", "permutation",
+ "accent", "duplication",
+ "insertion", "deletion",
"mismatch", "noinit" };
StringDiff::
@@ -44,7 +44,7 @@ namespace stringdist
Segment()
: start_a(-1), end_a(-1), start_b(-1), end_b(-1), match(noinit), dist(0)
{}
-
+
UnicodeString const&
StringDiff::
set_a(string const& a)
@@ -74,8 +74,8 @@ namespace stringdist
{
return this->b;
}
-
- size_t
+
+ size_t
StringDiff::
size()
{
@@ -94,7 +94,7 @@ namespace stringdist
// if (s.match == same) continue;
// else if (s.match == insertion) ret += s.end_b - s.start_b;
// else if (s.match == deletion) ret += s.end_a - s.start_a;
-
+
// }
// }
@@ -138,7 +138,7 @@ namespace stringdist
#endif
}
- float
+ float
fillAlignmentMatrix(UChar const* a, size_t const lenA,
UChar const* b, size_t const lenB,
vector<vector<float> > & M)
@@ -164,7 +164,7 @@ namespace stringdist
return M.back().back();
}
- float
+ float
levenshtein(UChar const* a, size_t const lenA,
UChar const* b, size_t const lenB)
{
@@ -180,7 +180,7 @@ namespace stringdist
cout << endl;
}
cout << string(25,'-') << endl;
-#endif
+#endif
int i = M.size() -1;
int j = M.back().size() -1;
@@ -207,29 +207,29 @@ namespace stringdist
return ret;
}
-
+
StringDiff::
Segment::
- Segment(size_t const as, size_t const ae,
+ Segment(size_t const as, size_t const ae,
size_t const bs, size_t const be,
- UnicodeString const& a,
- UnicodeString const& b)
+ UnicodeString const& a,
+ UnicodeString const& b)
{
dist = 0;
- start_a = as; end_a = ae;
+ start_a = as; end_a = ae;
start_b = bs; end_b = be;
if (as == ae)
match = bs == be ? same : insertion;
- else if (bs == be)
+ else if (bs == be)
match = deletion;
- else if (be-bs != ae-as)
+ else if (be-bs != ae-as)
{
match = mismatch;
dist = stringdist::levenshtein(a.getBuffer() + as, ae - as,
b.getBuffer() + bs, be - bs);
}
- else
+ else
{
match = same;
size_t stop = ae-as;
@@ -251,11 +251,11 @@ namespace stringdist
}
}
}
- if (match == insertion)
+ if (match == insertion)
{
dist = be-bs;
}
- else if (match == deletion)
+ else if (match == deletion)
{
dist = ae-as;
}
@@ -309,18 +309,18 @@ namespace stringdist
if (i) --i;
if (j) --j;
}
- for (size_t k = 0; k < A.size(); ++k)
+ for (size_t k = 0; k < A.size(); ++k)
A[k] = min(A[k],A2[k]);
- for (size_t k = 0; k < B.size(); ++k)
+ for (size_t k = 0; k < B.size(); ++k)
B[k] = min(B[k],B2[k]);
-
+
if (a[i] == b[j]) { A[i] = j; B[j] = i; }
i = 0;
j = 0;
size_t I, J;
while (i < a.length() and j < b.length())
{
- if (A[i] < 0)
+ if (A[i] < 0)
{
I = i + 1;
while (I < A.size() and A[I] < 0) ++I;
@@ -338,24 +338,24 @@ namespace stringdist
difflist.push_back(Segment(i,i,j,J,a,b));
j = J;
}
- else
+ else
{
- I = i;
+ I = i;
J = j;
- while(I < A.size() && A[I] >= 0 && J < B.size() && B[J] >= 0)
+ while(I < A.size() && A[I] >= 0 && J < B.size() && B[J] >= 0)
{ ++I; ++J; }
difflist.push_back(Segment(i,I,j,J,a,b));
i = I; j = J;
}
}
- if (i < a.length() || j < b.length())
+ if (i < a.length() || j < b.length())
difflist.push_back(Segment(i,a.length(),j,b.length(),a,b));
diffcnt.assign(noinit,0);
for (size_t i = 0; i < difflist.size(); ++i)
{
Segment & s = difflist[i];
- if (s.match == insertion and
+ if (s.match == insertion and
((s.start_a and a[s.start_a - 1] == b[s.start_b]) or
(s.end_a < a.length() and a[s.end_a] == b[s.start_b])))
{
@@ -364,7 +364,7 @@ namespace stringdist
sameletter = b[i] == b[i-1];
if (sameletter) s.match = duplication;
}
- else if (s.match == deletion and
+ else if (s.match == deletion and
((s.start_b and b[s.start_b - 1] == a[s.start_a]) or
(s.end_b < b.length() and b[s.end_b] == a[s.start_a])))
{
@@ -380,15 +380,15 @@ namespace stringdist
void
StringDiff::
- showDiff(std::ostream& out)
+ showDiff(std::ostream& out)
{
if (difflist.size() == 0) align();
vector<size_t> fromEnd(difflist.size(),0);
for (int d = difflist.size()-1; d-- > 0;)
{
fromEnd[d] = a.length() - difflist[d].end_a;
- // cout << d << " " << fromEnd[d] << " "
- // << difflist[d].start_a << "-"
+ // cout << d << " " << fromEnd[d] << " "
+ // << difflist[d].start_a << "-"
// << difflist[d].end_a << endl;
}
for (size_t d = 0; d < difflist.size(); ++d)
@@ -402,7 +402,7 @@ namespace stringdist
bseg.toUTF8String(bbuf);
out << abuf << " ";
out << bbuf << " ";
- out << s.label() << " "
+ out << s.label() << " "
<< s.dist << " "
<< fromEnd[d]
<< endl;
@@ -423,7 +423,7 @@ namespace stringdist
{
return difflist.at(i);
}
-
+
vector<int> const&
StringDiff::
getFeatures() const
diff --git a/moses/TranslationModel/UG/generic/stringdist/ug_stringdist.h b/moses/TranslationModel/UG/generic/stringdist/ug_stringdist.h
index 43fb089f1..8dfcfb58a 100644
--- a/moses/TranslationModel/UG/generic/stringdist/ug_stringdist.h
+++ b/moses/TranslationModel/UG/generic/stringdist/ug_stringdist.h
@@ -21,15 +21,15 @@ using namespace std;
//using namespace boost;
using namespace ugdiss;
-namespace stringdist
+namespace stringdist
{
- float
+ float
levenshtein(UChar const* a, size_t const lenA,
UChar const* b, size_t const lenB);
UErrorCode strip_accents(UnicodeString & trg);
- float
+ float
fillAlignmentMatrix(UChar const* a, size_t const lenA,
UChar const* b, size_t const lenB,
vector<vector<float> > & M);
@@ -37,9 +37,9 @@ namespace stringdist
class StringDiff
{
public:
- enum MATCHTYPE
+ enum MATCHTYPE
{
- same, // a and b are identical
+ same, // a and b are identical
cap, // a and b differ only in capitalization
flip, // two-letter flip
permutation, // a and b have same letters but in different order
@@ -48,7 +48,7 @@ namespace stringdist
insertion, // a is empty
deletion, // b is empty
mismatch, // none of the above
- noinit // not initialized
+ noinit // not initialized
};
struct Segment
@@ -59,9 +59,9 @@ namespace stringdist
MATCHTYPE match;
float dist;
Segment();
- Segment(size_t const as, size_t const ae,
+ Segment(size_t const as, size_t const ae,
size_t const bs, size_t const be,
- UnicodeString const& a,
+ UnicodeString const& a,
UnicodeString const& b);
char const* label() const;
};
diff --git a/moses/TranslationModel/UG/generic/threading/ug_thread_safe_counter.cc b/moses/TranslationModel/UG/generic/threading/ug_thread_safe_counter.cc
index 662493e18..b4565f99d 100644
--- a/moses/TranslationModel/UG/generic/threading/ug_thread_safe_counter.cc
+++ b/moses/TranslationModel/UG/generic/threading/ug_thread_safe_counter.cc
@@ -3,10 +3,10 @@ namespace Moses
{
ThreadSafeCounter::
ThreadSafeCounter()
- : ctr(0)
+ : ctr(0)
{ }
- size_t
+ size_t
ThreadSafeCounter::
operator++()
{
@@ -14,21 +14,21 @@ namespace Moses
return ++ctr;
}
- size_t
+ size_t
ThreadSafeCounter::
operator++(int foo)
{
boost::lock_guard<boost::mutex> guard(this->lock);
return ctr++;
}
-
+
ThreadSafeCounter::
operator size_t() const
{
return ctr;
}
- size_t
+ size_t
ThreadSafeCounter::
operator--()
{
@@ -36,13 +36,13 @@ namespace Moses
return --ctr;
}
- size_t
+ size_t
ThreadSafeCounter::
operator--(int foo)
{
boost::lock_guard<boost::mutex> guard(this->lock);
return ctr--;
}
-
-
+
+
}
diff --git a/moses/TranslationModel/UG/mm/calc-coverage.cc b/moses/TranslationModel/UG/mm/calc-coverage.cc
index ef17656d9..83f67220d 100644
--- a/moses/TranslationModel/UG/mm/calc-coverage.cc
+++ b/moses/TranslationModel/UG/mm/calc-coverage.cc
@@ -16,7 +16,7 @@ using namespace ugdiss;
typedef L2R_Token<SimpleWordId> Token;
TokenIndex V;
sptr<vector<vector<Token> > > C(new vector<vector<Token> >());
-void
+void
add_file(string fname)
{
filtering_istream in;
diff --git a/moses/TranslationModel/UG/mm/custom-pt.cc b/moses/TranslationModel/UG/mm/custom-pt.cc
index 93c8c0eb0..1a51aa8a4 100644
--- a/moses/TranslationModel/UG/mm/custom-pt.cc
+++ b/moses/TranslationModel/UG/mm/custom-pt.cc
@@ -31,7 +31,7 @@ using namespace Moses;
using namespace Moses::bitext;
#define CACHING_THRESHOLD 1000
-#define lbop boost::math::binomial_distribution<>::find_lower_bound_on_p
+#define lbop boost::math::binomial_distribution<>::find_lower_bound_on_p
size_t mctr=0,xctr=0;
typedef L2R_Token<SimpleWordId> Token;
@@ -49,15 +49,15 @@ PScoreWC<Token> apply_wp;
vector<float> fweights;
void
-nbest_phrasepairs(uint64_t const pid1,
- pstats const& ps,
+nbest_phrasepairs(uint64_t const pid1,
+ pstats const& ps,
vector<PhrasePair> & nbest)
{
pstats::trg_map_t::const_iterator m;
vector<size_t> idx(nbest.size());
size_t i=0;
- for (m = ps.trg.begin();
- m != ps.trg.end() && i < nbest.size();
+ for (m = ps.trg.begin();
+ m != ps.trg.end() && i < nbest.size();
++m)
{
// cout << m->second.rcnt() << " " << ps.good << endl;
@@ -74,17 +74,17 @@ nbest_phrasepairs(uint64_t const pid1,
++i;
}
// cout << i << " " << nbest.size() << endl;
- if (i < nbest.size())
+ if (i < nbest.size())
{
// cout << "Resizing from " << nbest.size() << " to " << i << endl;
nbest.resize(i);
idx.resize(i);
}
VectorIndexSorter<PhrasePair> sorter(nbest,greater<PhrasePair>());
- if (m != ps.trg.end())
+ if (m != ps.trg.end())
{
make_heap(idx.begin(),idx.end(),sorter);
- PhrasePair cand;
+ PhrasePair cand;
cand.init(pid1,ps,5);
for (; m != ps.trg.end(); ++m)
{
@@ -104,7 +104,7 @@ nbest_phrasepairs(uint64_t const pid1,
}
sort(nbest.begin(),nbest.end(),greater<PhrasePair>());
}
-
+
int main(int argc, char* argv[])
{
// assert(argc == 4);
@@ -120,8 +120,8 @@ int main(int argc, char* argv[])
string L2 = "en";
size_t max_samples = argc > 1 ? atoi(argv[1]) : 1000;
#endif
- char c = *base.rbegin();
- if (c != '/' && c != '.')
+ char c = *base.rbegin();
+ if (c != '/' && c != '.')
base += ".";
fweights.resize(5,.25);
@@ -138,7 +138,7 @@ int main(int argc, char* argv[])
string line;
while (getline(cin,line))
{
- vector<id_type> snt;
+ vector<id_type> snt;
bt.V1->fillIdSeq(line,snt);
for (size_t i = 0; i < snt.size(); ++i)
{
@@ -156,8 +156,8 @@ int main(int argc, char* argv[])
sptr<pstats> s = bt.lookup(m);
for (size_t j = i; j <= k; ++j)
cout << (*bt.V1)[snt[j]] << " ";
- cout << s->good << "/"
- << s->sample_cnt << "/"
+ cout << s->good << "/"
+ << s->sample_cnt << "/"
<< s->raw_cnt << endl;
// vector<PhrasePair> nbest(min(s->trg.size(),size_t(20)));
vector<PhrasePair> nbest(s->trg.size());
@@ -172,17 +172,17 @@ int main(int argc, char* argv[])
cout << " " << setw(6) << pp.score << " ";
for (uint32_t i = off; i < stop; ++i)
cout << (*bt.V2)[o[i].id()] << " ";
- cout << pp.joint << "/"
+ cout << pp.joint << "/"
<< pp.raw1 << "/"
<< pp.raw2 << " |";
- BOOST_FOREACH(float f, pp.fvals)
+ BOOST_FOREACH(float f, pp.fvals)
cout << " " << f;
cout << endl;
}
}
}
}
-#endif
+#endif
exit(0);
}
#endif
diff --git a/moses/TranslationModel/UG/mm/mam2symal.cc b/moses/TranslationModel/UG/mm/mam2symal.cc
index 9610e6f56..eb5034aab 100644
--- a/moses/TranslationModel/UG/mm/mam2symal.cc
+++ b/moses/TranslationModel/UG/mm/mam2symal.cc
@@ -22,7 +22,7 @@ typedef L2R_Token<Conll_Sform> Token;
mmTtrack<char> MAM;
bool with_sids;
-void
+void
interpret_args(int ac, char* av[])
{
po::variables_map vm;
@@ -31,7 +31,7 @@ interpret_args(int ac, char* av[])
("help,h", "print this message")
("numbers,n", po::bool_switch(&with_sids), "print sentence ids as first token")
;
-
+
po::options_description h("Hidden Options");
h.add_options()
("mamfile", po::value<string>(&mamfile), "mamfile")
@@ -40,7 +40,7 @@ interpret_args(int ac, char* av[])
po::positional_options_description a;
a.add("mamfile",1);
a.add("range",-1);
-
+
po::store(po::command_line_parser(ac,av)
.options(h.add(o))
.positional(a)
@@ -56,11 +56,11 @@ interpret_args(int ac, char* av[])
}
}
-void
+void
printRangeMAM(size_t start, size_t stop)
{
for (;start < stop; start++)
- {
+ {
// size_t i = 0;
char const* p = MAM.sntStart(start);
char const* q = MAM.sntEnd(start);
@@ -76,7 +76,7 @@ printRangeMAM(size_t start, size_t stop)
}
}
-int
+int
main(int argc, char*argv[])
{
interpret_args(argc,argv);
@@ -91,7 +91,7 @@ main(int argc, char*argv[])
buf>>first;
if (buf.peek() == '-') buf>>c>>last;
else last = first;
- if (last < MAM.size())
+ if (last < MAM.size())
printRangeMAM(first,last+1);
}
}
diff --git a/moses/TranslationModel/UG/mm/mam_verify.cc b/moses/TranslationModel/UG/mm/mam_verify.cc
index d43539742..798baa947 100644
--- a/moses/TranslationModel/UG/mm/mam_verify.cc
+++ b/moses/TranslationModel/UG/mm/mam_verify.cc
@@ -21,7 +21,7 @@ mmTtrack<char> MAM;
mmTtrack<Token> T1,T2;
bool inv;
vector<string> range;
-void
+void
interpret_args(int ac, char* av[])
{
po::variables_map vm;
@@ -30,7 +30,7 @@ interpret_args(int ac, char* av[])
("help,h", "print this message")
("inv,i", po::bool_switch(&inv), "inverse")
;
-
+
po::options_description h("Hidden Options");
h.add_options()
("bname", po::value<string>(&bname), "base name")
@@ -43,7 +43,7 @@ interpret_args(int ac, char* av[])
a.add("L1",1);
a.add("L2",1);
a.add("range",-1);
-
+
po::store(po::command_line_parser(ac,av)
.options(h.add(o))
.positional(a)
@@ -87,7 +87,7 @@ check_range(size_t start, size_t stop)
return noAln;
}
-int
+int
main(int argc, char*argv[])
{
interpret_args(argc,argv);
@@ -100,7 +100,7 @@ main(int argc, char*argv[])
exit(1);
}
size_t noAln;
- if (!range.size())
+ if (!range.size())
noAln = check_range(0, MAM.size());
else
{
@@ -112,7 +112,7 @@ main(int argc, char*argv[])
buf>>first;
if (buf.peek() == '-') buf>>c>>last;
else last = first;
- if (last < MAM.size())
+ if (last < MAM.size())
noAln += check_range(first,last+1);
}
}
diff --git a/moses/TranslationModel/UG/mm/mmlex-build.cc b/moses/TranslationModel/UG/mm/mmlex-build.cc
index 5e5ea194c..1e7bee5cb 100644
--- a/moses/TranslationModel/UG/mm/mmlex-build.cc
+++ b/moses/TranslationModel/UG/mm/mmlex-build.cc
@@ -1,8 +1,8 @@
// -*- c++ -*-
// Program to extract word cooccurrence counts from a memory-mapped
// word-aligned bitext stores the counts lexicon in the format for
-// mm2dTable<uint32_t> (ug_mm_2d_table.h)
-//
+// mm2dTable<uint32_t> (ug_mm_2d_table.h)
+//
// (c) 2010-2012 Ulrich Germann
// to do: multi-threading
@@ -20,8 +20,8 @@
#include <boost/foreach.hpp>
#include <boost/thread.hpp>
#include <boost/math/distributions/binomial.hpp>
-#include <boost/unordered_map.hpp>
-#include <boost/unordered_set.hpp>
+#include <boost/unordered_map.hpp>
+#include <boost/unordered_set.hpp>
#include "moses/TranslationModel/UG/generic/program_options/ug_get_options.h"
#include "moses/Util.h"
@@ -36,7 +36,7 @@ using namespace boost::math;
typedef mm2dTable<id_type,id_type,uint32_t,uint32_t> LEX_t;
typedef SimpleWordId Token;
-// DECLARATIONS
+// DECLARATIONS
void interpret_args(int ac, char* av[]);
mmTtrack<Token> T1,T2;
@@ -52,7 +52,7 @@ struct Count
Count(uint32_t ax, uint32_t cx) : a(ax), c(cx) {}
};
-bool
+bool
operator<(pair<id_type,Count> const& a,
pair<id_type,Count> const& b)
{
@@ -72,7 +72,7 @@ public:
countlist_t & LEX;
size_t offset;
size_t skip;
- Counter(countlist_t& lex, size_t o, size_t s)
+ Counter(countlist_t& lex, size_t o, size_t s)
: LEX(lex), offset(o), skip(s) {}
void processSentence(id_type sid);
void operator()();
@@ -83,7 +83,7 @@ int verbose;
size_t truncat;
size_t num_threads;
-void
+void
Counter::
operator()()
{
@@ -105,17 +105,17 @@ struct lexsorter
{
vector<countlist_t> const& v;
id_type wid;
- lexsorter(vector<countlist_t> const& vx, id_type widx)
+ lexsorter(vector<countlist_t> const& vx, id_type widx)
: v(vx),wid(widx) {}
bool operator()(pair<uint32_t,uint32_t> const& a,
pair<uint32_t,uint32_t> const& b) const
{
- return (v.at(a.first).at(wid).at(a.second).first >
+ return (v.at(a.first).at(wid).at(a.second).first >
v.at(b.first).at(wid).at(b.second).first);
}
};
-void
+void
writeTableHeader(ostream& out)
{
filepos_type idxOffset=0;
@@ -159,7 +159,7 @@ void writeTable(ostream* aln_out, ostream* coc_out)
H.pop_back();
else
push_heap(H.begin(),H.end(),sorter);
- while (H.size() &&
+ while (H.size() &&
XLEX[H[0].first][id1].at(H[0].second).first == id2)
{
aln += XLEX[H[0].first][id1][H[0].second].second.a;
@@ -178,7 +178,7 @@ void writeTable(ostream* aln_out, ostream* coc_out)
numwrite(*aln_out,aln);
m1a[id1] += aln;
m2a[id2] += aln;
- }
+ }
if (coc_out && coc)
{
++CellCountC;
@@ -191,7 +191,7 @@ void writeTable(ostream* aln_out, ostream* coc_out)
}
idxa.back() = CellCountA;
idxc.back() = CellCountC;
- if (aln_out)
+ if (aln_out)
{
filepos_type idxOffsetA = aln_out->tellp();
BOOST_FOREACH(id_type foo, idxa)
@@ -201,7 +201,7 @@ void writeTable(ostream* aln_out, ostream* coc_out)
aln_out->seekp(0);
numwrite(*aln_out,idxOffsetA);
}
- if (coc_out)
+ if (coc_out)
{
filepos_type idxOffsetC = coc_out->tellp();
BOOST_FOREACH(id_type foo, idxc)
@@ -223,9 +223,9 @@ processSentence(id_type sid)
Token const* e2 = T2.sntEnd(sid);
vector<ushort> cnt1(V1.ksize(),0);
vector<ushort> cnt2(V2.ksize(),0);
- for (Token const* x = s1; x < e1; ++x)
+ for (Token const* x = s1; x < e1; ++x)
++cnt1.at(x->id());
- for (Token const* x = s2; x < e2; ++x)
+ for (Token const* x = s2; x < e2; ++x)
++cnt2.at(x->id());
boost::unordered_set<wpair> seen;
@@ -257,21 +257,21 @@ processSentence(id_type sid)
wpair k(id1,id2);
Count& cnt = CNT[k];
cnt.a++;
- if (seen.insert(k).second)
+ if (seen.insert(k).second)
cnt.c += cnt1[id1] * cnt2[id2];
}
// count unaliged words
- for (size_t i = check1.find_first();
- i < check1.size();
+ for (size_t i = check1.find_first();
+ i < check1.size();
i = check1.find_next(i))
CNT[wpair((s1+i)->id(),0)].a++;
- for (size_t i = check2.find_first();
- i < check2.size();
+ for (size_t i = check2.find_first();
+ i < check2.size();
i = check2.find_next(i))
CNT[wpair(0,(s2+i)->id())].a++;
}
-int
+int
main(int argc, char* argv[])
{
interpret_args(argc,argv);
@@ -299,7 +299,7 @@ main(int argc, char* argv[])
if (cooc.size()) coc_out.close();
}
-void
+void
interpret_args(int ac, char* av[])
{
namespace po=boost::program_options;
@@ -321,7 +321,7 @@ interpret_args(int ac, char* av[])
("truncate,n", po::value<size_t>(&truncat)->default_value(0),
"truncate corpus to <N> sentences (for debugging)")
;
-
+
h.add_options()
("bname", po::value<string>(&bname), "base name")
("L1", po::value<string>(&L1),"L1 tag")
diff --git a/moses/TranslationModel/UG/mm/mmlex-lookup.cc b/moses/TranslationModel/UG/mm/mmlex-lookup.cc
index fbdceeaa0..3ba9ef492 100644
--- a/moses/TranslationModel/UG/mm/mmlex-lookup.cc
+++ b/moses/TranslationModel/UG/mm/mmlex-lookup.cc
@@ -1,8 +1,8 @@
// -*- c++ -*-
// Program to extract word cooccurrence counts from a memory-mapped
// word-aligned bitext stores the counts lexicon in the format for
-// mm2dTable<uint32_t> (ug_mm_2d_table.h)
-//
+// mm2dTable<uint32_t> (ug_mm_2d_table.h)
+//
// (c) 2010-2012 Ulrich Germann
// to do: multi-threading
@@ -20,8 +20,8 @@
#include <boost/foreach.hpp>
#include <boost/thread.hpp>
#include <boost/math/distributions/binomial.hpp>
-#include <boost/unordered_map.hpp>
-#include <boost/unordered_set.hpp>
+#include <boost/unordered_map.hpp>
+#include <boost/unordered_set.hpp>
#include "moses/TranslationModel/UG/generic/program_options/ug_get_options.h"
#include "ug_mm_2d_table.h"
@@ -35,7 +35,7 @@ using namespace boost::math;
typedef mm2dTable<id_type,id_type,uint32_t,uint32_t> LEX_t;
typedef SimpleWordId Token;
-// DECLARATIONS
+// DECLARATIONS
void interpret_args(int ac, char* av[]);
string swrd,twrd,L1,L2,bname;
@@ -43,7 +43,7 @@ TokenIndex V1,V2;
LEX_t LEX;
-void
+void
lookup_source(ostream& out, id_type r)
{
vector<LEX_t::Cell> foo(LEX[r].start,LEX[r].stop);
@@ -57,7 +57,7 @@ lookup_source(ostream& out, id_type r)
}
}
-void
+void
lookup_target(ostream& out, id_type c)
{
vector<LEX_t::Cell> foo;
@@ -65,7 +65,7 @@ lookup_target(ostream& out, id_type c)
for (size_t r = 0; r < LEX.numRows; ++r)
{
size_t j = LEX[r][c];
- if (j)
+ if (j)
{
cell.id = r;
cell.val = j;
@@ -82,7 +82,7 @@ lookup_target(ostream& out, id_type c)
}
}
-void
+void
dump(ostream& out)
{
for (size_t r = 0; r < LEX.numRows; ++r)
@@ -91,7 +91,7 @@ dump(ostream& out)
}
-int
+int
main(int argc, char* argv[])
{
interpret_args(argc,argv);
@@ -100,14 +100,14 @@ main(int argc, char* argv[])
V1.open(bname+L1+".tdx");
V2.open(bname+L2+".tdx");
LEX.open(bname+L1+"-"+L2+".lex");
-
+
cout.precision(2);
id_type swid = V1[swrd];
id_type twid = V2[twrd];
if (swid != 1 && twid != 1)
{
- cout << swrd << " " << twrd << " "
- << LEX.m1(swid) << " / "
+ cout << swrd << " " << twrd << " "
+ << LEX.m1(swid) << " / "
<< LEX[swid][twid] << " / "
<< LEX.m2(twid) << endl;
}
@@ -119,7 +119,7 @@ main(int argc, char* argv[])
dump(cout);
}
-void
+void
interpret_args(int ac, char* av[])
{
namespace po=boost::program_options;
@@ -133,7 +133,7 @@ interpret_args(int ac, char* av[])
("source,s",po::value<string>(&swrd),"source word")
("target,t",po::value<string>(&twrd),"target word")
;
-
+
h.add_options()
("bname", po::value<string>(&bname), "base name")
("L1", po::value<string>(&L1),"L1 tag")
diff --git a/moses/TranslationModel/UG/mm/mtt-build.cc b/moses/TranslationModel/UG/mm/mtt-build.cc
index f49895ebf..a61cbac3f 100644
--- a/moses/TranslationModel/UG/mm/mtt-build.cc
+++ b/moses/TranslationModel/UG/mm/mtt-build.cc
@@ -46,8 +46,8 @@ bool quiet = false; // no progress reporting
string vocabBase; // base name for existing vocabs that should be used
string baseName; // base name for all files
-string tmpFile, mttFile; /* name of temporary / actual track file
- * (.mtt for Conll format, .mct for plain text)
+string tmpFile, mttFile; /* name of temporary / actual track file
+ * (.mtt for Conll format, .mct for plain text)
*/
string UNK;
@@ -60,7 +60,7 @@ void interpret_args(int ac, char* av[]);
inline uchar rangeCheck(int p, int limit) { return p < limit ? p : 1; }
-id_type
+id_type
get_id(TokenIndex const& T, string const& w)
{
id_type ret = T[w];
@@ -73,21 +73,21 @@ get_id(TokenIndex const& T, string const& w)
return ret;
}
-void
+void
open_vocab(TokenIndex& T, string fname)
{
- if (!access(fname.c_str(), F_OK))
- {
- T.open(fname,UNK);
- assert(T[UNK] == 1);
+ if (!access(fname.c_str(), F_OK))
+ {
+ T.open(fname,UNK);
+ assert(T[UNK] == 1);
}
else T.setUnkLabel(UNK);
if (incremental) T.setDynamic(true);
- assert(T["NULL"] == 0);
+ assert(T["NULL"] == 0);
assert(T[UNK] == 1);
}
-void
+void
ini_cnt_vec(TokenIndex const& T, vector<pair<string,size_t> > & v)
{
v.resize(T.totalVocabSize());
@@ -142,7 +142,7 @@ void fill_rec(Conll_Record& rec, vector<string> const& w)
else if (w.size() >= 8) // CONLL format
{
int id = atoi(w[0].c_str());
- int gov = atoi(w[6].c_str());
+ int gov = atoi(w[6].c_str());
rec.sform = get_id(SF, w[1]);
rec.lemma = get_id(LM, w[2]);
rec.majpos = rangeCheck(get_id(PS, w[3]), 256);
@@ -161,12 +161,12 @@ void log_progress(size_t ctr)
}
else if (ctr % 10000 == 0)
{
- cerr << ".";
+ cerr << ".";
}
}
-size_t
+size_t
process_plain_input(ostream& out, vector<id_type> & s_index)
{
id_type totalWords = 0;
@@ -176,7 +176,7 @@ process_plain_input(ostream& out, vector<id_type> & s_index)
istringstream buf(line);
if (!quiet) log_progress(s_index.size());
s_index.push_back(totalWords);
- while (buf>>w)
+ while (buf>>w)
{
numwrite(out,get_id(SF,w));
++totalWords;
@@ -186,9 +186,9 @@ process_plain_input(ostream& out, vector<id_type> & s_index)
return totalWords;
}
-size_t
-process_tagged_input(ostream& out,
- vector<id_type> & s_index,
+size_t
+process_tagged_input(ostream& out,
+ vector<id_type> & s_index,
vector<id_type> & p_index)
{
string line;
@@ -196,7 +196,7 @@ process_tagged_input(ostream& out,
bool new_sent = true;
bool new_par = true;
id_type totalWords = 0;
-
+
while (getline(cin,line))
{
vector<string> w; string f; istringstream buf(line);
@@ -205,7 +205,7 @@ process_tagged_input(ostream& out,
if (w.size() == 0 || starts_with(w[0], "SID="))
new_sent = true;
- else if (w.size() == 1 && w[0] == "<P>")
+ else if (w.size() == 1 && w[0] == "<P>")
new_par = new_sent = true;
if (w.size() < 3) continue;
@@ -244,7 +244,7 @@ numberize()
index = &p_index;
}
- if (!quiet)
+ if (!quiet)
cerr << endl << "Writing index ... (" << index->size() << " chunks) ";
startIdx = out.tellp();
@@ -261,7 +261,7 @@ numberize()
vector<id_type> smap,lmap,pmap,dmap;
-void
+void
invert(vector<id_type> const& from, vector<id_type> & to)
{
to.resize(from.size());
@@ -269,11 +269,11 @@ invert(vector<id_type> const& from, vector<id_type> & to)
to[from[i]] = i;
}
-// sorts new items based on occurrence counts but won't reassign
+// sorts new items based on occurrence counts but won't reassign
// existing token ids
-void
-conservative_sort(TokenIndex const & V,
- vector<size_t> const & cnt,
+void
+conservative_sort(TokenIndex const & V,
+ vector<size_t> const & cnt,
vector<id_type> & xmap)
{
xmap.resize(V.totalVocabSize());
@@ -344,21 +344,21 @@ void save_vocabs()
string vbase = baseName;
if (is_conll)
{
- if (SF.totalVocabSize() > SF.knownVocabSize())
+ if (SF.totalVocabSize() > SF.knownVocabSize())
write_tokenindex(vbase+".tdx.sfo",SF,smap);
- if (LM.totalVocabSize() > LM.knownVocabSize())
+ if (LM.totalVocabSize() > LM.knownVocabSize())
write_tokenindex(vbase+".tdx.lem",LM,lmap);
- if (PS.totalVocabSize() > PS.knownVocabSize())
+ if (PS.totalVocabSize() > PS.knownVocabSize())
write_tokenindex(vbase+".tdx.pos",PS,pmap);
- if (DT.totalVocabSize() > DT.knownVocabSize())
+ if (DT.totalVocabSize() > DT.knownVocabSize())
write_tokenindex(vbase+".tdx.drl",DT,dmap);
}
- else if (SF.totalVocabSize() > SF.knownVocabSize())
+ else if (SF.totalVocabSize() > SF.knownVocabSize())
write_tokenindex(vbase+".tdx",SF,smap);
}
template<typename Token>
-size_t
+size_t
build_mmTSA(string infile, string outfile)
{
size_t mypid = fork();
@@ -371,14 +371,14 @@ build_mmTSA(string infile, string outfile)
exit(0);
}
-bool
+bool
build_plaintext_tsas()
{
typedef L2R_Token<SimpleWordId> L2R;
typedef R2L_Token<SimpleWordId> R2L;
size_t c = with_sfas + with_pfas;
- if (with_sfas) build_mmTSA<L2R>(tmpFile, baseName + ".sfa");
- if (with_pfas) build_mmTSA<R2L>(tmpFile, baseName + ".pfa");
+ if (with_sfas) build_mmTSA<L2R>(tmpFile, baseName + ".sfa");
+ if (with_pfas) build_mmTSA<R2L>(tmpFile, baseName + ".pfa");
while (c--) wait(NULL);
return true;
}
@@ -388,27 +388,27 @@ void build_conll_tsas()
string bn = baseName;
string mtt = tmpFile;
size_t c = 3 * (with_sfas + with_pfas + with_dcas);
- if (with_sfas)
+ if (with_sfas)
{
build_mmTSA<L2R_Token<Conll_Sform> >(mtt,bn+".sfa-sform");
build_mmTSA<L2R_Token<Conll_Lemma> >(mtt,bn+".sfa-lemma");
build_mmTSA<L2R_Token<Conll_MinPos> >(mtt,bn+".sfa-minpos");
}
- if (with_pfas)
+ if (with_pfas)
{
build_mmTSA<R2L_Token<Conll_Sform> >(mtt,bn+".pfa-sform");
build_mmTSA<R2L_Token<Conll_Lemma> >(mtt,bn+".pfa-lemma");
build_mmTSA<R2L_Token<Conll_MinPos> >(mtt,bn+".pfa-minpos");
}
- if (with_dcas)
+ if (with_dcas)
{
- build_mmTSA<ConllBottomUpToken<Conll_Sform> >(mtt,bn+".dca-sform");
- build_mmTSA<ConllBottomUpToken<Conll_Lemma> >(mtt,bn+".dca-lemma");
+ build_mmTSA<ConllBottomUpToken<Conll_Sform> >(mtt,bn+".dca-sform");
+ build_mmTSA<ConllBottomUpToken<Conll_Lemma> >(mtt,bn+".dca-lemma");
build_mmTSA<ConllBottomUpToken<Conll_MinPos> >(mtt,bn+".dca-minpos");
}
- while (c--) wait(NULL);
+ while (c--) wait(NULL);
}
@@ -430,7 +430,7 @@ int main(int argc, char* argv[])
rename(tmpFile.c_str(),mttFile.c_str());
}
-void
+void
interpret_args(int ac, char* av[])
{
po::variables_map vm;
@@ -439,10 +439,10 @@ interpret_args(int ac, char* av[])
("help,h", "print this message")
- ("quiet,q", po::bool_switch(&quiet),
+ ("quiet,q", po::bool_switch(&quiet),
"don't print progress information")
- ("incremental,i", po::bool_switch(&incremental),
+ ("incremental,i", po::bool_switch(&incremental),
"incremental mode; rewrites vocab files!")
("vocab-base,v", po::value<string>(&vocabBase),
@@ -451,15 +451,15 @@ interpret_args(int ac, char* av[])
("output,o", po::value<string>(&baseName),
"base file name of the resulting file(s)")
- ("sfa,s", po::value<int>(&with_sfas)->default_value(1),
+ ("sfa,s", po::value<int>(&with_sfas)->default_value(1),
"also build suffix arrays")
("pfa,p", po::value<int>(&with_pfas)
- ->default_value(0)->implicit_value(1),
+ ->default_value(0)->implicit_value(1),
"also build prefix arrays")
("dca,d", po::value<int>(&with_dcas)
- ->default_value(0)->implicit_value(1),
+ ->default_value(0)->implicit_value(1),
"also build dependency chain arrays")
("conll,c", po::bool_switch(&is_conll),
@@ -468,18 +468,18 @@ interpret_args(int ac, char* av[])
("unk,u", po::value<string>(&UNK)->default_value("UNK"),
"label for unknown tokens")
- // ("map,m", po::value<string>(&vmap),
+ // ("map,m", po::value<string>(&vmap),
// "map words to word classes for indexing")
-
+
;
-
+
po::options_description h("Hidden Options");
h.add_options()
;
h.add(o);
po::positional_options_description a;
a.add("output",1);
-
+
po::store(po::command_line_parser(ac,av)
.options(h)
.positional(a)
@@ -487,7 +487,7 @@ interpret_args(int ac, char* av[])
po::notify(vm);
if (vm.count("help") || !vm.count("output"))
{
- cout << "\nusage:\n\t cat <corpus> | " << av[0]
+ cout << "\nusage:\n\t cat <corpus> | " << av[0]
<< " [options] <output .mtt file>" << endl;
cout << o << endl;
exit(0);
diff --git a/moses/TranslationModel/UG/mm/mtt-count-words.cc b/moses/TranslationModel/UG/mm/mtt-count-words.cc
index c9b435477..223ba2090 100644
--- a/moses/TranslationModel/UG/mm/mtt-count-words.cc
+++ b/moses/TranslationModel/UG/mm/mtt-count-words.cc
@@ -36,7 +36,7 @@ int main(int argc, char* argv[])
{
interpret_args(argc,argv);
T.open(bname+".mct");
- V.open(bname+".tdx");
+ V.open(bname+".tdx");
vector<size_t> cnt(V.ksize(),0);
for (size_t sid = 0; sid < T.size(); ++sid)
{
@@ -48,7 +48,7 @@ int main(int argc, char* argv[])
exit(0);
}
-void
+void
interpret_args(int ac, char* av[])
{
namespace po=boost::program_options;
@@ -60,7 +60,7 @@ interpret_args(int ac, char* av[])
o.add_options()
("help,h", "print this message")
;
-
+
h.add_options()
("bname", po::value<string>(&bname), "base name")
;
diff --git a/moses/TranslationModel/UG/mm/mtt-demo1.cc b/moses/TranslationModel/UG/mm/mtt-demo1.cc
index a253e9ed3..d3506fa0f 100644
--- a/moses/TranslationModel/UG/mm/mtt-demo1.cc
+++ b/moses/TranslationModel/UG/mm/mtt-demo1.cc
@@ -21,17 +21,17 @@ int main(int argc, char* argv[])
using namespace std;
if (argc < 3)
{
- cerr << "usage: " << argv[0] << " <track base name> lookup word sequence"
+ cerr << "usage: " << argv[0] << " <track base name> lookup word sequence"
<< endl;
}
string base = argv[1];
- TokenIndex V;
+ TokenIndex V;
V.open(base+".tdx");
- boost::shared_ptr<mmTtrack<Token> > T(new mmTtrack<Token>());
+ boost::shared_ptr<mmTtrack<Token> > T(new mmTtrack<Token>());
T->open(base+".mct");
mmTSA<Token> I; I.open(base+".sfa",T);
mmTSA<Token>::tree_iterator m(&I);
-
+
// look up the search string m.extend() returns true upon success
for (int i = 2; i < argc && m.extend(V[argv[i]]); ++i);
if (int(m.size() + 2) < argc)
@@ -39,7 +39,7 @@ int main(int argc, char* argv[])
cerr << "NOT FOUND" << endl;
exit(1);
}
-
+
tsa::ArrayEntry e(m.lower_bound(-1));
char const* stop = m.upper_bound(-1);
do
diff --git a/moses/TranslationModel/UG/mm/mtt-dump.cc b/moses/TranslationModel/UG/mm/mtt-dump.cc
index b7d85d623..eea1bb400 100644
--- a/moses/TranslationModel/UG/mm/mtt-dump.cc
+++ b/moses/TranslationModel/UG/mm/mtt-dump.cc
@@ -25,7 +25,7 @@ bool sform;
bool have_mtt, have_mct;
bool with_sids;
bool with_positions;
-void
+void
interpret_args(int ac, char* av[])
{
po::variables_map vm;
@@ -36,7 +36,7 @@ interpret_args(int ac, char* av[])
("sform,s", po::bool_switch(&sform), "sform only")
("with-positions,p", po::bool_switch(&with_positions), "show word positions")
;
-
+
po::options_description h("Hidden Options");
h.add_options()
("bname", po::value<string>(&bname), "base name")
@@ -45,7 +45,7 @@ interpret_args(int ac, char* av[])
po::positional_options_description a;
a.add("bname",1);
a.add("range",-1);
-
+
po::store(po::command_line_parser(ac,av)
.options(h.add(o))
.positional(a)
@@ -63,11 +63,11 @@ interpret_args(int ac, char* av[])
mct = bname+".mct";
}
-void
+void
printRangeMTT(size_t start, size_t stop)
{
for (;start < stop; start++)
- {
+ {
size_t i = 0;
Token const* s = MTT.sntStart(start);
Token const* e = MTT.sntEnd(start);
@@ -92,7 +92,7 @@ printRangeMTT(size_t start, size_t stop)
cout << i+t->parent << " ";
cout << DT[t->dtype] << endl;
}
- else
+ else
{
if (with_positions) cout << t-s << ":";
cout << SF[t->id()] << " ";
@@ -102,16 +102,16 @@ printRangeMTT(size_t start, size_t stop)
}
}
-void
+void
printRangeMCT(size_t start, size_t stop)
{
for (;start < stop; start++)
- {
+ {
SimpleWordId const* s = MCT.sntStart(start);
SimpleWordId const* t = s;
SimpleWordId const* e = MCT.sntEnd(start);
if (with_sids) cout << start << " ";
- while (t < e)
+ while (t < e)
{
if (with_positions) cout << t-s << ":";
cout << SF[(t++)->id()] << " ";
@@ -120,7 +120,7 @@ printRangeMCT(size_t start, size_t stop)
}
}
-int
+int
main(int argc, char*argv[])
{
interpret_args(argc,argv);
@@ -139,14 +139,14 @@ main(int argc, char*argv[])
DT.open(bname+".tdx.drl"); DT.iniReverseIndex();
MTT.open(mtt);
}
- else
+ else
{
sform = true;
SF.open(bname+".tdx"); SF.iniReverseIndex();
MCT.open(mct);
}
-
- if (!range.size())
+
+ if (!range.size())
have_mtt ? printRangeMTT(0, MTT.size()) : printRangeMCT(0, MCT.size());
else
{
@@ -157,9 +157,9 @@ main(int argc, char*argv[])
buf>>first;
if (buf.peek() == '-') buf>>c>>last;
else last = first;
- if (have_mtt && last < MTT.size())
+ if (have_mtt && last < MTT.size())
printRangeMTT(first,last+1);
- else if (last < MCT.size())
+ else if (last < MCT.size())
printRangeMCT(first,last+1);
}
}
diff --git a/moses/TranslationModel/UG/mm/mtt.count.cc b/moses/TranslationModel/UG/mm/mtt.count.cc
index 423c12ec7..1e2382f67 100644
--- a/moses/TranslationModel/UG/mm/mtt.count.cc
+++ b/moses/TranslationModel/UG/mm/mtt.count.cc
@@ -36,14 +36,14 @@ bool echo;
int main(int argc, char* argv[])
{
interpret_args(argc,argv);
-
+
T.open(bname+".mct");
V.open(bname+".tdx"); V.iniReverseIndex();
I.open(bname+".sfa",&T);
string line;
while (getline(cin,line))
{
- vector<id_type> phr;
+ vector<id_type> phr;
V.fillIdSeq(line,phr);
TSA<Token>::tree_iterator m(&I);
size_t i = 0;
@@ -55,7 +55,7 @@ int main(int argc, char* argv[])
exit(0);
}
-void
+void
interpret_args(int ac, char* av[])
{
namespace po=boost::program_options;
@@ -68,7 +68,7 @@ interpret_args(int ac, char* av[])
("help,h", "print this message")
("echo,e", po::bool_switch(&echo), "repeat lookup phrases")
;
-
+
h.add_options()
("bname", po::value<string>(&bname), "base name")
;
diff --git a/moses/TranslationModel/UG/mm/num_read_write.cc b/moses/TranslationModel/UG/mm/num_read_write.cc
index 403f7d300..5c281d9dd 100644
--- a/moses/TranslationModel/UG/mm/num_read_write.cc
+++ b/moses/TranslationModel/UG/mm/num_read_write.cc
@@ -2,7 +2,7 @@
namespace ugdiss {
typedef unsigned char uchar;
- void
+ void
numwrite(std::ostream& out, uint16_t const& x)
{
char buf[2];
@@ -11,7 +11,7 @@ namespace ugdiss {
out.write(buf,2);
}
- void
+ void
numwrite(std::ostream& out, uint32_t const& x)
{
char buf[4];
@@ -22,7 +22,7 @@ namespace ugdiss {
out.write(buf,4);
}
- void
+ void
numwrite(std::ostream& out, uint64_t const& x)
{
char buf[8];
@@ -37,7 +37,7 @@ namespace ugdiss {
out.write(buf,8);
}
- char const*
+ char const*
numread(char const* src, uint16_t & x)
{
uchar const* d = reinterpret_cast<uchar const*>(src);
@@ -45,28 +45,28 @@ namespace ugdiss {
return src+2;
}
- char const*
+ char const*
numread(char const* src, uint32_t & x)
{
uchar const* d = reinterpret_cast<uchar const*>(src);
- x = ((uint32_t(d[0])<<0) |
- (uint32_t(d[1])<<8) |
- (uint32_t(d[2])<<16)|
+ x = ((uint32_t(d[0])<<0) |
+ (uint32_t(d[1])<<8) |
+ (uint32_t(d[2])<<16)|
(uint32_t(d[3])<<24));
return src+4;
}
- char const*
+ char const*
numread(char const* src, uint64_t & x)
{
uchar const* d = reinterpret_cast<uchar const*>(src);
- x = ((uint64_t(d[0])<<0) |
- (uint64_t(d[1])<<8) |
- (uint64_t(d[2])<<16) |
+ x = ((uint64_t(d[0])<<0) |
+ (uint64_t(d[1])<<8) |
+ (uint64_t(d[2])<<16) |
(uint64_t(d[3])<<24) |
- (uint64_t(d[4])<<32) |
- (uint64_t(d[5])<<40) |
- (uint64_t(d[6])<<48) |
+ (uint64_t(d[4])<<32) |
+ (uint64_t(d[5])<<40) |
+ (uint64_t(d[6])<<48) |
(uint64_t(d[7])<<56));
return src+8;
}
diff --git a/moses/TranslationModel/UG/mm/num_read_write.h b/moses/TranslationModel/UG/mm/num_read_write.h
index 6fdcecc81..f83e1c982 100644
--- a/moses/TranslationModel/UG/mm/num_read_write.h
+++ b/moses/TranslationModel/UG/mm/num_read_write.h
@@ -14,11 +14,11 @@ namespace ugdiss {
void numwrite(std::ostream& out, uint16_t const& x);
void numwrite(std::ostream& out, uint32_t const& x);
void numwrite(std::ostream& out, uint64_t const& x);
-
+
char const* numread(char const* src, uint16_t & x);
char const* numread(char const* src, uint32_t & x);
char const* numread(char const* src, uint64_t & x);
-
+
// template<typename uintNumber>
// void
// numwrite(std::ostream& out, uintNumber const& x)
@@ -54,7 +54,7 @@ namespace ugdiss {
// case 8: x = bswap_64(x); break;
// default: break;
// }
-// #endif
+// #endif
// }
// template<typename uintNumber>
@@ -71,7 +71,7 @@ namespace ugdiss {
// case 8: x = bswap_64(x); break;
// default: break;
// }
-// #endif
+// #endif
// return src+sizeof(uintNumber);
// }
} // end of namespace ugdiss
diff --git a/moses/TranslationModel/UG/mm/obsolete/ug_bitext_base.h b/moses/TranslationModel/UG/mm/obsolete/ug_bitext_base.h
index 1810027af..e5e9ca88c 100644
--- a/moses/TranslationModel/UG/mm/obsolete/ug_bitext_base.h
+++ b/moses/TranslationModel/UG/mm/obsolete/ug_bitext_base.h
@@ -39,8 +39,8 @@ namespace Moses {
class jstats; // phrase pair ("joint") statistics
class agenda
{
- boost::mutex lock;
- boost::condition_variable ready;
+ boost::mutex lock;
+ boost::condition_variable ready;
class job;
class worker;
list<job> joblist;
@@ -52,9 +52,9 @@ namespace Moses {
agenda(bitext_base const& bitext);
~agenda();
void add_workers(int n);
- sptr<pstats> add_job(mmbitext::iter const& phrase,
+ sptr<pstats> add_job(mmbitext::iter const& phrase,
size_t const max_samples);
- bool get_task(uint64_t & sid, uint64_t & offset, uint64_t & len,
+ bool get_task(uint64_t & sid, uint64_t & offset, uint64_t & len,
bool & fwd, sptr<bitext_base::pstats> & stats);
};
@@ -65,22 +65,22 @@ namespace Moses {
mmTtrack<char> Tx; // word alignments
mmTtrack<Token> T1,T2; // token tracks
TokenIndex V1,V2; // vocabs
- mmTSA<Token> I1,I2; // suffix arrays
+ mmTSA<Token> I1,I2; // suffix arrays
/// given the source phrase sid[start:stop]
- // find the possible start (s1 .. s2) and end (e1 .. e2)
+ // find the possible start (s1 .. s2) and end (e1 .. e2)
// points of the target phrase; if non-NULL, store word
- // alignments in *core_alignment. If /flip/, source phrase is
+ // alignments in *core_alignment. If /flip/, source phrase is
// L2.
- bool
+ bool
find_trg_phr_bounds
- (size_t const sid, size_t const start, size_t const stop,
- size_t & s1, size_t & s2, size_t & e1, size_t & e2,
+ (size_t const sid, size_t const start, size_t const stop,
+ size_t & s1, size_t & s2, size_t & e1, size_t & e2,
vector<uchar> * core_alignment, bool const flip) const;
boost::unordered_map<uint64_t,sptr<pstats> > cache1,cache2;
private:
- sptr<pstats>
+ sptr<pstats>
prep2(iter const& phrase);
public:
mmbitext();
@@ -98,8 +98,8 @@ namespace Moses {
jstats
{
uint32_t my_rcnt; // unweighted count
- float my_wcnt; // weighted count
- vector<pair<size_t, vector<uchar> > > my_aln;
+ float my_wcnt; // weighted count
+ vector<pair<size_t, vector<uchar> > > my_aln;
boost::mutex lock;
public:
jstats();
@@ -110,22 +110,22 @@ namespace Moses {
void add(float w, vector<uchar> const& a);
};
- struct
+ struct
mmbitext::
pstats
{
boost::mutex lock; // for parallel gathering of stats
boost::condition_variable ready; // consumers can wait for this data structure to be ready.
- size_t raw_cnt; // (approximate) raw occurrence count
+ size_t raw_cnt; // (approximate) raw occurrence count
size_t sample_cnt; // number of instances selected during sampling
size_t good; // number of selected instances with valid word alignments
size_t sum_pairs;
- // size_t snt_cnt;
+ // size_t snt_cnt;
// size_t sample_snt;
size_t in_progress; // keeps track of how many threads are currently working on this
boost::unordered_map<uint64_t, jstats> trg;
- pstats();
+ pstats();
// vector<phrase> nbest;
// void select_nbest(size_t const N=10);
void release();
@@ -142,7 +142,7 @@ namespace Moses {
public:
worker(agenda& a);
void operator()();
-
+
};
class
diff --git a/moses/TranslationModel/UG/mm/symal2mam.cc b/moses/TranslationModel/UG/mm/symal2mam.cc
index 631d4ae07..6d0af57b0 100644
--- a/moses/TranslationModel/UG/mm/symal2mam.cc
+++ b/moses/TranslationModel/UG/mm/symal2mam.cc
@@ -2,9 +2,9 @@
// program to convert GIZA-style alignments into memory-mapped format
// (c) 2010 Ulrich Germann
-// Reads from stdin a file with alternating lines: sentence lengths and symal output.
-// We need the sentence lenghts for sanity checks, because GIZA alignment might skip
-// sentences. If --skip, we skip such sentence pairs, otherwise, we leave the word
+// Reads from stdin a file with alternating lines: sentence lengths and symal output.
+// We need the sentence lenghts for sanity checks, because GIZA alignment might skip
+// sentences. If --skip, we skip such sentence pairs, otherwise, we leave the word
// alignment matrix blank.
#include "ug_mm_ttrack.h"
@@ -24,7 +24,7 @@
#include "util/exception.hh"
// #include "headers-base/util/check.hh"
-// NOTE TO SELF:
+// NOTE TO SELF:
/* Program to filter out sentences that GIZA will skip or truncate,
* i.e. sentences longer than 100 words or sentence pairs with a length
*/
@@ -42,7 +42,7 @@ TokenIndex V1;
string mtt1name,mtt2name,o1name,o2name,mamname,cfgFile;
string dataFormat,A3filename;
-void
+void
interpret_args(int ac, char* av[])
{
namespace po=boost::program_options;
@@ -63,7 +63,7 @@ interpret_args(int ac, char* av[])
("t2", po::value<string>(&mtt2name), "file name of L2 mapped token track")
("format,F", po::value<string>(&dataFormat)->default_value("plain"), "data format (plain or conll)")
;
-
+
h.add_options()
("mamname", po::value<string>(&mamname), "name of output file for mam")
;
@@ -76,8 +76,8 @@ interpret_args(int ac, char* av[])
if (vm.count("help") || mamname.empty())
{
cout << "usage:\n"
- << "\t\n"
- << "\t ... | " << av[0]
+ << "\t\n"
+ << "\t ... | " << av[0]
<< " <.mam file> \n" << endl;
cout << o << endl;
cout << "If an A3 file is given (as produced by (m)giza), symal2mam performs\n"
@@ -117,8 +117,8 @@ procSymalLine(string const& line, ostream& out)
{
cerr << a << "-" << b << " " << len1 << "/" << len2 << endl;
}
- assert(len1 == 0 || a<len1);
- assert(len2 == 0 || b<len2);
+ assert(len1 == 0 || a<len1);
+ assert(len2 == 0 || b<len2);
binwrite(out,a);
binwrite(out,b);
}
@@ -138,7 +138,7 @@ void finiMAM(ofstream& out, vector<id_type>& idx, id_type numTok)
out.close();
}
-void
+void
finalize(ofstream& out, vector<id_type> const& idx, id_type tokenCount)
{
id_type idxSize = idx.size();
@@ -184,7 +184,7 @@ go()
while(getline(cin,line))
{
idxm.push_back(procSymalLine(line,mam));
- if (debug && ++ctr%100000==0)
+ if (debug && ++ctr%100000==0)
cerr << ctr/1000 << "K lines processed" << endl;
}
finiMAM(mam,idxm,0);
@@ -208,20 +208,20 @@ go(string t1name, string t2name, string A3filename)
for (sid = 0; sid < T1.size(); ++sid)
{
- len1 = T1.sntLen(sid);
+ len1 = T1.sntLen(sid);
len2 = T2.sntLen(sid);
- if (debug)
- cerr << "[" << lineCtr << "] "
- << len1 << " (" << check1 << ") / "
+ if (debug)
+ cerr << "[" << lineCtr << "] "
+ << len1 << " (" << check1 << ") / "
<< len2 << " (" << check2 << ")" << endl;
- if ((check1 >=0 && check1!=len1) ||
+ if ((check1 >=0 && check1!=len1) ||
(check2 >=0 && check2!=len2))
{
if (skip)
{
- cerr << "[" << ++skipCtr << "] skipping "
- << check1 << "/" << check2 << " vs. "
- << len1 << "/" << len2
+ cerr << "[" << ++skipCtr << "] skipping "
+ << check1 << "/" << check2 << " vs. "
+ << len1 << "/" << len2
<< " at line " << lineCtr << endl;
}
else
@@ -238,9 +238,9 @@ go(string t1name, string t2name, string A3filename)
}
if (skip)
{
- idx1.push_back(tokenCount1 += len1);
+ idx1.push_back(tokenCount1 += len1);
copySentence(T1,sid,t1out);
- idx2.push_back(tokenCount2 += len2);
+ idx2.push_back(tokenCount2 += len2);
copySentence(T2,sid,t2out);
}
@@ -250,7 +250,7 @@ go(string t1name, string t2name, string A3filename)
lineCtr++;
idxm.push_back(procSymalLine(line,mam));
if (debug) cerr << "[" << lineCtr << "] "
- << check1 << " (" << len1 <<") "
+ << check1 << " (" << len1 <<") "
<< check2 << " (" << len2 <<") "
<< line << endl;
getCheckValues(A3file,check1,check2);
@@ -264,7 +264,7 @@ go(string t1name, string t2name, string A3filename)
cout << idxm.size() << endl;
}
-void
+void
initialize(ofstream& out, string const& fname)
{
out.open(fname.c_str());
diff --git a/moses/TranslationModel/UG/mm/tpt_pickler.cc b/moses/TranslationModel/UG/mm/tpt_pickler.cc
index c23913fc2..353e5b901 100644
--- a/moses/TranslationModel/UG/mm/tpt_pickler.cc
+++ b/moses/TranslationModel/UG/mm/tpt_pickler.cc
@@ -73,45 +73,45 @@ namespace ugdiss
data += T(c&mask) << 63;
}
- void
- binwrite(std::ostream& out, unsigned char data)
- {
+ void
+ binwrite(std::ostream& out, unsigned char data)
+ {
binwrite_unsigned_integer(out, data);
}
- void
+ void
binwrite(std::ostream& out, unsigned short data)
- {
+ {
binwrite_unsigned_integer(out, data);
}
- void
+ void
binwrite(std::ostream& out, unsigned long data)
- {
+ {
binwrite_unsigned_integer(out, data);
}
- void
+ void
binwrite(std::ostream& out, unsigned long long data)
- {
+ {
binwrite_unsigned_integer(out, data);
}
#if __WORDSIZE == 64
- void
+ void
binwrite(std::ostream& out, unsigned int data)
- {
+ {
binwrite_unsigned_integer(out, data);
}
-#else
- void
+#else
+ void
binwrite(std::ostream& out, size_t data)
- {
+ {
binwrite_unsigned_integer(out, data);
}
#endif
- void
+ void
binread(std::istream& in, unsigned short& data)
{
assert(sizeof(data)==2);
@@ -127,7 +127,7 @@ namespace ugdiss
data += uint16_t(c&mask) << 14;
}
- void
+ void
binread(std::istream& in, unsigned int& data)
{
assert(sizeof(data) == 4);
@@ -149,7 +149,7 @@ namespace ugdiss
data += uint32_t(c&mask) << 28;
}
- void
+ void
binread(std::istream& in, unsigned long& data)
{
#if __WORDSIZE == 32
@@ -185,16 +185,16 @@ namespace ugdiss
data += static_cast<unsigned long long>(c&mask) << 49;
if (c < 0) return;
in.get(c);
-
+
data += static_cast<unsigned long long>(c&mask) << 56;
if (c < 0) return;
in.get(c);
-
+
data += static_cast<unsigned long long>(c&mask) << 63;
#endif
}
- void
+ void
binread(std::istream& in, unsigned long long& data)
{
assert(sizeof(unsigned long long)==8);
@@ -231,14 +231,14 @@ namespace ugdiss
}
// writing and reading strings ...
- void
+ void
binwrite(std::ostream& out, std::string const& s)
{
size_t len = s.size();
ugdiss::binwrite(out,len);
out.write(s.c_str(),len);
}
-
+
void
binread(std::istream& in, std::string& s)
{
@@ -250,28 +250,28 @@ namespace ugdiss
buf[len] = 0;
s = buf;
}
-
+
void
binwrite(std::ostream& out, float x)
- {
- // IMPORTANT: this is not robust against the big/little endian
- // issue.
- out.write(reinterpret_cast<char*>(&x),sizeof(float));
+ {
+ // IMPORTANT: this is not robust against the big/little endian
+ // issue.
+ out.write(reinterpret_cast<char*>(&x),sizeof(float));
}
-
+
void
binread(std::istream& in, float& x)
- {
- // IMPORTANT: this is not robust against the big/little endian
- // issue.
- in.read(reinterpret_cast<char*>(&x),sizeof(x));
+ {
+ // IMPORTANT: this is not robust against the big/little endian
+ // issue.
+ in.read(reinterpret_cast<char*>(&x),sizeof(x));
}
-
+
char const *binread(char const* p, uint16_t& buf)
{
static char mask = 127;
- buf = (*p)&mask;
+ buf = (*p)&mask;
if (*p++ < 0) return p;
buf += uint16_t((*p)&mask)<<7;
if (*p++ < 0) return p;
@@ -294,26 +294,26 @@ namespace ugdiss
char const *binread(char const* p, uint32_t& buf)
{
static char mask = 127;
-
- if (*p < 0)
- {
- buf = (*p)&mask;
- return ++p;
+
+ if (*p < 0)
+ {
+ buf = (*p)&mask;
+ return ++p;
}
buf = *p;
- if (*(++p) < 0)
+ if (*(++p) < 0)
{
buf += uint32_t((*p)&mask)<<7;
return ++p;
}
buf += uint32_t(*p)<<7;
- if (*(++p) < 0)
+ if (*(++p) < 0)
{
buf += uint32_t((*p)&mask)<<14;
return ++p;
}
buf += uint32_t(*p)<<14;
- if (*(++p) < 0)
+ if (*(++p) < 0)
{
buf += uint32_t((*p)&mask)<<21;
return ++p;
@@ -331,56 +331,56 @@ namespace ugdiss
char const *binread(char const* p, filepos_type& buf)
{
static char mask = 127;
-
- if (*p < 0)
- {
- buf = (*p)&mask;
- return ++p;
+
+ if (*p < 0)
+ {
+ buf = (*p)&mask;
+ return ++p;
}
buf = *p;
- if (*(++p) < 0)
+ if (*(++p) < 0)
{
buf += filepos_type((*p)&mask)<<7;
return ++p;
}
buf += filepos_type(*p)<<7;
- if (*(++p) < 0)
+ if (*(++p) < 0)
{
buf += filepos_type((*p)&mask)<<14;
return ++p;
}
buf += filepos_type(*p)<<14;
- if (*(++p) < 0)
+ if (*(++p) < 0)
{
buf += filepos_type((*p)&mask)<<21;
return ++p;
}
buf += filepos_type(*p)<<21;
- if (*(++p) < 0)
+ if (*(++p) < 0)
{
buf += filepos_type((*p)&mask)<<28;
return ++p;
}
buf += filepos_type(*p)<<28;
- if (*(++p) < 0)
+ if (*(++p) < 0)
{
buf += filepos_type((*p)&mask)<<35;
return ++p;
}
buf += filepos_type(*p)<<35;
- if (*(++p) < 0)
+ if (*(++p) < 0)
{
buf += filepos_type((*p)&mask)<<42;
return ++p;
}
buf += filepos_type(*p)<<42;
- if (*(++p) < 0)
+ if (*(++p) < 0)
{
buf += filepos_type((*p)&mask)<<49;
return ++p;
}
buf += filepos_type(*p)<<49;
- if (*(++p) < 0)
+ if (*(++p) < 0)
{
buf += filepos_type((*p)&mask)<<56;
return ++p;
diff --git a/moses/TranslationModel/UG/mm/tpt_pickler.h b/moses/TranslationModel/UG/mm/tpt_pickler.h
index 7305a858e..5ac71c16d 100644
--- a/moses/TranslationModel/UG/mm/tpt_pickler.h
+++ b/moses/TranslationModel/UG/mm/tpt_pickler.h
@@ -17,30 +17,30 @@ namespace ugdiss
/// @return the size of file fname.
::uint64_t getFileSize(const std::string& fname);
- /**
- * The following functions write and read data in a compact binary
+ /**
+ * The following functions write and read data in a compact binary
* representation. Write and read errors can be checked directly
* on the ostream object after the function call, so no return value is
* necessary.*/
- void binwrite(std::ostream& out, char data);
- void binwrite(std::ostream& out, unsigned char data);
+ void binwrite(std::ostream& out, char data);
+ void binwrite(std::ostream& out, unsigned char data);
void binwrite(std::ostream& out, unsigned short data);
void binwrite(std::ostream& out, unsigned int data);
void binwrite(std::ostream& out, unsigned long data);
void binwrite(std::ostream& out, size_t data);
void binwrite(std::ostream& out, unsigned long long data);
void binwrite(std::ostream& out, std::string const& data);
- void binwrite(std::ostream& out, float data);
+ void binwrite(std::ostream& out, float data);
- void binread(std::istream& in, char &data);
- void binread(std::istream& in, unsigned char &data);
+ void binread(std::istream& in, char &data);
+ void binread(std::istream& in, unsigned char &data);
void binread(std::istream& in, unsigned short &data);
void binread(std::istream& in, unsigned int &data);
void binread(std::istream& in, unsigned long &data);
void binread(std::istream& in, size_t &data);
void binread(std::istream& in, unsigned long long &data);
void binread(std::istream& in, std::string &data);
- void binread(std::istream& in, float &data);
+ void binread(std::istream& in, float &data);
char const *binread(char const* p, uint16_t& buf);
char const *binread(char const* p, uint32_t& buf);
@@ -68,11 +68,11 @@ namespace ugdiss
/*
template<typename WHATEVER>
- char const*
+ char const*
binread(char const* p, WHATEVER* buf);
template<typename numtype>
- char const*
+ char const*
binread(char const* p, numtype& buf);
*/
@@ -113,7 +113,7 @@ namespace ugdiss
p = binread(p,v[i]);
return p;
}
-
+
template<typename T>
T read(std::istream& in)
{
@@ -132,7 +132,7 @@ namespace ugdiss
template<typename T>
- void
+ void
binwrite(std::ostream& out, std::vector<T> const& data)
{
binwrite(out,data.size());
@@ -141,7 +141,7 @@ namespace ugdiss
}
template<typename T>
- void
+ void
binread(std::istream& in, std::vector<T>& data)
{
size_t s;
@@ -157,8 +157,8 @@ namespace ugdiss
{
size_t s; K k; V v;
binread(in,s);
- data.clear();
- // I have no idea why this is necessary, but it is, even when
+ data.clear();
+ // I have no idea why this is necessary, but it is, even when
// /data/ is supposed to be empty
for (size_t i = 0; i < s; i++)
{
@@ -174,7 +174,7 @@ namespace ugdiss
binwrite(std::ostream& out, std::map<K,V> const& data)
{
binwrite(out,data.size());
- for (typename std::map<K,V>::const_iterator m = data.begin();
+ for (typename std::map<K,V>::const_iterator m = data.begin();
m != data.end(); m++)
{
binwrite(out,m->first);
@@ -200,7 +200,7 @@ namespace ugdiss
template<typename WHATEVER>
- char const*
+ char const*
binread(char const* p, WHATEVER* buf)
{
#ifdef VERIFY_TIGHT_PACKING
@@ -209,6 +209,6 @@ namespace ugdiss
return binread(p,*buf);
}
-
+
} // end namespace ugdiss
#endif
diff --git a/moses/TranslationModel/UG/mm/tpt_tightindex.cc b/moses/TranslationModel/UG/mm/tpt_tightindex.cc
index da28c6d93..72cf0c183 100644
--- a/moses/TranslationModel/UG/mm/tpt_tightindex.cc
+++ b/moses/TranslationModel/UG/mm/tpt_tightindex.cc
@@ -8,10 +8,10 @@
*/
//
// ugTightIndex.cc
-//
+//
// Made by Ulrich Germann
// Login <germann@germann-laptop>
-//
+//
// Started on Tue Jul 17 15:09:33 2007 Ulrich Germann
// Started on Tue Jul 17 15:09:33 2007 Ulrich Germann
//
@@ -63,7 +63,7 @@ namespace ugdiss
// }
// #define LOG_WRITE_ACTIVITY
-
+
// write a key or value into a tight index
// flag indicates wheter it's a key or a value
void tightwrite(std::ostream& out, uint64_t data, bool flag)
@@ -80,10 +80,10 @@ namespace ugdiss
std::cerr << " with flag 1 ";
#endif
while (data >= 128)
- {
+ {
char c = char(data%128)|char(-128);
- out.put(c);
- data >>= 7;
+ out.put(c);
+ data >>= 7;
#ifdef LOG_WRITE_ACTIVITY
bytes_written++;
#endif
@@ -99,7 +99,7 @@ namespace ugdiss
while (data >= 128)
{
char c = data&127;
- out.put(c);
+ out.put(c);
data >>= 7;
#ifdef LOG_WRITE_ACTIVITY
bytes_written++;
@@ -112,16 +112,16 @@ namespace ugdiss
std::cerr << " in " << bytes_written << " bytes" << std::endl;
#endif
}
-
-// For the code below: does it make a difference if I hard-code the
+
+// For the code below: does it make a difference if I hard-code the
// unraveled loop or does code optimization by the compiler take care
// of that?
#define DEBUG_TIGHTREAD 0
- // read a key value from a tight index; filepos_type must be at least as
+ // read a key value from a tight index; filepos_type must be at least as
// large as count_type
- filepos_type
+ filepos_type
tightread(std::istream& in, std::ios::pos_type stop)
{
// debug=true;
@@ -131,8 +131,8 @@ namespace ugdiss
short int bitshift = 7;
int pos = in.tellg();
#if DEBUG_TIGHTREAD
- if (debug)
- cerr << bitpattern(uint(in.peek())) << " " << in.peek()
+ if (debug)
+ cerr << bitpattern(uint(in.peek())) << " " << in.peek()
<< " pos=" << in.tellg() << "\n";
#endif
int buf = in.get();
@@ -141,24 +141,24 @@ namespace ugdiss
else
stop = std::min(size_t(stop),size_t(in.tellg())+in.rdbuf()->in_avail());
if (buf < 0)
- std::cerr << "number read: " << buf << " " << pos << " "
+ std::cerr << "number read: " << buf << " " << pos << " "
<< in.tellg() << std::endl;
assert (buf>=0);
-
+
if (buf >= 128) // continuation bit is 1
{
data = buf-128; // unset the bit
while (in.tellg() < stop && in.peek() >= 128)
{
#if DEBUG_TIGHTREAD
- if (debug)
+ if (debug)
cerr << bitpattern(uint(in.peek())) << " " << in.peek();
#endif
// cerr << bitpattern(size_t(in.peek())) << std::endl;
data += size_t(in.get()-128)<<bitshift;
bitshift += 7;
#if DEBUG_TIGHTREAD
- if (debug)
+ if (debug)
cerr << " " << data << " pos=" << in.tellg() << std::endl;
#endif
}
@@ -170,14 +170,14 @@ namespace ugdiss
{
// cerr << bitpattern(size_t(in.peek())) << std::endl;
#if DEBUG_TIGHTREAD
- if (debug)
+ if (debug)
cerr << bitpattern(uint(in.peek())) << " " << in.peek();
-
+
#endif
data += size_t(in.get())<<bitshift;
bitshift += 7;
#if DEBUG_TIGHTREAD
- if (debug)
+ if (debug)
cerr << " " << data << " pos=" << in.tellg() << "\n";
#endif
}
@@ -189,16 +189,16 @@ namespace ugdiss
#if DEBUG_TIGHTFIND
bool debug=true;
#endif
- bool
+ bool
tightfind_midpoint(std::istream& in, filepos_type start, filepos_type stop)
{
- in.seekg((start+stop)/2);
- // Jump approximately to the middle. Since we might land in the
- // middle of a number, we need to find the start of the next
+ in.seekg((start+stop)/2);
+ // Jump approximately to the middle. Since we might land in the
+ // middle of a number, we need to find the start of the next
// [index key/file offset] pair first. Bytes belonging to an index
- // key have the leftmost bit set to 0, bytes belonging to a file
+ // key have the leftmost bit set to 0, bytes belonging to a file
// offset have it set to 1
-
+
// if we landed in the middle of an index key, skip to the end of it
while (static_cast<filepos_type>(in.tellg()) < stop && in.get() < 128)
{
@@ -216,9 +216,9 @@ bool debug=true;
while (static_cast<filepos_type>(in.tellg()) < stop && in.peek() >= 128)
{
#if DEBUG_TIGHTFIND
- int r = in.get();
+ int r = in.get();
if (debug)
- std::cerr << in.tellg() << " skipped value byte " << r
+ std::cerr << in.tellg() << " skipped value byte " << r
<< " next is " << in.peek()
<< std::endl;
#else
@@ -227,9 +227,9 @@ bool debug=true;
}
return true;
}
-
- char const*
- tightfind_midpoint(char const* const start,
+
+ char const*
+ tightfind_midpoint(char const* const start,
char const* const stop)
{
char const* mp = start + (stop - start)/2;
@@ -238,46 +238,46 @@ bool debug=true;
return (*mp < 0) ? ++mp : mp;
}
- bool
- linear_search(std::istream& in, filepos_type start, filepos_type stop,
+ bool
+ linear_search(std::istream& in, filepos_type start, filepos_type stop,
id_type key, unsigned char& flags)
{ // performs a linear search in the range
in.seekg(start);
-
+
#if DEBUG_TIGHTFIND
if (debug) std::cerr << in.tellg() << " ";
#endif
-
- // ATTENTION! The bitshift operations below are important:
- // We use some of the bits in the key value to store additional
+
+ // ATTENTION! The bitshift operations below are important:
+ // We use some of the bits in the key value to store additional
// information about what and where node iformation is stored.
-
+
id_type foo;
- for(foo = tightread(in,stop);
- (foo>>FLAGBITS) < key;
- foo = tightread(in,stop))
+ for(foo = tightread(in,stop);
+ (foo>>FLAGBITS) < key;
+ foo = tightread(in,stop))
{
// skip the value associated with key /foo/
- while (static_cast<filepos_type>(in.tellg()) < stop
- && in.peek() >= 128) in.get();
-
+ while (static_cast<filepos_type>(in.tellg()) < stop
+ && in.peek() >= 128) in.get();
+
#if DEBUG_TIGHTFIND
- if (debug)
- std::cerr << (foo>>FLAGBITS) << " [" << key << "] "
+ if (debug)
+ std::cerr << (foo>>FLAGBITS) << " [" << key << "] "
<< in.tellg() << std::endl;
#endif
-
+
if (in.tellg() == std::ios::pos_type(stop))
return false; // not found
}
-
+
#if DEBUG_TIGHTFIND
- if (debug && (foo>>FLAGBITS)==key)
+ if (debug && (foo>>FLAGBITS)==key)
std::cerr << "found entry for " << key << std::endl;
- std::cerr << "current file position is " << in.tellg()
+ std::cerr << "current file position is " << in.tellg()
<< " (value read: " << key << std::endl;
#endif
-
+
assert(static_cast<filepos_type>(in.tellg()) < stop);
if ((foo>>FLAGBITS)==key)
{
@@ -288,51 +288,51 @@ bool debug=true;
else
return false;
}
-
+
bool
- tightfind(std::istream& in, filepos_type start, filepos_type stop,
+ tightfind(std::istream& in, filepos_type start, filepos_type stop,
id_type key, unsigned char& flags)
{
- // returns true if the value is found
+ // returns true if the value is found
#if DEBUG_TIGHTFIND
if (debug)
- std::cerr << "looking for " << key
+ std::cerr << "looking for " << key
<< " in range [" << start << ":" << stop << "]" << std::endl;
#endif
if (start==stop) return false;
assert(stop>start);
if ((start+1)==stop) return false; // list is empty
-
- unsigned int const granularity = sizeof(filepos_type)*5;
+
+ unsigned int const granularity = sizeof(filepos_type)*5;
// granularity: point where we should switch to linear search,
// because otherwise we might skip over the entry we are looking for
// because we land right in the middle of it.
-
+
if (stop > start + granularity)
- if (!tightfind_midpoint(in,start,stop))
+ if (!tightfind_midpoint(in,start,stop))
return false; // something went wrong (empty index)
-
+
if (stop <= start + granularity || in.tellg() == std::ios::pos_type(stop))
{ // If the search range is very short, tightfind_midpoint might skip the
// entry we are loking for. In this case, we can afford a linear
// search
return linear_search(in,start,stop,key,flags);
}
-
+
// perform binary search
filepos_type curpos = in.tellg();
id_type foo = tightread(in,stop);
id_type tmpid = foo>>FLAGBITS;
- if (tmpid == key)
+ if (tmpid == key)
{
- flags = foo%256;
+ flags = foo%256;
flags &= FLAGMASK;
#if DEBUG_TIGHTFIND
if (debug) std::cerr << "found entry for " << key << std::endl;
#endif
- return true; // done, found
+ return true; // done, found
}
- else if (tmpid > key)
+ else if (tmpid > key)
{ // look in the lower half
#if DEBUG_TIGHTFIND
if (debug) std::cerr << foo << " > " << key << std::endl;
@@ -343,7 +343,7 @@ bool debug=true;
{ // look in the upper half
while (static_cast<filepos_type>(in.tellg()) < stop
&& in.rdbuf()->in_avail() > 0 // is that still necessary???
- && in.peek() >= 128)
+ && in.peek() >= 128)
in.get(); // skip associated value
if (in.rdbuf()->in_avail() == 0 || in.tellg() == std::ios::pos_type(stop))
return false;
@@ -353,16 +353,16 @@ bool debug=true;
return tightfind(in,in.tellg(),stop,key,flags);
}
}
-
+
char const*
- tightfind(char const* const start,
+ tightfind(char const* const start,
char const* const stop,
- id_type key,
+ id_type key,
unsigned char& flags)
{
- // returns true if the value is found
-
+ // returns true if the value is found
+
if (start==stop) return NULL;
assert(stop>start);
if ((start+1)==stop) return NULL; // list is empty
@@ -374,11 +374,11 @@ bool debug=true;
id_type tmpId = foo>>FLAGBITS;
if (tmpId == key)
{
- flags = foo%256;
+ flags = foo%256;
flags &= FLAGMASK;
return after;
}
- else if (tmpId > key)
+ else if (tmpId > key)
{ // look in the lower half
return tightfind(start,p,key,flags);
}
@@ -389,14 +389,14 @@ bool debug=true;
return tightfind(after,stop,key,flags);
}
}
-
+
char const*
- tightfind_noflags(char const* const start,
+ tightfind_noflags(char const* const start,
char const* const stop,
id_type key)
{
- // returns true if the value is found
-
+ // returns true if the value is found
+
if (start==stop) return NULL;
assert(stop>start);
if ((start+1)==stop) return NULL; // list is empty
@@ -407,7 +407,7 @@ bool debug=true;
char const* after = tightread(p,stop,foo);
if (foo == key)
return after;
- else if (foo > key)
+ else if (foo > key)
{ // look in the lower half
return tightfind_noflags(start,p,key);
}
@@ -419,19 +419,19 @@ bool debug=true;
}
}
- bool
- linear_search_noflags(std::istream& in, filepos_type start,
+ bool
+ linear_search_noflags(std::istream& in, filepos_type start,
filepos_type stop, id_type key)
{ // performs a linear search in the range
- std::ios::pos_type mystop = stop;
+ std::ios::pos_type mystop = stop;
in.seekg(start);
id_type foo;
- for(foo = tightread(in,stop); foo < key; foo = tightread(in,stop))
+ for(foo = tightread(in,stop); foo < key; foo = tightread(in,stop))
{
// skip the value associated with key /foo/
- while (in.tellg() < mystop && in.peek() >= 128)
- in.get();
+ while (in.tellg() < mystop && in.peek() >= 128)
+ in.get();
if (in.tellg() == mystop)
return false; // not found
}
@@ -441,45 +441,45 @@ bool debug=true;
bool
- tightfind_noflags(std::istream& in, filepos_type start,
+ tightfind_noflags(std::istream& in, filepos_type start,
filepos_type stop, id_type key)
{
- // returns true if the value is found
+ // returns true if the value is found
if (start==stop) return false;
assert(stop>start);
if ((start+1)==stop) return false; // list is empty
-
+
// granularity: point where we should switch to linear search,
// because otherwise we might skip over the entry we are looking for
// because we land right in the middle of it.
- unsigned int const granularity = sizeof(filepos_type)*5;
+ unsigned int const granularity = sizeof(filepos_type)*5;
// UG: why 5? we should be able to get away with less!
-
+
if (stop > start + granularity)
- if (!tightfind_midpoint(in,start,stop))
+ if (!tightfind_midpoint(in,start,stop))
return false; // something went wrong (empty index)
-
+
// If the search range is very short, tightfind_midpoint might skip the
// entry we are loking for. In this case, we can afford a linear
// search
if (stop <= start + granularity || in.tellg() == std::ios::pos_type(stop))
return linear_search_noflags(in,start,stop,key);
-
+
// Otherwise, perform binary search
filepos_type curpos = in.tellg();
id_type foo = tightread(in,stop);
- if (foo == key)
- return true; // done, found
+ if (foo == key)
+ return true; // done, found
else if (foo > key) // search first half
return tightfind_noflags(in,start,curpos,key);
else // search second half
- {
- std::ios::pos_type mystop = stop;
+ {
+ std::ios::pos_type mystop = stop;
while (in.tellg() < mystop
&& in.rdbuf()->in_avail() > 0 // is that still necessary???
- && in.peek() >= 128)
+ && in.peek() >= 128)
in.get(); // skip associated value
if (in.rdbuf()->in_avail() == 0 || in.tellg() == mystop)
return false;
@@ -496,9 +496,9 @@ bool debug=true;
{
foo += 32768; // set first bit
while (data >= 32768) // = 2^15
- {
+ {
out.write(reinterpret_cast<char*>(&foo),2);
- data >>= 15;
+ data >>= 15;
foo = (data%32768)+32768;
}
}
@@ -507,7 +507,7 @@ bool debug=true;
while (data >= 32768) // = 2^15
{
out.write(reinterpret_cast<char*>(&foo),2);
- data >>= 15;
+ data >>= 15;
foo = data%32768;
}
}
@@ -515,8 +515,8 @@ bool debug=true;
}
char const*
- tightread8(char const* start,
- char const* stop,
+ tightread8(char const* start,
+ char const* stop,
uint64_t& dest)
{
static char bitmask=127;
@@ -570,8 +570,8 @@ bool debug=true;
}
char const*
- tightread4(char const* start,
- char const* stop,
+ tightread4(char const* start,
+ char const* stop,
uint32_t& dest)
{
static char bitmask=127;
@@ -605,8 +605,8 @@ bool debug=true;
}
char const*
- tightread2(char const* start,
- char const* stop,
+ tightread2(char const* start,
+ char const* stop,
uint16_t& dest)
{
static char bitmask=127;
diff --git a/moses/TranslationModel/UG/mm/tpt_tightindex.h b/moses/TranslationModel/UG/mm/tpt_tightindex.h
index 66594bc0a..967215aeb 100644
--- a/moses/TranslationModel/UG/mm/tpt_tightindex.h
+++ b/moses/TranslationModel/UG/mm/tpt_tightindex.h
@@ -28,46 +28,46 @@ extern bool debug;
namespace ugdiss
{
// void tightwritex(iostream& out, size_t data, bool flag);
- void
+ void
tightwrite(std::ostream& out, ::uint64_t data, bool flag);
- filepos_type
+ filepos_type
tightread(std::istream& in, std::ios::pos_type stop);
bool
- tightfind(std::istream& in,
- filepos_type start,
- filepos_type stop,
+ tightfind(std::istream& in,
+ filepos_type start,
+ filepos_type stop,
id_type key,
unsigned char& flags);
bool
- tightfind_noflags(std::istream& in,
- filepos_type start,
- filepos_type stop,
+ tightfind_noflags(std::istream& in,
+ filepos_type start,
+ filepos_type stop,
id_type key);
char const*
- tightfind(char const* const start,
+ tightfind(char const* const start,
char const* const stop,
- id_type key,
+ id_type key,
unsigned char& flags);
char const*
- tightfind_noflags(char const* const start,
+ tightfind_noflags(char const* const start,
char const* const stop,
id_type key);
- /** move read header in istream /in/ to the first entry after the midpoint of
- * file position range [start,stop) in in a 'tight' index
+ /** move read header in istream /in/ to the first entry after the midpoint of
+ * file position range [start,stop) in in a 'tight' index
* @param in the data input stream
* @param start start of the search range
* @param stop end of the search range
- * @return true if no errors occurred
- */
- bool
+ * @return true if no errors occurred
+ */
+ bool
tightfind_midpoint(std::istream& in, filepos_type start, filepos_type stop);
// the bitpattern functions below are for debugging
@@ -115,8 +115,8 @@ namespace ugdiss
#if 0
template<typename dtype>
- char const*
- tightread(char const* start,
+ char const*
+ tightread(char const* start,
char const* stop,
dtype& dest)
{
diff --git a/moses/TranslationModel/UG/mm/tpt_tokenindex.cc b/moses/TranslationModel/UG/mm/tpt_tokenindex.cc
index c6704beac..5fc6a6acc 100644
--- a/moses/TranslationModel/UG/mm/tpt_tokenindex.cc
+++ b/moses/TranslationModel/UG/mm/tpt_tokenindex.cc
@@ -15,15 +15,15 @@ namespace ugdiss
{
TokenIndex::
- TokenIndex(string unkToken)
+ TokenIndex(string unkToken)
: ridx(0),unkLabel(unkToken),unkId(1),numTokens(0)
- {
+ {
lock.reset(new boost::mutex());
};
-
+
#if 0
TokenIndex::
- TokenIndex(string fname, string unkToken,bool dyna)
+ TokenIndex(string fname, string unkToken,bool dyna)
: ridx(0),unkLabel(unkToken)
{
this->open(fname,unkToken,dyna);
@@ -58,8 +58,8 @@ namespace ugdiss
if (!unkToken.empty())
{
Entry const* bla = lower_bound(startIdx,endIdx,unkToken.c_str(),comp);
- unkId = ((bla < endIdx && unkToken == comp.base+bla->offset)
- ? bla->id
+ unkId = ((bla < endIdx && unkToken == comp.base+bla->offset)
+ ? bla->id
: numTokens);
}
this->dynamic=dyna;
@@ -69,7 +69,7 @@ namespace ugdiss
this->newWords.reset(new vector<string>());
}
}
-
+
void
TokenIndex::
close()
@@ -79,9 +79,9 @@ namespace ugdiss
TokenIndex::
CompFunc::
- CompFunc()
+ CompFunc()
{};
-
+
bool
TokenIndex::
CompFunc::
@@ -90,7 +90,7 @@ namespace ugdiss
return strcmp(base+A.offset,w) < 0;
};
- id_type
+ id_type
TokenIndex::
operator[](char const* p) const
{
@@ -101,7 +101,7 @@ namespace ugdiss
if (!dynamic) return unkId;
boost::lock_guard<boost::mutex> lk(*this->lock);
// stuff below is new as of 2011-01-30, for dynamic adding of unknown items
- // IMPORTANT: numTokens is not currently not changed, it is the number of
+ // IMPORTANT: numTokens is not currently not changed, it is the number of
// PRE-EXISING TOKENS, not including dynamically added Items
map<string,id_type>::value_type newItem(p,str2idExtra->size()+numTokens);
pair<map<string,id_type>::iterator,bool> foo = str2idExtra->insert(newItem);
@@ -110,14 +110,14 @@ namespace ugdiss
return foo.first->second;
}
- id_type
+ id_type
TokenIndex::
operator[](string const& w) const
{
return (*this)[w.c_str()];
}
- vector<char const*>
+ vector<char const*>
TokenIndex::
reverseIndex() const
{
@@ -125,11 +125,11 @@ namespace ugdiss
// cout << "tokenindex has " << numToks << " tokens" << endl;
- vector<char const*> v(numToks,NULL);
+ vector<char const*> v(numToks,NULL);
// v.reserve(endIdx-startIdx);
for (Entry const* x = startIdx; x != endIdx; x++)
{
- if (x->id >= v.size())
+ if (x->id >= v.size())
v.resize(x->id+1);
v[x->id] = comp.base+x->offset;
}
@@ -141,12 +141,12 @@ namespace ugdiss
TokenIndex::
operator[](id_type id) const
{
- if (!ridx.size())
+ if (!ridx.size())
{
boost::lock_guard<boost::mutex> lk(*this->lock);
if (!ridx.size()) ridx = reverseIndex();
}
- if (id < ridx.size())
+ if (id < ridx.size())
return ridx[id];
boost::lock_guard<boost::mutex> lk(*this->lock);
if (dynamic && id < ridx.size()+newWords->size())
@@ -156,26 +156,26 @@ namespace ugdiss
void
TokenIndex::
- iniReverseIndex()
+ iniReverseIndex()
{
- if (!ridx.size())
+ if (!ridx.size())
{
boost::lock_guard<boost::mutex> lk(*this->lock);
if (!ridx.size()) ridx = reverseIndex();
}
}
-
+
char const* const
TokenIndex::
- operator[](id_type id)
+ operator[](id_type id)
{
- if (!ridx.size())
+ if (!ridx.size())
{
boost::lock_guard<boost::mutex> lk(*this->lock);
if (!ridx.size()) ridx = reverseIndex();
}
- if (id < ridx.size())
+ if (id < ridx.size())
return ridx[id];
boost::lock_guard<boost::mutex> lk(*this->lock);
if (dynamic && id < ridx.size()+newWords->size())
@@ -183,11 +183,11 @@ namespace ugdiss
return unkLabel.c_str();
}
- string
+ string
TokenIndex::
- toString(vector<id_type> const& v)
+ toString(vector<id_type> const& v)
{
- if (!ridx.size())
+ if (!ridx.size())
{
boost::lock_guard<boost::mutex> lk(*this->lock);
if (!ridx.size()) ridx = reverseIndex();
@@ -198,11 +198,11 @@ namespace ugdiss
return buf.str();
}
- string
+ string
TokenIndex::
toString(vector<id_type> const& v) const
{
- if (!ridx.size())
+ if (!ridx.size())
{
boost::lock_guard<boost::mutex> lk(*this->lock);
if (!ridx.size()) ridx = reverseIndex();
@@ -213,11 +213,11 @@ namespace ugdiss
return buf.str();
}
- string
+ string
TokenIndex::
- toString(id_type const* start, id_type const* const stop)
+ toString(id_type const* start, id_type const* const stop)
{
- if (!ridx.size())
+ if (!ridx.size())
{
boost::lock_guard<boost::mutex> lk(*this->lock);
if (!ridx.size()) ridx = reverseIndex();
@@ -230,11 +230,11 @@ namespace ugdiss
return buf.str();
}
- string
+ string
TokenIndex::
toString(id_type const* start, id_type const* const stop) const
{
- if (!ridx.size())
+ if (!ridx.size())
{
boost::lock_guard<boost::mutex> lk(*this->lock);
if (!ridx.size()) ridx = reverseIndex();
@@ -266,7 +266,7 @@ namespace ugdiss
{
bool allgood = true; string w;
v.clear();
- for (istringstream buf(line); buf>>w;)
+ for (istringstream buf(line); buf>>w;)
{
v.push_back((*this)[w]);
allgood = allgood && v.back() > 1;
@@ -325,15 +325,15 @@ namespace ugdiss
}
void
- write_tokenindex_to_disk(vector<pair<string,uint32_t> > const& tok,
+ write_tokenindex_to_disk(vector<pair<string,uint32_t> > const& tok,
string const& ofile, string const& unkToken)
{
typedef pair<uint32_t,id_type> IndexEntry; // offset and id
// Write token strings to a buffer, keep track of offsets
- vector<IndexEntry> index(tok.size());
+ vector<IndexEntry> index(tok.size());
ostringstream data;
- id_type unkId = tok.size();
+ id_type unkId = tok.size();
for (size_t i = 0; i < tok.size(); i++)
{
if (tok[i].first == unkToken)
@@ -342,7 +342,7 @@ namespace ugdiss
index[i].second = tok[i].second; // respective ID
data<<tok[i].first<<char(0); // write string to buffer
}
-
+
// Now write the actual file
ofstream out(ofile.c_str());
uint32_t vsize = index.size(); // how many vocab items?
@@ -356,26 +356,26 @@ namespace ugdiss
out<<data.str();
}
- void
+ void
TokenIndex::
write(string fname)
{
typedef pair<string,uint32_t> Token; // token and id
- vector<Token> tok(totalVocabSize());
+ vector<Token> tok(totalVocabSize());
for (id_type i = 0; i < tok.size(); ++i)
tok[i] = Token((*this)[i],i);
sort(tok.begin(),tok.end());
write_tokenindex_to_disk(tok,fname,unkLabel);
}
-
- bool
+
+ bool
TokenIndex::
- isDynamic() const
+ isDynamic() const
{
return dynamic;
}
- bool
+ bool
TokenIndex::
setDynamic(bool on)
{
@@ -393,7 +393,7 @@ namespace ugdiss
}
return ret;
}
-
+
void
TokenIndex::
setUnkLabel(string unk)
diff --git a/moses/TranslationModel/UG/mm/tpt_tokenindex.h b/moses/TranslationModel/UG/mm/tpt_tokenindex.h
index 3051f07a5..9f7c69b3e 100644
--- a/moses/TranslationModel/UG/mm/tpt_tokenindex.h
+++ b/moses/TranslationModel/UG/mm/tpt_tokenindex.h
@@ -3,7 +3,7 @@
//
// - Vocab items should be stored in order of ids, so that we can determine their length
// by taking computing V[id+1] - V[id] instead of using strlen.
-//
+//
// (c) 2007,2008 Ulrich Germann
#ifndef __ugTokenIndex_hh
@@ -30,7 +30,7 @@ namespace ugdiss
/** Reverse index: maps from ID to char const* */
mutable vector<char const*> ridx;
/** Label for the UNK token */
- string unkLabel;
+ string unkLabel;
id_type unkId,numTokens;
/// New 2013-09-02: thread-safe
@@ -42,9 +42,9 @@ namespace ugdiss
boost::shared_ptr<vector<string> > newWords;
// The use of pointers to external items is a bit of a bad hack
// in terms of the semantic of TokenIndex const: since external items
- // are changed, the TokenIndex instance remains unchanged and const works,
- // even though in reality the underlying object on the coceptual level
- // *IS* changed. This means that dynamic TokenIndex instances are not
+ // are changed, the TokenIndex instance remains unchanged and const works,
+ // even though in reality the underlying object on the coceptual level
+ // *IS* changed. This means that dynamic TokenIndex instances are not
// thread-safe!
public:
@@ -53,7 +53,7 @@ namespace ugdiss
{
public:
uint32_t offset;
- id_type id;
+ id_type id;
};
/** Comparison function object used for Entry instances */
@@ -111,19 +111,19 @@ namespace ugdiss
void setUnkLabel(string unk);
};
- void
- write_tokenindex_to_disk(vector<pair<string,uint32_t> > const& tok,
+ void
+ write_tokenindex_to_disk(vector<pair<string,uint32_t> > const& tok,
string const& ofile, string const& unkToken);
/** for sorting words by frequency */
class compWords
{
string unk;
- public:
+ public:
compWords(string _unk) : unk(_unk) {};
-
+
bool
- operator()(pair<string,size_t> const& A,
+ operator()(pair<string,size_t> const& A,
pair<string,size_t> const& B) const
{
if (A.first == unk) return false;// do we still need this special treatment?
@@ -142,7 +142,7 @@ namespace ugdiss
typedef pair<string,uint32_t> Token; // token and id
- // first, sort the word list in decreasing order of frequency, so that we
+ // first, sort the word list in decreasing order of frequency, so that we
// can assign IDs in an encoding-efficient manner (high frequency. low ID)
vector<pair<string,size_t> > wcounts(M.size()); // for sorting by frequency
typedef typename MYMAP::const_iterator myIter;
@@ -156,16 +156,16 @@ namespace ugdiss
sort(wcounts.begin(),wcounts.end(),compFunc);
// Assign IDs ...
- vector<Token> tok(wcounts.size());
+ vector<Token> tok(wcounts.size());
for (size_t i = 0; i < wcounts.size(); i++)
tok[i] = Token(wcounts[i].first,i);
// and re-sort in alphabetical order
- sort(tok.begin(),tok.end());
+ sort(tok.begin(),tok.end());
write_tokenindex_to_disk(tok,ofile,unkToken);
}
template<typename Token>
- void
+ void
fill_token_seq(TokenIndex& V, string const& line, vector<Token>& dest)
{
istringstream buf(line); string w;
diff --git a/moses/TranslationModel/UG/mm/tpt_typedefs.h b/moses/TranslationModel/UG/mm/tpt_typedefs.h
index fea221d61..d2d2932de 100644
--- a/moses/TranslationModel/UG/mm/tpt_typedefs.h
+++ b/moses/TranslationModel/UG/mm/tpt_typedefs.h
@@ -12,4 +12,4 @@ namespace ugdiss
typedef uint64_t filepos_type;
typedef unsigned char uchar;
}
-#endif
+#endif
diff --git a/moses/TranslationModel/UG/mm/ug_bitext.cc b/moses/TranslationModel/UG/mm/ug_bitext.cc
index d2899e677..809476aa9 100644
--- a/moses/TranslationModel/UG/mm/ug_bitext.cc
+++ b/moses/TranslationModel/UG/mm/ug_bitext.cc
@@ -8,18 +8,18 @@ using namespace ugdiss;
using namespace std;
namespace Moses
{
- namespace bitext
+ namespace bitext
{
- float
+ float
lbop(size_t const tries, size_t const succ, float const confidence)
{
- return (confidence == 0
- ? float(succ)/tries
+ return (confidence == 0
+ ? float(succ)/tries
: (boost::math::binomial_distribution<>::
find_lower_bound_on_p(tries, succ, confidence)));
}
-
+
// template<>
void
@@ -42,37 +42,37 @@ namespace Moses
else
index.reset(new imTSA<tkn>(track,NULL,NULL));
}
-
+
snt_adder<L2R_Token<SimpleWordId> >::
- snt_adder(vector<string> const& s, TokenIndex& v,
- sptr<imTtrack<L2R_Token<SimpleWordId> > >& t,
+ snt_adder(vector<string> const& s, TokenIndex& v,
+ sptr<imTtrack<L2R_Token<SimpleWordId> > >& t,
sptr<imTSA<L2R_Token<SimpleWordId> > >& i)
- : snt(s), V(v), track(t), index(i)
+ : snt(s), V(v), track(t), index(i)
{ }
- bool
+ bool
expand_phrase_pair
- (vector<vector<ushort> >& a1,
+ (vector<vector<ushort> >& a1,
vector<vector<ushort> >& a2,
ushort const s2, // next word on in target side
ushort const L1, ushort const R1, // limits of previous phrase
ushort & s1, ushort & e1, ushort& e2) // start/end src; end trg
{
- if (a2[s2].size() == 0)
+ if (a2[s2].size() == 0)
{
cout << __FILE__ << ":" << __LINE__ << endl;
return false;
}
bitvector done1(a1.size());
bitvector done2(a2.size());
- vector <pair<ushort,ushort> > agenda;
+ vector <pair<ushort,ushort> > agenda;
// x.first: side (1 or 2)
// x.second: word position
agenda.reserve(a1.size() + a2.size());
agenda.push_back(pair<ushort,ushort>(2,s2));
e2 = s2;
s1 = e1 = a2[s2].front();
- if (s1 >= L1 && s1 < R1)
+ if (s1 >= L1 && s1 < R1)
{
cout << __FILE__ << ":" << __LINE__ << endl;
return false;
@@ -88,14 +88,14 @@ namespace Moses
done1.set(p);
BOOST_FOREACH(ushort i, a1[p])
{
- if (i < s2)
+ if (i < s2)
{
// cout << __FILE__ << ":" << __LINE__ << endl;
return false;
}
if (done2[i]) continue;
for (;e2 <= i;++e2)
- if (!done2[e2])
+ if (!done2[e2])
agenda.push_back(pair<ushort,ushort>(2,e2));
}
}
@@ -104,16 +104,16 @@ namespace Moses
done2.set(p);
BOOST_FOREACH(ushort i, a2[p])
{
- if ((e1 < L1 && i >= L1) ||
- (s1 >= R1 && i < R1) ||
+ if ((e1 < L1 && i >= L1) ||
+ (s1 >= R1 && i < R1) ||
(i >= L1 && i < R1))
{
- // cout << __FILE__ << ":" << __LINE__ << " "
- // << L1 << "-" << R1 << " " << i << " "
+ // cout << __FILE__ << ":" << __LINE__ << " "
+ // << L1 << "-" << R1 << " " << i << " "
// << s1 << "-" << e1<< endl;
return false;
}
-
+
if (e1 < i)
{
for (; e1 <= i; ++e1)
@@ -134,7 +134,7 @@ namespace Moses
return true;
}
- void
+ void
print_amatrix(vector<vector<ushort> > a1, uint32_t len2,
ushort b1, ushort e1, ushort b2, ushort e2)
{
@@ -163,7 +163,7 @@ namespace Moses
cout << string(90,'-') << endl;
}
- void
+ void
write_bitvector(bitvector const& v, ostream& out)
{
for (size_t i = v.find_first(); i < v.size();)
diff --git a/moses/TranslationModel/UG/mm/ug_bitext.h b/moses/TranslationModel/UG/mm/ug_bitext.h
index 7fb07fc26..ab5f2a24f 100644
--- a/moses/TranslationModel/UG/mm/ug_bitext.h
+++ b/moses/TranslationModel/UG/mm/ug_bitext.h
@@ -2,18 +2,18 @@
#pragma once
// Implementations of word-aligned bitext.
// Written by Ulrich Germann
-//
+//
// mmBitext: static, memory-mapped bitext
// imBitext: dynamic, in-memory bitext
//
// things we can do to speed up things:
-// - set up threads at startup time that force the
+// - set up threads at startup time that force the
// data in to memory sequentially
//
-// - use multiple agendas for better load balancing and to avoid
+// - use multiple agendas for better load balancing and to avoid
// competition for locks
-//
+//
#define UG_BITEXT_TRACK_ACTIVE_THREADS 0
@@ -70,7 +70,7 @@ namespace Moses {
float lbop(size_t const tries, size_t const succ, float const confidence);
void write_bitvector(bitvector const& v, ostream& out);
- struct
+ struct
ContextForQuery
{
// needs to be made thread-safe
@@ -85,7 +85,7 @@ namespace Moses {
template<typename TKN>
- class Bitext
+ class Bitext
{
public:
typedef TKN Token;
@@ -98,19 +98,19 @@ namespace Moses {
mutable boost::shared_mutex m_lock; // for thread-safe operation
class agenda; // for parallel sampling see ug_bitext_agenda.h
- mutable sptr<agenda> ag;
+ mutable sptr<agenda> ag;
size_t m_num_workers; // number of workers available to the agenda
- size_t m_default_sample_size;
+ size_t m_default_sample_size;
size_t m_pstats_cache_threshold; // threshold for caching sampling results
sptr<pstats::cache_t> m_cache1, m_cache2; // caches for sampling results
-
+
vector<string> m_docname;
map<string,id_type> m_docname2docid; // maps from doc names to ids
sptr<std::vector<id_type> > m_sid2docid; // maps from sentences to docs (ids)
mutable pplist_cache_t m_pplist_cache1, m_pplist_cache2;
- // caches for unbiased sampling; biased sampling uses the caches that
+ // caches for unbiased sampling; biased sampling uses the caches that
// are stored locally on the translation task
public:
@@ -123,9 +123,9 @@ namespace Moses {
sptr<TSA<Token> > I2; // indices
/// given the source phrase sid[start:stop]
- // find the possible start (s1 .. s2) and end (e1 .. e2)
+ // find the possible start (s1 .. s2) and end (e1 .. e2)
// points of the target phrase; if non-NULL, store word
- // alignments in *core_alignment. If /flip/, source phrase is
+ // alignments in *core_alignment. If /flip/, source phrase is
// L2.
bool find_trg_phr_bounds
( size_t const sid, // sentence to investigate
@@ -136,27 +136,27 @@ namespace Moses {
int& po_fwd, int& po_bwd, // phrase orientations
std::vector<uchar> * core_alignment, // stores the core alignment
bitvector* full_alignment, // stores full word alignment for this sent.
- bool const flip) const; // flip source and target (reverse lookup)
-
- // prep2 launches sampling and returns immediately.
+ bool const flip) const; // flip source and target (reverse lookup)
+
+ // prep2 launches sampling and returns immediately.
// lookup (below) waits for the job to finish before it returns
- sptr<pstats>
+ sptr<pstats>
prep2(ttasksptr const& ttask, iter const& phrase, int max_sample = -1) const;
-
+
public:
Bitext(size_t const max_sample = 1000, size_t const xnum_workers = 16);
- Bitext(Ttrack<Token>* const t1, Ttrack<Token>* const t2,
- Ttrack<char>* const tx,
+ Bitext(Ttrack<Token>* const t1, Ttrack<Token>* const t2,
+ Ttrack<char>* const tx,
TokenIndex* const v1, TokenIndex* const v2,
TSA<Token>* const i1, TSA<Token>* const i2,
- size_t const max_sample=1000,
+ size_t const max_sample=1000,
size_t const xnum_workers=16);
-
- virtual void
+
+ virtual void
open(string const base, string const L1, string const L2) = 0;
-
- sptr<pstats>
+
+ sptr<pstats>
lookup(ttasksptr const& ttask, iter const& phrase, int max_sample = -1) const;
void prep(ttasksptr const& ttask, iter const& phrase) const;
@@ -176,7 +176,7 @@ namespace Moses {
void
- mark_match(Token const* start, Token const* end, iter const& m,
+ mark_match(Token const* start, Token const* end, iter const& m,
bitvector& check) const;
void
write_yawat_alignment
@@ -184,10 +184,10 @@ namespace Moses {
#if 0
// needs to be adapted to the new API
void
- lookup(std::vector<Token> const& snt, TSA<Token>& idx,
+ lookup(std::vector<Token> const& snt, TSA<Token>& idx,
std::vector<std::vector<sptr<std::vector<PhrasePair<Token> > > > >& dest,
std::vector<std::vector<uint64_t> >* pidmap = NULL,
- typename PhrasePair<Token>::Scorer* scorer=NULL,
+ typename PhrasePair<Token>::Scorer* scorer=NULL,
sptr<SamplingBias const> const bias,
bool multithread=true) const;
#endif
@@ -233,32 +233,32 @@ namespace Moses {
Token const* t = (isL2 ? T2 : T1)->sntStart(sid) + off;
Token const* x = t + len;
TokenIndex const& V = isL2 ? *V2 : *V1;
- while (t < x)
+ while (t < x)
{
buf << V[t->id()];
if (++t < x) buf << " ";
}
return buf.str();
}
-
+
template<typename Token>
- size_t
+ size_t
Bitext<Token>::
- getDefaultSampleSize() const
- {
- return m_default_sample_size;
+ getDefaultSampleSize() const
+ {
+ return m_default_sample_size;
}
template<typename Token>
- void
+ void
Bitext<Token>::
setDefaultSampleSize(size_t const max_samples)
- {
+ {
boost::unique_lock<boost::shared_mutex> guard(m_lock);
- if (max_samples != m_default_sample_size)
+ if (max_samples != m_default_sample_size)
{
m_cache1.reset(new pstats::cache_t);
m_cache2.reset(new pstats::cache_t);
- m_default_sample_size = max_samples;
+ m_default_sample_size = max_samples;
}
}
@@ -274,12 +274,12 @@ namespace Moses {
template<typename Token>
Bitext<Token>::
- Bitext(Ttrack<Token>* const t1,
- Ttrack<Token>* const t2,
+ Bitext(Ttrack<Token>* const t1,
+ Ttrack<Token>* const t2,
Ttrack<char>* const tx,
- TokenIndex* const v1,
+ TokenIndex* const v1,
TokenIndex* const v2,
- TSA<Token>* const i1,
+ TSA<Token>* const i1,
TSA<Token>* const i2,
size_t const max_sample,
size_t const xnum_workers)
@@ -294,7 +294,7 @@ namespace Moses {
template<typename TKN> class snt_adder;
template<> class snt_adder<L2R_Token<SimpleWordId> >;
- template<>
+ template<>
class snt_adder<L2R_Token<SimpleWordId> >
{
typedef L2R_Token<SimpleWordId> TKN;
@@ -303,9 +303,9 @@ namespace Moses {
sptr<imTtrack<TKN> > & track;
sptr<imTSA<TKN > > & index;
public:
- snt_adder(std::vector<string> const& s, TokenIndex& v,
+ snt_adder(std::vector<string> const& s, TokenIndex& v,
sptr<imTtrack<TKN> >& t, sptr<imTSA<TKN> >& i);
-
+
void operator()();
};
@@ -313,17 +313,17 @@ namespace Moses {
bool
Bitext<Token>::
find_trg_phr_bounds
- (size_t const sid,
+ (size_t const sid,
size_t const start, size_t const stop,
size_t & s1, size_t & s2, size_t & e1, size_t & e2,
int & po_fwd, int & po_bwd,
- std::vector<uchar>* core_alignment, bitvector* full_alignment,
+ std::vector<uchar>* core_alignment, bitvector* full_alignment,
bool const flip) const
{
// if (core_alignment) cout << "HAVE CORE ALIGNMENT" << endl;
// a word on the core_alignment:
- //
+ //
// since fringe words ([s1,...,s2),[e1,..,e2) if s1 < s2, or e1
// < e2, respectively) are be definition unaligned, we store
// only the core alignment in *core_alignment it is up to the
@@ -364,18 +364,18 @@ namespace Moses {
else { p = binread(p,src); assert(p<x); p = binread(p,trg); }
UTIL_THROW_IF2((src >= slen1 || trg >= slen2),
- "Alignment range error at sentence " << sid << "!\n"
- << src << "/" << slen1 << " " <<
+ "Alignment range error at sentence " << sid << "!\n"
+ << src << "/" << slen1 << " " <<
trg << "/" << slen2);
-
- if (src < start || src >= stop)
+
+ if (src < start || src >= stop)
forbidden.set(trg);
else
{
lft = min(lft,trg);
rgt = max(rgt,trg);
}
- if (core_alignment)
+ if (core_alignment)
{
aln1[src].push_back(trg);
aln2[trg].push_back(src);
@@ -383,16 +383,16 @@ namespace Moses {
if (full_alignment)
full_alignment->set(src*slen2 + trg);
}
-
+
for (size_t i = lft; i <= rgt; ++i)
- if (forbidden[i])
+ if (forbidden[i])
return false;
-
+
s2 = lft; for (s1 = s2; s1 && !forbidden[s1-1]; --s1);
e1 = rgt+1; for (e2 = e1; e2 < forbidden.size() && !forbidden[e2]; ++e2);
-
+
if (lft > rgt) return false;
- if (core_alignment)
+ if (core_alignment)
{
core_alignment->clear();
for (size_t i = start; i < stop; ++i)
@@ -417,7 +417,7 @@ namespace Moses {
( string const& bserver, string const& text, ostream* log ) const
{
sptr<DocumentBias> ret;
- ret.reset(new DocumentBias(*m_sid2docid, m_docname2docid,
+ ret.reset(new DocumentBias(*m_sid2docid, m_docname2docid,
bserver, text, log));
return ret;
}
@@ -435,15 +435,15 @@ namespace Moses {
// and waits until the sampling is finished before it returns.
// This allows sampling in the background
template<typename Token>
- sptr<pstats>
+ sptr<pstats>
Bitext<Token>
- ::prep2
+ ::prep2
( ttasksptr const& ttask, iter const& phrase, int max_sample) const
{
if (max_sample < 0) max_sample = m_default_sample_size;
sptr<ContextScope> scope = ttask->GetScope();
sptr<ContextForQuery> context = scope->get<ContextForQuery>(this);
- sptr<SamplingBias> bias;
+ sptr<SamplingBias> bias;
if (context) bias = context->bias;
sptr<pstats::cache_t> cache;
@@ -451,9 +451,9 @@ namespace Moses {
// (still need to test what a good caching threshold is ...)
// - use the task-specific cache when there is a sampling bias
if (max_sample == int(m_default_sample_size)
- && phrase.approxOccurrenceCount() > m_pstats_cache_threshold)
+ && phrase.approxOccurrenceCount() > m_pstats_cache_threshold)
{
- cache = (phrase.root == I1.get()
+ cache = (phrase.root == I1.get()
? (bias ? context->cache1 : m_cache1)
: (bias ? context->cache2 : m_cache2));
// if (bias) cerr << "Using bias." << endl;
@@ -461,17 +461,17 @@ namespace Moses {
sptr<pstats> ret;
sptr<pstats> const* cached;
- if (cache && (cached = cache->get(phrase.getPid(), ret)) && *cached)
+ if (cache && (cached = cache->get(phrase.getPid(), ret)) && *cached)
return *cached;
boost::unique_lock<boost::shared_mutex> guard(m_lock);
- if (!ag)
+ if (!ag)
{
ag.reset(new agenda(*this));
if (m_num_workers > 1)
ag->add_workers(m_num_workers);
}
// cerr << "NEW FREQUENT PHRASE: "
- // << phrase.str(V1.get()) << " " << phrase.approxOccurrenceCount()
+ // << phrase.str(V1.get()) << " " << phrase.approxOccurrenceCount()
// << " at " << __FILE__ << ":" << __LINE__ << endl;
ret = ag->add_job(this, phrase, max_sample, bias);
if (cache) cache->set(phrase.getPid(),ret);
@@ -497,8 +497,8 @@ namespace Moses {
// CONSTRUCTOR
pstats2pplist(typename TSA<Token>::tree_iterator const& m,
Ttrack<Token> const& other,
- sptr<pstats> const& ps,
- std::vector<PhrasePair<Token> >& dest,
+ sptr<pstats> const& ps,
+ std::vector<PhrasePair<Token> >& dest,
typename PhrasePair<Token>::Scorer const* scorer)
: m_other(other)
, m_pstats(ps)
@@ -509,17 +509,17 @@ namespace Moses {
, m_pid1(m.getPid())
, m_is_inverse(false)
{ }
-
+
// WORKER
- void
- operator()()
+ void
+ operator()()
{
// wait till all statistics have been collected
boost::unique_lock<boost::mutex> lock(m_pstats->lock);
while (m_pstats->in_progress)
m_pstats->ready.wait(lock);
- m_pp.init(m_pid1, m_is_inverse, m_token,m_len,m_pstats.get(),0);
+ m_pp.init(m_pid1, m_is_inverse, m_token,m_len,m_pstats.get(),0);
// convert pstats entries to phrase pairs
pstats::trg_map_t::iterator a;
@@ -531,8 +531,8 @@ namespace Moses {
m_pp.good2 = max(uint32_t(m_pp.raw2 * float(m_pp.good1)/m_pp.raw1),
m_pp.joint);
size_t J = m_pp.joint<<7; // hard coded threshold of 1/128
- if (m_pp.good1 > J || m_pp.good2 > J) continue;
- if (m_scorer)
+ if (m_pp.good1 > J || m_pp.good2 > J) continue;
+ if (m_scorer)
{
(*m_scorer)(m_pp);
}
@@ -543,23 +543,23 @@ namespace Moses {
}
};
-#if 0
+#if 0
template<typename Token>
void
Bitext<Token>::
- lookup(std::vector<Token> const& snt, TSA<Token>& idx,
+ lookup(std::vector<Token> const& snt, TSA<Token>& idx,
std::vector<std::vector<sptr<std::vector<PhrasePair<Token> > > > >& dest,
std::vector<std::vector<uint64_t> >* pidmap,
typename PhrasePair<Token>::Scorer* scorer,
sptr<SamplingBias const> const& bias, bool multithread) const
{
// typedef std::vector<std::vector<sptr<std::vector<PhrasePair<Token> > > > > ret_t;
-
- dest.clear();
+
+ dest.clear();
dest.resize(snt.size());
if (pidmap) { pidmap->clear(); pidmap->resize(snt.size()); }
- // collect statistics in parallel, then build PT entries as
+ // collect statistics in parallel, then build PT entries as
// the sampling finishes
bool fwd = &idx == I1.get();
std::vector<boost::thread*> workers; // background threads doing the lookup
@@ -574,16 +574,16 @@ namespace Moses {
uint64_t key = m.getPid();
if (pidmap) (*pidmap)[i].push_back(key);
sptr<std::vector<PhrasePair<Token> > > pp = C.get(key);
- if (pp)
+ if (pp)
dest[i].push_back(pp);
- else
+ else
{
pp.reset(new std::vector<PhrasePair<Token> >());
C.set(key,pp);
dest[i].push_back(pp);
sptr<pstats> x = prep2(m, this->default_sample_size,bias);
pstats2pplist<Token> w(m,*(fwd?T2:T1),x,*pp,scorer);
- if (multithread)
+ if (multithread)
{
boost::thread* t = new boost::thread(w);
workers.push_back(t);
@@ -592,16 +592,16 @@ namespace Moses {
}
}
}
- for (size_t w = 0; w < workers.size(); ++w)
+ for (size_t w = 0; w < workers.size(); ++w)
{
- workers[w]->join();
+ workers[w]->join();
delete workers[w];
}
}
-#endif
+#endif
template<typename Token>
- sptr<pstats>
+ sptr<pstats>
Bitext<Token>::
lookup(ttasksptr const& ttask, iter const& phrase, int max_sample) const
{
@@ -615,7 +615,7 @@ namespace Moses {
boost::unique_lock<boost::shared_mutex> guard(m_lock);
typename agenda::worker(*this->ag)();
}
- else
+ else
{
boost::unique_lock<boost::mutex> lock(ret->lock);
while (ret->in_progress)
@@ -639,7 +639,7 @@ namespace Moses {
Token const* a = x;
Token const* b = s;
size_t i = 0;
- while (a && b && a->id() == b->id() && i < m.size())
+ while (a && b && a->id() == b->id() && i < m.size())
{
++i;
a = a->next();
@@ -669,7 +669,7 @@ namespace Moses {
pair<bitvector,bitvector> ag;
ag.first.resize(a1.size());
ag.second.resize(a2.size());
- char const* x = Tx->sntStart(sid);
+ char const* x = Tx->sntStart(sid);
size_t a, b;
while (x < Tx->sntEnd(sid))
{
@@ -677,11 +677,11 @@ namespace Moses {
x = binread(x,b);
if (a1.at(a) < 0 && a2.at(b) < 0)
{
- a1[a] = a2[b] = agroups.size();
- ag.first.reset();
- ag.second.reset();
- ag.first.set(a);
- ag.second.set(b);
+ a1[a] = a2[b] = agroups.size();
+ ag.first.reset();
+ ag.second.reset();
+ ag.first.set(a);
+ ag.second.set(b);
agroups.push_back(ag);
grouplabel.push_back(f1[a] || f2[b] ? "infocusbi" : "unspec");
}
@@ -697,7 +697,7 @@ namespace Moses {
agroups[a1[a]].second.set(b);
if (f1[a] || f2[b]) grouplabel[a1[a]] = "infocusbi";
}
- else
+ else
{
agroups[a1[a]].first |= agroups[a2[b]].first;
agroups[a1[a]].second |= agroups[a2[b]].second;
@@ -705,10 +705,10 @@ namespace Moses {
if (f1[a] || f2[b]) grouplabel[a1[a]] = "infocusbi";
}
}
-
+
for (a = 0; a < a1.size(); ++a)
{
- if (a1[a] < 0)
+ if (a1[a] < 0)
{
if (f1[a]) out << a << "::" << "infocusmono ";
continue;
@@ -729,7 +729,7 @@ namespace Moses {
#if 0
template<typename Token>
- sptr<pstats>
+ sptr<pstats>
Bitext<Token>::
lookup(siter const& phrase, size_t const max_sample,
sptr<SamplingBias const> const& bias) const
@@ -738,7 +738,7 @@ namespace Moses {
boost::unique_lock<boost::shared_mutex> guard(m_lock);
if (this->num_workers <= 1)
typename agenda::worker(*this->ag)();
- else
+ else
{
boost::unique_lock<boost::mutex> lock(ret->lock);
while (ret->in_progress)
@@ -747,25 +747,25 @@ namespace Moses {
return ret;
}
#endif
-
+
template<typename Token>
- void
- expand(typename Bitext<Token>::iter const& m,
- Bitext<Token> const& bt, pstats const& ps,
+ void
+ expand(typename Bitext<Token>::iter const& m,
+ Bitext<Token> const& bt, pstats const& ps,
std::vector<PhrasePair<Token> >& dest, ostream* log)
{
bool fwd = m.root == bt.I1.get();
dest.reserve(ps.trg.size());
PhrasePair<Token> pp;
pp.init(m.getPid(), !fwd, m.getToken(0), m.size(), &ps, 0);
- // cout << HERE << " "
+ // cout << HERE << " "
// << toString(*(fwd ? bt.V1 : bt.V2), pp.start1,pp.len1) << endl;
pstats::trg_map_t::const_iterator a;
for (a = ps.trg.begin(); a != ps.trg.end(); ++a)
{
uint32_t sid,off,len;
parse_pid(a->first, sid, off, len);
- pp.update(a->first, (fwd ? bt.T2 : bt.T1)->sntStart(sid)+off,
+ pp.update(a->first, (fwd ? bt.T2 : bt.T1)->sntStart(sid)+off,
len, a->second);
dest.push_back(pp);
}
@@ -773,24 +773,24 @@ namespace Moses {
#if 0
template<typename Token>
- class
+ class
PStatsCache
{
typedef boost::unordered_map<uint64_t, sptr<pstats> > my_cache_t;
boost::shared_mutex m_lock;
- my_cache_t m_cache;
-
+ my_cache_t m_cache;
+
public:
sptr<pstats> get(Bitext<Token>::iter const& phrase) const;
- sptr<pstats>
+ sptr<pstats>
add(Bitext<Token>::iter const& phrase) const
{
uint64_t pid = phrase.getPid();
- std::pair<my_cache_t::iterator,bool>
+ std::pair<my_cache_t::iterator,bool>
}
-
+
};
#endif
} // end of namespace bitext
diff --git a/moses/TranslationModel/UG/mm/ug_bitext_agenda.h b/moses/TranslationModel/UG/mm/ug_bitext_agenda.h
index a9632c056..d07fba6aa 100644
--- a/moses/TranslationModel/UG/mm/ug_bitext_agenda.h
+++ b/moses/TranslationModel/UG/mm/ug_bitext_agenda.h
@@ -1,8 +1,8 @@
// -*- c++ -*-
// to be included from ug_bitext.h
-// The agenda handles parallel sampling.
-// It maintains a queue of unfinished sampling jobs and
+// The agenda handles parallel sampling.
+// It maintains a queue of unfinished sampling jobs and
// assigns them to a pool of workers.
//
template<typename Token>
@@ -13,7 +13,7 @@ public:
class job;
class worker;
private:
- boost::mutex lock;
+ boost::mutex lock;
std::list<sptr<job> > joblist;
std::vector<sptr<boost::thread> > workers;
bool shutdown;
@@ -27,23 +27,23 @@ public:
agenda(Bitext<Token> const& bitext);
~agenda();
- void
+ void
add_workers(int n);
- sptr<pstats>
+ sptr<pstats>
add_job(Bitext<Token> const* const theBitext,
- typename TSA<Token>::tree_iterator const& phrase,
+ typename TSA<Token>::tree_iterator const& phrase,
size_t const max_samples, sptr<SamplingBias const> const& bias);
// add_job(Bitext<Token> const* const theBitext,
- // typename TSA<Token>::tree_iterator const& phrase,
+ // typename TSA<Token>::tree_iterator const& phrase,
// size_t const max_samples, SamplingBias const* const bias);
- sptr<job>
+ sptr<job>
get_job();
};
-
+
template<typename Token>
-class
+class
Bitext<Token>::agenda::
worker
{
@@ -61,9 +61,9 @@ void Bitext<Token>
::agenda
::add_workers(int n)
{
- static boost::posix_time::time_duration nodelay(0,0,0,0);
+ static boost::posix_time::time_duration nodelay(0,0,0,0);
boost::lock_guard<boost::mutex> guard(this->lock);
-
+
int target = max(1, int(n + workers.size() - this->doomed));
// house keeping: remove all workers that have finished
for (size_t i = 0; i < workers.size(); )
@@ -79,7 +79,7 @@ void Bitext<Token>
// cerr << workers.size() << "/" << target << " active" << endl;
if (int(workers.size()) > target)
this->doomed = workers.size() - target;
- else
+ else
while (int(workers.size()) < target)
{
sptr<boost::thread> w(new boost::thread(worker(*this)));
@@ -92,16 +92,16 @@ template<typename Token>
sptr<pstats> Bitext<Token>
::agenda
::add_job(Bitext<Token> const* const theBitext,
- typename TSA<Token>::tree_iterator const& phrase,
+ typename TSA<Token>::tree_iterator const& phrase,
size_t const max_samples, sptr<SamplingBias const> const& bias)
{
boost::unique_lock<boost::mutex> lk(this->lock);
- static boost::posix_time::time_duration nodelay(0,0,0,0);
+ static boost::posix_time::time_duration nodelay(0,0,0,0);
bool fwd = phrase.root == bt.I1.get();
- sptr<job> j(new job(theBitext, phrase, fwd ? bt.I1 : bt.I2,
+ sptr<job> j(new job(theBitext, phrase, fwd ? bt.I1 : bt.I2,
max_samples, fwd, bias));
j->stats->register_worker();
-
+
joblist.push_back(j);
if (joblist.size() == 1)
{
@@ -136,7 +136,7 @@ Bitext<Token>
sptr<job> ret;
if (this->shutdown) return ret;
boost::unique_lock<boost::mutex> lock(this->lock);
- if (this->doomed)
+ if (this->doomed)
{ // the number of workers has been reduced, tell the redundant once to quit
--this->doomed;
return ret;
@@ -145,15 +145,15 @@ Bitext<Token>
typename list<sptr<job> >::iterator j = joblist.begin();
while (j != joblist.end())
{
- if ((*j)->done())
+ if ((*j)->done())
{
(*j)->stats->release();
joblist.erase(j++);
- }
+ }
else if ((*j)->workers >= 4) ++j; // no more than 4 workers per job
else break; // found one
}
- if (joblist.size())
+ if (joblist.size())
{
ret = j == joblist.end() ? joblist.front() : *j;
// if we've reached the end of the queue (all jobs have 4 workers on them),
@@ -175,12 +175,12 @@ agenda::
for (size_t i = 0; i < workers.size(); ++i)
workers[i]->join();
}
-
+
template<typename Token>
Bitext<Token>::
agenda::
agenda(Bitext<Token> const& thebitext)
: shutdown(false), doomed(0), bt(thebitext)
{ }
-
+
diff --git a/moses/TranslationModel/UG/mm/ug_bitext_agenda_job.h b/moses/TranslationModel/UG/mm/ug_bitext_agenda_job.h
index 0e26b6182..0e0624351 100644
--- a/moses/TranslationModel/UG/mm/ug_bitext_agenda_job.h
+++ b/moses/TranslationModel/UG/mm/ug_bitext_agenda_job.h
@@ -4,48 +4,48 @@
// todo: add check to enforce this
template<typename Token>
-class
+class
Bitext<Token>::agenda::
-job
+job
{
#if UG_BITEXT_TRACK_ACTIVE_THREADS
static ThreadSafeCounter active;
#endif
Bitext<Token> const* const m_bitext;
- boost::mutex lock;
+ boost::mutex lock;
friend class agenda;
- boost::taus88 rnd; // every job has its own pseudo random generator
+ boost::taus88 rnd; // every job has its own pseudo random generator
double rnddenom; // denominator for scaling random sampling
size_t min_diverse; // minimum number of distinct translations
- bool flip_coin(uint64_t & sid, uint64_t & offset);
+ bool flip_coin(uint64_t & sid, uint64_t & offset);
bool step(uint64_t & sid, uint64_t & offset); // proceed to next occurrence
public:
size_t workers; // how many workers are working on this job?
sptr<TSA<Token> const> root; // root of the underlying suffix array
- char const* next; // next position to read from
+ char const* next; // next position to read from
char const* stop; // end of index range
size_t max_samples; // how many samples to extract at most
size_t ctr; /* # of phrase occurrences considered so far
- * # of samples chosen is stored in stats->good
+ * # of samples chosen is stored in stats->good
*/
size_t len; // phrase length
- bool fwd; // if true, source phrase is L1
+ bool fwd; // if true, source phrase is L1
sptr<pstats> stats; // stores statistics collected during sampling
sptr<SamplingBias const> const m_bias; // sentence-level bias for sampling
float bias_total;
bool nextSample(uint64_t & sid, uint64_t & offset); // select next occurrence
-
- int
+
+ int
check_sample_distribution(uint64_t const& sid, uint64_t const& offset);
- // for biased sampling: ensure the distribution approximately matches
+ // for biased sampling: ensure the distribution approximately matches
// the bias
-
+
bool done() const;
- job(Bitext<Token> const* const theBitext,
- typename TSA<Token>::tree_iterator const& m,
- sptr<TSA<Token> > const& r, size_t maxsmpl, bool isfwd,
+ job(Bitext<Token> const* const theBitext,
+ typename TSA<Token>::tree_iterator const& m,
+ sptr<TSA<Token> > const& r, size_t maxsmpl, bool isfwd,
sptr<SamplingBias const> const& bias);
~job();
};
@@ -57,15 +57,15 @@ Bitext<Token>::agenda::job
if (stats) stats.reset();
#if UG_BITEXT_TRACK_ACTIVE_THREADS
// counter may not exist any more at destruction time, hence try .. catch ...
- try { --active; } catch (...) {}
+ try { --active; } catch (...) {}
#endif
}
template<typename Token>
Bitext<Token>::agenda::job
::job(Bitext<Token> const* const theBitext,
- typename TSA<Token>::tree_iterator const& m,
- sptr<TSA<Token> > const& r, size_t maxsmpl,
+ typename TSA<Token>::tree_iterator const& m,
+ sptr<TSA<Token> > const& r, size_t maxsmpl,
bool isfwd, sptr<SamplingBias const> const& bias)
: m_bitext(theBitext)
, rnd(0)
@@ -83,9 +83,9 @@ Bitext<Token>::agenda::job
{
stats.reset(new pstats());
stats->raw_cnt = m.approxOccurrenceCount();
- bias_total = 0;
-
- // we need to renormalize on the fly, as the summ of all sentence probs over
+ bias_total = 0;
+
+ // we need to renormalize on the fly, as the summ of all sentence probs over
// all candidates (not all sentences in the corpus) needs to add to 1.
// Profiling question: how much does that cost us?
if (m_bias)
@@ -98,8 +98,8 @@ Bitext<Token>::agenda::job
x = root->readSid(x,stop,sid);
x = root->readOffset(x,stop,offset);
#if 0
- cerr << ctr++ << " " << m.str(m_bitext->V1.get())
- << " " << sid << "/" << root->getCorpusSize()
+ cerr << ctr++ << " " << m.str(m_bitext->V1.get())
+ << " " << sid << "/" << root->getCorpusSize()
<< " " << offset << " " << stop-x << endl;
#endif
bias_total += (*m_bias)[sid];
@@ -108,7 +108,7 @@ Bitext<Token>::agenda::job
}
#if UG_BITEXT_TRACK_ACTIVE_THREADS
++active;
- // if (active%5 == 0)
+ // if (active%5 == 0)
// cerr << size_t(active) << " active jobs at " << __FILE__ << ":" << __LINE__ << endl;
#endif
}
@@ -116,8 +116,8 @@ Bitext<Token>::agenda::job
template<typename Token>
bool Bitext<Token>::agenda::job
::done() const
-{
- return (max_samples && stats->good >= max_samples) || next == stop;
+{
+ return (max_samples && stats->good >= max_samples) || next == stop;
}
template<typename Token>
@@ -125,39 +125,39 @@ int Bitext<Token>::agenda::job
::check_sample_distribution(uint64_t const& sid, uint64_t const& offset)
{ // ensure that the sampled distribution approximately matches the bias
// @return 0: SKIP this occurrence
- // @return 1: consider this occurrence for sampling
+ // @return 1: consider this occurrence for sampling
// @return 2: include this occurrence in the sample by all means
if (!m_bias) return 1;
-
+
using namespace boost::math;
typedef boost::math::binomial_distribution<> binomial;
-
+
ostream* log = m_bias->loglevel > 1 ? m_bias->log : NULL;
-
- float p = (*m_bias)[sid];
- id_type docid = m_bias->GetClass(sid);
- uint32_t k = docid < stats->indoc.size() ? stats->indoc[docid] : 0;
+
+ float p = (*m_bias)[sid];
+ id_type docid = m_bias->GetClass(sid);
+ uint32_t k = docid < stats->indoc.size() ? stats->indoc[docid] : 0;
// always consider candidates from dominating documents and
// from documents that have not been considered at all yet
bool ret = (p > .5 || k == 0);
-
+
if (ret && !log) return 1;
-
+
uint32_t N = stats->good; // number of trials
- float d = cdf(complement(binomial(N, p), k));
+ float d = cdf(complement(binomial(N, p), k));
// d: probability that samples contains k or more instances from doc #docid
- ret = ret || d >= .05;
-
+ ret = ret || d >= .05;
+
if (log)
{
Token const* t = root->getCorpus()->sntStart(sid)+offset;
Token const* x = t - min(offset,uint64_t(3));
- Token const* e = t+4;
+ Token const* e = t+4;
if (e > root->getCorpus()->sntEnd(sid))
e = root->getCorpus()->sntEnd(sid);
- *log << docid << ":" << sid << " " << size_t(k) << "/" << N
+ *log << docid << ":" << sid << " " << size_t(k) << "/" << N
<< " @" << p << " => " << d << " [";
for (size_t i = 0; i < stats->indoc.size(); ++i)
{
@@ -170,8 +170,8 @@ int Bitext<Token>::agenda::job
else if (p < .5 && d > .9) *log << "FORCE";
*log << endl;
}
-
- return (ret ? (p < .5 && d > .9) ? 2 : 1 : 0);
+
+ return (ret ? (p < .5 && d > .9) ? 2 : 1 : 0);
}
template<typename Token>
@@ -186,7 +186,7 @@ bool Bitext<Token>::agenda::job
size_t options_total = max(stats->raw_cnt, this->ctr);
size_t options_left = (options_total - this->ctr);
size_t random_number = options_left * (rnd()/(rnd.max()+1.));
- size_t threshold;
+ size_t threshold;
if (bias_total) // we have a bias and there are candidates with non-zero prob
threshold = ((*m_bias)[sid]/bias_total * options_total * max_samples);
else // no bias, or all have prob 0 (can happen with a very opinionated bias)
@@ -199,7 +199,7 @@ bool Bitext<Token>::agenda::job
::step(uint64_t & sid, uint64_t & offset)
{ // caller must lock!
if (next == stop) return false;
- UTIL_THROW_IF2
+ UTIL_THROW_IF2
( next > stop, "Fatal error at " << HERE << ". How did that happen?" );
// boost::lock_guard<boost::mutex> jguard(lock); // caller must lock!
next = root->readSid(next, stop, sid);
@@ -214,21 +214,21 @@ bool Bitext<Token>::agenda::job
{
boost::lock_guard<boost::mutex> jguard(lock);
if (max_samples == 0) // no sampling, consider all occurrences
- return step(sid, offset);
+ return step(sid, offset);
- while (step(sid,offset))
+ while (step(sid,offset))
{
size_t good = stats->good;
size_t diversity = stats->trg.size();
- if (good >= max_samples && diversity >= min_diverse)
+ if (good >= max_samples && diversity >= min_diverse)
return false; // done
- // flip_coin softly enforces approximation of the sampling to the
+ // flip_coin softly enforces approximation of the sampling to the
// bias (occurrences that would steer the sample too far from the bias
// are ruled out), and flips a biased coin otherwise.
if (!flip_coin(sid,offset)) continue;
return true;
- }
+ }
return false;
}
diff --git a/moses/TranslationModel/UG/mm/ug_bitext_agenda_worker.h b/moses/TranslationModel/UG/mm/ug_bitext_agenda_worker.h
index 92ed3d36a..5ff39312c 100644
--- a/moses/TranslationModel/UG/mm/ug_bitext_agenda_worker.h
+++ b/moses/TranslationModel/UG/mm/ug_bitext_agenda_worker.h
@@ -7,13 +7,13 @@ Bitext<Token>::agenda
::operator()()
{
// things to do:
- //
+ //
// - have each worker maintain their own pstats object and merge
// results at the end (to minimize mutex locking);
- //
+ //
// - use a non-locked, monotonically increasing counter to
// ensure the minimum size of samples considered --- it's OK if
- // we look at more samples than required. This way, we can
+ // we look at more samples than required. This way, we can
// reduce the number of lock / unlock operations we need to do
// during sampling.
@@ -38,13 +38,13 @@ Bitext<Token>::agenda
s1, s2, e1, e2, po_fwd, po_bwd, // bounds & orientation
&aln, full_aln, !j->fwd)); // aln info / flip sides?
- if (!good)
+ if (!good)
{ // no good, probably because phrase is not coherent
j->stats->count_sample(docid, 0, po_fwd, po_bwd);
continue;
}
- // all good: register this sample as valid
+ // all good: register this sample as valid
size_t num_pairs = (s2-s1+1) * (e2-e1+1);
j->stats->count_sample(docid, num_pairs, po_fwd, po_bwd);
@@ -52,14 +52,14 @@ Bitext<Token>::agenda
Token const* t = ag.bt.T2->sntStart(sid);
Token const* eos = ag.bt.T2->sntEnd(sid);
cerr << "[" << j->stats->good + 1 << "] ";
- while (t != eos) cerr << (*ag.bt.V2)[(t++)->id()] << " ";
+ while (t != eos) cerr << (*ag.bt.V2)[(t++)->id()] << " ";
cerr << "[" << docid << "]" << endl;
#endif
float sample_weight = 1./num_pairs;
Token const* o = (j->fwd ? ag.bt.T2 : ag.bt.T1)->sntStart(sid);
- // adjust offsets in phrase-internal aligment
+ // adjust offsets in phrase-internal aligment
for (size_t k = 1; k < aln.size(); k += 2) aln[k] += s2 - s1;
vector<uint64_t> seen; seen.reserve(10);
@@ -93,7 +93,7 @@ Bitext<Token>::agenda
UTIL_THROW_IF2(!ok, "Could not extend target phrase.");
}
if (s < s2) // shift phrase-internal alignments
- for (size_t k = 1; k < aln.size(); k += 2)
+ for (size_t k = 1; k < aln.size(); k += 2)
--aln[k];
}
}
diff --git a/moses/TranslationModel/UG/mm/ug_bitext_jstats.cc b/moses/TranslationModel/UG/mm/ug_bitext_jstats.cc
index cb3804edc..bcda9ebf3 100644
--- a/moses/TranslationModel/UG/mm/ug_bitext_jstats.cc
+++ b/moses/TranslationModel/UG/mm/ug_bitext_jstats.cc
@@ -16,12 +16,12 @@ namespace Moses
jstats::
jstats()
: my_rcnt(0), my_cnt2(0), my_wcnt(0)
- {
- for (int i = 0; i <= Moses::LRModel::NONE; ++i)
+ {
+ for (int i = 0; i <= Moses::LRModel::NONE; ++i)
ofwd[i] = obwd[i] = 0;
my_aln.reserve(1);
}
-
+
jstats::
jstats(jstats const& other)
{
@@ -35,8 +35,8 @@ namespace Moses
obwd[i] = other.obwd[i];
}
}
-
- uint32_t
+
+ uint32_t
jstats::
dcnt_fwd(PhraseOrientation const idx) const
{
@@ -44,15 +44,15 @@ namespace Moses
return ofwd[idx];
}
- uint32_t
+ uint32_t
jstats::
dcnt_bwd(PhraseOrientation const idx) const
{
assert(idx <= Moses::LRModel::NONE);
return obwd[idx];
}
-
- void
+
+ void
jstats::
add(float w, vector<uchar> const& a, uint32_t const cnt2,
uint32_t fwd_orient, uint32_t bwd_orient, int const docid)
@@ -65,7 +65,7 @@ namespace Moses
{
size_t i = 0;
while (i < my_aln.size() && my_aln[i].second != a) ++i;
- if (i == my_aln.size())
+ if (i == my_aln.size())
my_aln.push_back(pair<size_t,vector<uchar> >(1,a));
else
my_aln[i].first++;
@@ -83,7 +83,7 @@ namespace Moses
vector<pair<size_t, vector<uchar> > > const&
jstats::
- aln() const
+ aln() const
{ return my_aln; }
} // namespace bitext
diff --git a/moses/TranslationModel/UG/mm/ug_bitext_jstats.h b/moses/TranslationModel/UG/mm/ug_bitext_jstats.h
index ce2e89438..dade27649 100644
--- a/moses/TranslationModel/UG/mm/ug_bitext_jstats.h
+++ b/moses/TranslationModel/UG/mm/ug_bitext_jstats.h
@@ -4,20 +4,20 @@
#include "ug_lexical_reordering.h"
#include <boost/thread.hpp>
-namespace Moses
+namespace Moses
{
namespace bitext
{
using namespace ugdiss;
- // "joint" (i.e., phrase pair) statistics
+ // "joint" (i.e., phrase pair) statistics
class
jstats
{
boost::mutex lock;
uint32_t my_rcnt; // unweighted joint count
uint32_t my_cnt2; // raw counts L2
- float my_wcnt; // weighted joint count
+ float my_wcnt; // weighted joint count
// to do: use a static alignment pattern store that stores each pattern only
// once, so that we don't have to store so many alignment vectors
@@ -33,18 +33,18 @@ namespace Moses
uint32_t rcnt() const; // raw joint counts
uint32_t cnt2() const; // raw target phrase occurrence count
float wcnt() const; // weighted joint counts
-
+
vector<pair<size_t, vector<uchar> > > const & aln() const;
void add(float w, vector<uchar> const& a, uint32_t const cnt2,
- uint32_t fwd_orient, uint32_t bwd_orient,
+ uint32_t fwd_orient, uint32_t bwd_orient,
int const docid);
void invalidate();
void validate();
bool valid();
uint32_t dcnt_fwd(PhraseOrientation const idx) const;
uint32_t dcnt_bwd(PhraseOrientation const idx) const;
- void fill_lr_vec(Moses::LRModel::Direction const& dir,
- Moses::LRModel::ModelType const& mdl,
+ void fill_lr_vec(Moses::LRModel::Direction const& dir,
+ Moses::LRModel::ModelType const& mdl,
vector<float>& v);
};
}
diff --git a/moses/TranslationModel/UG/mm/ug_bitext_pstats.cc b/moses/TranslationModel/UG/mm/ug_bitext_pstats.cc
index 482957508..580d7669b 100644
--- a/moses/TranslationModel/UG/mm/ug_bitext_pstats.cc
+++ b/moses/TranslationModel/UG/mm/ug_bitext_pstats.cc
@@ -8,11 +8,11 @@ namespace Moses
#if UG_BITEXT_TRACK_ACTIVE_THREADS
ThreadSafeCounter pstats::active;
#endif
-
+
pstats::
pstats() : raw_cnt(0), sample_cnt(0), good(0), sum_pairs(0), in_progress(0)
{
- for (int i = 0; i <= Moses::LRModel::NONE; ++i)
+ for (int i = 0; i <= Moses::LRModel::NONE; ++i)
ofwd[i] = obwd[i] = 0;
}
@@ -21,7 +21,7 @@ namespace Moses
{
#if UG_BITEXT_TRACK_ACTIVE_THREADS
// counter may not exist any more at destruction time, so try ... catch
- try { --active; } catch (...) {}
+ try { --active; } catch (...) {}
#endif
}
@@ -33,7 +33,7 @@ namespace Moses
++this->in_progress;
this->lock.unlock();
}
-
+
void
pstats::
release()
@@ -44,9 +44,9 @@ namespace Moses
this->lock.unlock();
}
- void
+ void
pstats
- ::count_sample(int const docid, size_t const num_pairs,
+ ::count_sample(int const docid, size_t const num_pairs,
int const po_fwd, int const po_bwd)
{
boost::lock_guard<boost::mutex> guard(lock);
@@ -65,10 +65,10 @@ namespace Moses
bool
pstats::
- add(uint64_t pid, float const w,
- vector<uchar> const& a,
- uint32_t const cnt2,
- uint32_t fwd_o,
+ add(uint64_t pid, float const w,
+ vector<uchar> const& a,
+ uint32_t const cnt2,
+ uint32_t fwd_o,
uint32_t bwd_o, int const docid)
{
boost::lock_guard<boost::mutex> guard(this->lock);
@@ -76,7 +76,7 @@ namespace Moses
entry.add(w, a, cnt2, fwd_o, bwd_o, docid);
if (this->good < entry.rcnt())
{
- UTIL_THROW(util::Exception, "more joint counts than good counts:"
+ UTIL_THROW(util::Exception, "more joint counts than good counts:"
<< entry.rcnt() << "/" << this->good << "!");
}
return true;
diff --git a/moses/TranslationModel/UG/mm/ug_bitext_pstats.h b/moses/TranslationModel/UG/mm/ug_bitext_pstats.h
index c5b6c0152..9a14e378b 100644
--- a/moses/TranslationModel/UG/mm/ug_bitext_pstats.h
+++ b/moses/TranslationModel/UG/mm/ug_bitext_pstats.h
@@ -12,7 +12,7 @@ namespace Moses
{
namespace bitext
{
- struct
+ struct
pstats
{
typedef boost::unordered_map<uint64_t, sptr<pstats> > map_t;
@@ -23,8 +23,8 @@ namespace Moses
#endif
boost::mutex lock; // for parallel gathering of stats
boost::condition_variable ready; // consumers can wait for me to be ready
-
- size_t raw_cnt; // (approximate) raw occurrence count
+
+ size_t raw_cnt; // (approximate) raw occurrence count
size_t sample_cnt; // number of instances selected during sampling
size_t good; // number of selected instances with valid word alignments
size_t sum_pairs; // total number of target phrases extracted (can be > raw_cnt)
@@ -34,25 +34,25 @@ namespace Moses
uint32_t obwd[Moses::LRModel::NONE+1]; // distribution of bwd phrase orientations
std::vector<uint32_t> indoc; // distribution over where samples came from
-
+
typedef std::map<uint64_t, jstats> trg_map_t;
trg_map_t trg;
pstats();
~pstats();
void release();
void register_worker();
- size_t count_workers() { return in_progress; }
+ size_t count_workers() { return in_progress; }
- bool
+ bool
add(uint64_t const pid, // target phrase id
float const w, // sample weight (1./(# of phrases extractable))
alnvec const& a, // local alignment
uint32_t const cnt2, // raw target phrase count
uint32_t fwd_o, // fwd. phrase orientation
uint32_t bwd_o, // bwd. phrase orientation
- int const docid); // document where sample was found
+ int const docid); // document where sample was found
- void
+ void
count_sample(int const docid, // document where sample was found
size_t const num_pairs, // # of phrases extractable here
int const po_fwd, // fwd phrase orientation
diff --git a/moses/TranslationModel/UG/mm/ug_conll_bottom_up_token.h b/moses/TranslationModel/UG/mm/ug_conll_bottom_up_token.h
index 845fe374e..89dc93ad1 100644
--- a/moses/TranslationModel/UG/mm/ug_conll_bottom_up_token.h
+++ b/moses/TranslationModel/UG/mm/ug_conll_bottom_up_token.h
@@ -25,13 +25,13 @@ namespace ugdiss
return NULL;
};
- ConllBottomUpToken const*
- stop(ConllBottomUpToken const* seqStart,
+ ConllBottomUpToken const*
+ stop(ConllBottomUpToken const* seqStart,
ConllBottomUpToken const* seqEnd) const
{
return NULL;
};
-
+
bool operator<(T const& other) const { return this->cmp(other) < 0; }
bool operator>(T const& other) const { return this->cmp(other) > 0; }
bool operator==(T const& other) const { return this->cmp(other) == 0; }
@@ -44,9 +44,9 @@ namespace ugdiss
return false;
}
};
-
+
template<typename T>
- ConllBottomUpToken<T> const*
+ ConllBottomUpToken<T> const*
ConllBottomUpToken<T>::
next(int length) const
{
diff --git a/moses/TranslationModel/UG/mm/ug_conll_record.h b/moses/TranslationModel/UG/mm/ug_conll_record.h
index ea2cda29e..e52a4974b 100644
--- a/moses/TranslationModel/UG/mm/ug_conll_record.h
+++ b/moses/TranslationModel/UG/mm/ug_conll_record.h
@@ -3,22 +3,22 @@
#include "ug_typedefs.h"
// Base class for dependency tree corpora with POS and Lemma annotations
-namespace ugdiss
+namespace ugdiss
{
using namespace std;
- class
- Conll_Record
+ class
+ Conll_Record
{
public:
id_type sform; // surface form
id_type lemma; // lemma
uchar majpos; // major part of speech
uchar minpos; // minor part of speech
- short parent; // id of parent
+ short parent; // id of parent
uchar dtype; // dependency type
uchar info[3]; /* additional information (depends on the part of speech)
- * a place holder for the time being, to ensure proper
+ * a place holder for the time being, to ensure proper
* alignment in memory */
Conll_Record();
Conll_Record const* up(int length=1) const;
@@ -38,8 +38,8 @@ namespace ugdiss
* @parameter PS Vocabulary for part-of-speech
* @parameter DT Vocabulary for dependency type
*/
- Conll_Record(string const& line,
- TokenIndex const& SF, TokenIndex const& LM,
+ Conll_Record(string const& line,
+ TokenIndex const& SF, TokenIndex const& LM,
TokenIndex const& PS, TokenIndex const& DT);
/** store the record as-is to disk (for memory-mapped reading later) */
@@ -62,7 +62,7 @@ namespace ugdiss
// this is for contigous word sequences extracted from longer sequences
// adjust parent pointers to 0 (no parent) if they point out of the
// subsequence
- void
+ void
fixParse(Conll_Record* start, Conll_Record* stop);
} // end of namespace ugdiss
diff --git a/moses/TranslationModel/UG/mm/ug_corpus_token.cc b/moses/TranslationModel/UG/mm/ug_corpus_token.cc
index 742c17ace..4be8cbd95 100644
--- a/moses/TranslationModel/UG/mm/ug_corpus_token.cc
+++ b/moses/TranslationModel/UG/mm/ug_corpus_token.cc
@@ -6,9 +6,9 @@ namespace ugdiss
{
id_type const&
SimpleWordId::
- id() const
- {
- return theID;
+ id() const
+ {
+ return theID;
}
int
diff --git a/moses/TranslationModel/UG/mm/ug_corpus_token.h b/moses/TranslationModel/UG/mm/ug_corpus_token.h
index c1baaf21e..b9693cbf2 100644
--- a/moses/TranslationModel/UG/mm/ug_corpus_token.h
+++ b/moses/TranslationModel/UG/mm/ug_corpus_token.h
@@ -19,7 +19,7 @@ namespace ugdiss
{
/** Simple wrapper around id_type for use with the Ttrack/TSA template classes */
- class SimpleWordId
+ class SimpleWordId
{
id_type theID;
public:
@@ -29,7 +29,7 @@ namespace ugdiss
bool operator==(SimpleWordId const& other) const;
id_type remap(vector<id_type const*> const& m) const;
};
-
+
/** Token class for suffix arrays */
template<typename T>
class
@@ -43,16 +43,16 @@ namespace ugdiss
L2R_Token const* next(int n=1) const { return this+n; }
- /** return a pointer to the end of a sentence; used as a stopping criterion during
+ /** return a pointer to the end of a sentence; used as a stopping criterion during
* comparison of suffixes; see Ttrack::cmp() */
template<typename TTRACK_TYPE>
- L2R_Token const* stop(TTRACK_TYPE const& C, id_type sid) const
- {
- return reinterpret_cast<L2R_Token<T> const*>(C.sntEnd(sid));
+ L2R_Token const* stop(TTRACK_TYPE const& C, id_type sid) const
+ {
+ return reinterpret_cast<L2R_Token<T> const*>(C.sntEnd(sid));
}
- L2R_Token const* stop(L2R_Token const* seqStart, L2R_Token const* seqEnd) const
- {
+ L2R_Token const* stop(L2R_Token const* seqStart, L2R_Token const* seqEnd) const
+ {
return seqEnd;
}
@@ -69,20 +69,20 @@ namespace ugdiss
{
public:
typedef T Token;
-
+
R2L_Token() : T() {};
R2L_Token(id_type id) : T(id) {};
R2L_Token const* next(int n = 1) const { return this - n; }
template<typename TTRACK_TYPE>
- R2L_Token const* stop(TTRACK_TYPE const& C, id_type sid) const
- {
- return reinterpret_cast<R2L_Token<T> const*>(C.sntStart(sid) - 1);
+ R2L_Token const* stop(TTRACK_TYPE const& C, id_type sid) const
+ {
+ return reinterpret_cast<R2L_Token<T> const*>(C.sntStart(sid) - 1);
}
- R2L_Token const* stop(R2L_Token const* seqStart, R2L_Token const* seqEnd) const
- {
+ R2L_Token const* stop(R2L_Token const* seqStart, R2L_Token const* seqEnd) const
+ {
assert(seqStart);
return seqStart - 1;
}
diff --git a/moses/TranslationModel/UG/mm/ug_deptree.cc b/moses/TranslationModel/UG/mm/ug_deptree.cc
index 545268e04..003d9b35e 100644
--- a/moses/TranslationModel/UG/mm/ug_deptree.cc
+++ b/moses/TranslationModel/UG/mm/ug_deptree.cc
@@ -7,14 +7,14 @@ using namespace std;
namespace ugdiss
{
- bool
+ bool
Conll_Record::
isDescendentOf(Conll_Record const* other) const
{
Conll_Record const* a = this;
- while (a != other && a->parent)
+ while (a != other && a->parent)
a += a->parent;
- return a==other;
+ return a==other;
}
Conll_Record&
@@ -43,7 +43,7 @@ namespace ugdiss
}
Conll_AllFields::
- Conll_AllFields()
+ Conll_AllFields()
: Conll_Record::Conll_Record()
{};
@@ -64,7 +64,7 @@ namespace ugdiss
}
Conll_WildCard::
- Conll_WildCard()
+ Conll_WildCard()
: Conll_Record::Conll_Record()
{};
@@ -95,8 +95,8 @@ namespace ugdiss
#if 0
Conll_Record::
- Conll_Record(string const& line,
- TokenIndex const& SF, TokenIndex const& LM,
+ Conll_Record(string const& line,
+ TokenIndex const& SF, TokenIndex const& LM,
TokenIndex const& PS, TokenIndex const& DT)
{
@@ -140,35 +140,35 @@ namespace ugdiss
#endif
Conll_Sform::
- Conll_Sform()
- : Conll_Record::Conll_Record()
+ Conll_Sform()
+ : Conll_Record::Conll_Record()
{};
Conll_MinPos::
- Conll_MinPos()
- : Conll_Record::Conll_Record()
+ Conll_MinPos()
+ : Conll_Record::Conll_Record()
{};
-
+
Conll_MinPos_Lemma::
- Conll_MinPos_Lemma()
- : Conll_Record::Conll_Record()
+ Conll_MinPos_Lemma()
+ : Conll_Record::Conll_Record()
{};
Conll_Lemma::
Conll_Lemma()
- : Conll_Record::Conll_Record()
+ : Conll_Record::Conll_Record()
{};
Conll_Lemma::
Conll_Lemma(id_type _id)
- : Conll_Record::Conll_Record()
+ : Conll_Record::Conll_Record()
{
this->lemma = _id;
};
Conll_MinPos::
Conll_MinPos(id_type _id)
- : Conll_Record::Conll_Record()
+ : Conll_Record::Conll_Record()
{
this->minpos = _id;
};
@@ -182,7 +182,7 @@ namespace ugdiss
Conll_MajPos::
Conll_MajPos(id_type _id)
- : Conll_Record::Conll_Record()
+ : Conll_Record::Conll_Record()
{
this->majpos = _id;
};
@@ -219,21 +219,21 @@ namespace ugdiss
Conll_MinPos_Lemma::
cmp(Conll_Record const& other) const
{
- if (this->minpos != 0 && other.minpos != 0 && this->minpos != other.minpos)
+ if (this->minpos != 0 && other.minpos != 0 && this->minpos != other.minpos)
return this->minpos < other.minpos ? -1 : 1;
if (this->lemma != 0 && other.lemma != 0 && this->lemma != other.lemma)
return this->lemma < other.lemma ? -1 : 1;
return 0;
}
- id_type
+ id_type
Conll_Lemma::
- id() const
- {
- return this->lemma;
+ id() const
+ {
+ return this->lemma;
}
- int
+ int
Conll_Lemma::
cmp(Conll_Record const& other) const
{
@@ -251,16 +251,16 @@ namespace ugdiss
Conll_Sform::
Conll_Sform(id_type _id)
- : Conll_Record::Conll_Record()
+ : Conll_Record::Conll_Record()
{
this->sform = _id;
};
- id_type
+ id_type
Conll_Sform
- ::id() const
- {
- return this->sform;
+ ::id() const
+ {
+ return this->sform;
}
int
@@ -282,7 +282,7 @@ namespace ugdiss
short p = w[i].rec->parent;
if (p != 0)
{
- if (p > 0) assert(i+p < w.size());
+ if (p > 0) assert(i+p < w.size());
else assert(i >= size_t(-p));
w[i].parent = &(w[i+p]);
w[i].parent->children.push_back(&(w[i]));
@@ -291,7 +291,7 @@ namespace ugdiss
}
#endif
- /** @return true if the linear sequence of /Conll_Record/s is coherent,
+ /** @return true if the linear sequence of /Conll_Record/s is coherent,
* i.e., a proper connected tree structure */
bool
isCoherent(Conll_Record const* const start, Conll_Record const* const stop)
@@ -300,16 +300,16 @@ namespace ugdiss
for (Conll_Record const* x = start; outOfRange <= 1 && x < stop; ++x)
{
Conll_Record const* n = x->up();
- if (!n || n < start || n >= stop)
+ if (!n || n < start || n >= stop)
outOfRange++;
}
return outOfRange<=1;
}
-
+
// this is for contigous word sequences extracted from longer sequences
// adjust parent pointers to 0 (no parent) if they point out of the
// subsequence
- void
+ void
fixParse(Conll_Record* start, Conll_Record* stop)
{
int len = stop-start;
diff --git a/moses/TranslationModel/UG/mm/ug_deptree.h b/moses/TranslationModel/UG/mm/ug_deptree.h
index 0d393aa33..b28a4bbe8 100644
--- a/moses/TranslationModel/UG/mm/ug_deptree.h
+++ b/moses/TranslationModel/UG/mm/ug_deptree.h
@@ -19,8 +19,8 @@ using namespace std;
namespace ugdiss
{
- // Fills the vector v with pointers to the internal root r_x for the
- // stretch [start,x] for all x: start <= x < stop. If the stretch
+ // Fills the vector v with pointers to the internal root r_x for the
+ // stretch [start,x] for all x: start <= x < stop. If the stretch
// is incoherent, r_x is NULL
template<typename T>
void
@@ -37,8 +37,8 @@ namespace ugdiss
{
size_t p = x-start;
root[p] = x+x->parent;
- for (size_t i = isR.find_first(); i < isR.size(); i = isR.find_next(i))
- if (root[i]==x)
+ for (size_t i = isR.find_first(); i < isR.size(); i = isR.find_next(i))
+ if (root[i]==x)
isR.reset(i);
if (root[p] < start || root[p] >= stop)
isR.set(x-start);
@@ -46,7 +46,7 @@ namespace ugdiss
}
}
- // return the root of the tree if the span [start,stop) constitutes a
+ // return the root of the tree if the span [start,stop) constitutes a
// tree, NULL otherwise
template<typename T>
T const*
@@ -66,7 +66,7 @@ namespace ugdiss
assert(outOfRange);
return outOfRange == 1 ? root : NULL;
}
-
+
// return the governor of the tree given by [start,stop) if the span
// constitutes a tree, NULL otherwise
template<typename T>
@@ -82,7 +82,7 @@ namespace ugdiss
{
if (root && n != root)
numRoots++;
- else
+ else
{
root = n;
if (!numRoots) numRoots++;
@@ -101,7 +101,7 @@ namespace ugdiss
T const* b = as<T>(&(*v.end()));
return (a==b) ? NULL : findInternalRoot<T>(a,b);
}
-
+
#if 1
class DTNode
{
@@ -113,7 +113,7 @@ namespace ugdiss
};
/** A parsed sentence */
- class
+ class
DependencyTree
{
public:
@@ -189,13 +189,13 @@ namespace ugdiss
int cmp(Conll_Record const& other) const;
};
- /** @return true if the linear sequence of /Conll_Record/s is coherent,
+ /** @return true if the linear sequence of /Conll_Record/s is coherent,
* i.e., a proper connected tree structure */
bool
isCoherent(Conll_Record const* start, Conll_Record const* const stop);
- /** @return the root node of the tree covering the span [start,stop), if the span is coherent;
+ /** @return the root node of the tree covering the span [start,stop), if the span is coherent;
* NULL otherwise */
template<typename T>
T const* topNode(T const* start , T const* stop)
@@ -204,9 +204,9 @@ namespace ugdiss
for (T const* x = start; x < stop; ++x)
{
T const* n = reinterpret_cast<T const*>(x->up());
- if (!n || n < start || n >= stop)
+ if (!n || n < start || n >= stop)
{
- if (ret) return NULL;
+ if (ret) return NULL;
else ret = x;
}
}
diff --git a/moses/TranslationModel/UG/mm/ug_im_bitext.cc b/moses/TranslationModel/UG/mm/ug_im_bitext.cc
index 9f26a181b..b411cc7dc 100644
--- a/moses/TranslationModel/UG/mm/ug_im_bitext.cc
+++ b/moses/TranslationModel/UG/mm/ug_im_bitext.cc
@@ -6,15 +6,15 @@ namespace Moses
{
template<>
- sptr<imBitext<L2R_Token<SimpleWordId> > >
+ sptr<imBitext<L2R_Token<SimpleWordId> > >
imBitext<L2R_Token<SimpleWordId> >::
- add(vector<string> const& s1,
- vector<string> const& s2,
+ add(vector<string> const& s1,
+ vector<string> const& s2,
vector<string> const& aln) const
{
typedef L2R_Token<SimpleWordId> TKN;
assert(s1.size() == s2.size() && s1.size() == aln.size());
-
+
#ifndef NDEBUG
size_t first_new_snt = this->T1 ? this->T1->size() : 0;
#endif
@@ -24,7 +24,7 @@ namespace Moses
boost::unique_lock<boost::shared_mutex> guard(m_lock);
ret.reset(new imBitext<TKN>(*this));
}
-
+
// we add the sentences in separate threads (so it's faster)
boost::thread thread1(snt_adder<TKN>(s1,*ret->V1,ret->myT1,ret->myI1));
// thread1.join(); // for debugging
@@ -41,10 +41,10 @@ namespace Moses
binwrite(obuf,row);
binwrite(obuf,col);
}
- // important: DO NOT replace the two lines below this comment by
- // char const* x = obuf.str().c_str(), as the memory x is pointing
+ // important: DO NOT replace the two lines below this comment by
+ // char const* x = obuf.str().c_str(), as the memory x is pointing
// to is freed immediately upon deconstruction of the string object.
- string foo = obuf.str();
+ string foo = obuf.str();
char const* x = foo.c_str();
vector<char> v(x,x+foo.size());
ret->myTx = append(ret->myTx, v);
diff --git a/moses/TranslationModel/UG/mm/ug_im_bitext.h b/moses/TranslationModel/UG/mm/ug_im_bitext.h
index a620b7219..63e44f1b9 100644
--- a/moses/TranslationModel/UG/mm/ug_im_bitext.h
+++ b/moses/TranslationModel/UG/mm/ug_im_bitext.h
@@ -4,7 +4,7 @@
namespace Moses
{
- namespace bitext
+ namespace bitext
{
template<typename TKN>
class imBitext : public Bitext<TKN>
@@ -12,7 +12,7 @@ namespace Moses
sptr<imTtrack<char> > myTx;
sptr<imTtrack<TKN> > myT1;
sptr<imTtrack<TKN> > myT2;
- sptr<imTSA<TKN> > myI1;
+ sptr<imTSA<TKN> > myI1;
sptr<imTSA<TKN> > myI2;
static ThreadSafeCounter my_revision;
public:
@@ -23,26 +23,26 @@ namespace Moses
size_t max_sample = 5000, size_t num_workers=4);
imBitext(size_t max_sample = 5000, size_t num_workers=4);
imBitext(imBitext const& other);
-
- // sptr<imBitext<TKN> >
+
+ // sptr<imBitext<TKN> >
// add(vector<TKN> const& s1, vector<TKN> const& s2, vector<ushort> & a);
- sptr<imBitext<TKN> >
- add(vector<string> const& s1,
- vector<string> const& s2,
+ sptr<imBitext<TKN> >
+ add(vector<string> const& s1,
+ vector<string> const& s2,
vector<string> const& a) const;
};
template<typename TKN>
- ThreadSafeCounter
+ ThreadSafeCounter
imBitext<TKN>::my_revision;
template<typename TKN>
imBitext<TKN>::
imBitext(size_t max_sample, size_t num_workers)
: Bitext<TKN>(max_sample, num_workers)
- {
+ {
this->m_default_sample_size = max_sample;
this->V1.reset(new TokenIndex());
this->V2.reset(new TokenIndex());
@@ -50,14 +50,14 @@ namespace Moses
this->V2->setDynamic(true);
++my_revision;
}
-
+
template<typename TKN>
imBitext<TKN>::
imBitext(sptr<TokenIndex> const& v1,
sptr<TokenIndex> const& v2,
size_t max_sample, size_t num_workers)
: Bitext<TKN>(max_sample, num_workers)
- {
+ {
// this->default_sample_size = max_sample;
this->V1 = v1;
this->V2 = v2;
@@ -65,12 +65,12 @@ namespace Moses
this->V2->setDynamic(true);
++my_revision;
}
-
+
template<typename TKN>
imBitext<TKN>::
imBitext(imBitext<TKN> const& other)
- {
+ {
this->myTx = other.myTx;
this->myT1 = other.myT1;
this->myT2 = other.myT2;
@@ -89,17 +89,17 @@ namespace Moses
}
template<>
- sptr<imBitext<L2R_Token<SimpleWordId> > >
+ sptr<imBitext<L2R_Token<SimpleWordId> > >
imBitext<L2R_Token<SimpleWordId> >::
- add(vector<string> const& s1,
- vector<string> const& s2,
+ add(vector<string> const& s1,
+ vector<string> const& s2,
vector<string> const& aln) const;
template<typename TKN>
- sptr<imBitext<TKN> >
+ sptr<imBitext<TKN> >
imBitext<TKN>::
- add(vector<string> const& s1,
- vector<string> const& s2,
+ add(vector<string> const& s1,
+ vector<string> const& s2,
vector<string> const& aln) const
{
throw "Not yet implemented";
diff --git a/moses/TranslationModel/UG/mm/ug_im_tsa.h b/moses/TranslationModel/UG/mm/ug_im_tsa.h
index f7256ba2d..e920d9f96 100644
--- a/moses/TranslationModel/UG/mm/ug_im_tsa.h
+++ b/moses/TranslationModel/UG/mm/ug_im_tsa.h
@@ -23,7 +23,7 @@ namespace ugdiss
using namespace std;
using namespace boost;
namespace bio=boost::iostreams;
-
+
// template<typename TOKEN> class imBitext<TOKEN>;
//-----------------------------------------------------------------------
@@ -35,61 +35,61 @@ namespace ugdiss
public:
class tree_iterator;
friend class tree_iterator;
-
+
private:
vector<cpos> sufa; // stores the actual array
- vector<filepos_type> index; /* top-level index into regions in sufa
+ vector<filepos_type> index; /* top-level index into regions in sufa
* (for faster access) */
private:
- char const*
+ char const*
index_jump(char const* a, char const* z, float ratio) const;
- char const*
+ char const*
getLowerBound(id_type id) const;
- char const*
+ char const*
getUpperBound(id_type id) const;
-
+
public:
imTSA();
- imTSA(boost::shared_ptr<Ttrack<TOKEN> const> c,
- bdBitset const* filt,
+ imTSA(boost::shared_ptr<Ttrack<TOKEN> const> c,
+ bdBitset const* filt,
ostream* log = NULL);
- imTSA(imTSA<TOKEN> const& prior,
+ imTSA(imTSA<TOKEN> const& prior,
boost::shared_ptr<imTtrack<TOKEN> const> const& crp,
vector<id_type> const& newsids, size_t const vsize);
- count_type
- sntCnt(char const* p, char const * const q) const;
+ count_type
+ sntCnt(char const* p, char const * const q) const;
- count_type
+ count_type
rawCnt(char const* p, char const * const q) const;
-
- void
- getCounts(char const* p, char const * const q,
+
+ void
+ getCounts(char const* p, char const * const q,
count_type& sids, count_type& raw) const;
-
- char const*
+
+ char const*
readSid(char const* p, char const* q, id_type& sid) const;
-
- char const*
+
+ char const*
readSid(char const* p, char const* q, ::uint64_t& sid) const;
- char const*
+ char const*
readOffset(char const* p, char const* q, uint16_t& offset) const;
- char const*
+ char const*
readOffset(char const* p, char const* q, ::uint64_t& offset) const;
-
- void
+
+ void
sanityCheck() const;
-
- void
+
+ void
save_as_mm_tsa(string fname) const;
-
+
/// add a sentence to the database
- // shared_ptr<imTSA<TOKEN> > add(vector<TOKEN> const& snt) const;
+ // shared_ptr<imTSA<TOKEN> > add(vector<TOKEN> const& snt) const;
};
@@ -108,12 +108,12 @@ namespace ugdiss
tree_iterator(imTSA<TOKEN> const* s)
: TSA<TOKEN>::tree_iterator::tree_iterator(reinterpret_cast<TSA<TOKEN> const*>(s))
{};
-
+
/** jump to the point 1/ratio in a tightly packed index
* assumes that keys are flagged with '1', values with '0'
*/
template<typename TOKEN>
- char const*
+ char const*
imTSA<TOKEN>::
index_jump(char const* a, char const* z, float ratio) const
{
@@ -123,10 +123,10 @@ namespace ugdiss
cpos const* xz = reinterpret_cast<cpos const*>(z);
return reinterpret_cast<char const*>(xa+int(ratio*(xz-xa)));
}
-
+
template<typename TOKEN>
imTSA<TOKEN>::
- imTSA()
+ imTSA()
{
this->indexSize = 0;
// this->data = NULL;
@@ -135,7 +135,7 @@ namespace ugdiss
this->corpusSize = 0;
this->BitSetCachingThreshold=4096;
};
-
+
// build an array from all the tokens in the sentences in *c that are
// specified in filter
template<typename TOKEN>
@@ -153,12 +153,12 @@ namespace ugdiss
}
assert(filter);
// In the first iteration over the corpus, we obtain word counts.
- // They allows us to
+ // They allows us to
// a. allocate the exact amount of memory we need
- // b. place tokens into the right 'section' in the array, based on
+ // b. place tokens into the right 'section' in the array, based on
// the ID of the first token in the sequence. We can then sort
// each section separately.
-
+
if (log) *log << "counting tokens ... ";
int slimit = 65536;
// slimit=65536 is the upper bound of what we can fit into a ushort which
@@ -176,7 +176,7 @@ namespace ugdiss
vector<count_type> tmp(wcnt.size(),0);
for (size_t i = 1; i < wcnt.size(); ++i)
tmp[i] = tmp[i-1] + wcnt[i-1];
-
+
// Now dump all token positions into the right place in sufa
this->corpusSize = 0;
for (id_type sid = filter->find_first();
@@ -204,7 +204,7 @@ namespace ugdiss
for (size_t i = 0; i < wcnt.size(); i++)
{
if (log && wcnt[i] > 5000)
- *log << "sorting " << wcnt[i]
+ *log << "sorting " << wcnt[i]
<< " entries starting with id " << i << "." << endl;
index[i+1] = index[i]+wcnt[i];
assert(index[i+1]==tmp[i]); // sanity check
@@ -247,7 +247,7 @@ namespace ugdiss
imTSA<TOKEN>::
getUpperBound(id_type id) const
{
- if (++id >= this->index.size())
+ if (++id >= this->index.size())
return NULL;
assert(index[id] <= this->sufa.size());
return reinterpret_cast<char const*>(&(this->sufa.front()) + index[id]);
@@ -263,7 +263,7 @@ namespace ugdiss
sid = reinterpret_cast<cpos const*>(p)->sid;
return p;
}
-
+
template<typename TOKEN>
char const*
imTSA<TOKEN>::
@@ -306,11 +306,11 @@ namespace ugdiss
cpos const* xq = reinterpret_cast<cpos const*>(q);
return xq-xp;
}
-
+
template<typename TOKEN>
- void
+ void
imTSA<TOKEN>::
- getCounts(char const* p, char const* const q,
+ getCounts(char const* p, char const* const q,
count_type& sids, count_type& raw) const
{
id_type sid; // uint16_t off;
@@ -328,7 +328,7 @@ namespace ugdiss
}
template<typename TOKEN>
- void
+ void
imTSA<TOKEN>::
save_as_mm_tsa(string fname) const
{
@@ -352,34 +352,34 @@ namespace ugdiss
for (size_t i = 0; i < mmIndex.size(); i++)
numwrite(out,mmIndex[i]-mmIndex[0]);
out.seekp(0);
- numwrite(out,idxStart);
+ numwrite(out,idxStart);
out.close();
}
template<typename TOKEN>
imTSA<TOKEN>::
- imTSA(imTSA<TOKEN> const& prior,
+ imTSA(imTSA<TOKEN> const& prior,
boost::shared_ptr<imTtrack<TOKEN> const> const& crp,
vector<id_type> const& newsids, size_t const vsize)
{
typename ttrack::Position::LESS<Ttrack<TOKEN> > sorter(crp.get());
-
+
// count how many tokens will be added to the TSA
// and index the new additions to the corpus
size_t newToks = 0;
- BOOST_FOREACH(id_type sid, newsids)
+ BOOST_FOREACH(id_type sid, newsids)
newToks += crp->sntLen(sid);
vector<cpos> nidx(newToks); // new array entries
-
+
size_t n = 0;
- BOOST_FOREACH(id_type sid, newsids)
+ BOOST_FOREACH(id_type sid, newsids)
{
assert(sid < crp->size());
for (size_t o = 0; o < (*crp)[sid].size(); ++o, ++n)
{ nidx[n].offset = o; nidx[n].sid = sid; }
}
sort(nidx.begin(),nidx.end(),sorter);
-
+
// create the new suffix array
this->numTokens = newToks + prior.sufa.size();
this->sufa.resize(this->numTokens);
@@ -388,10 +388,10 @@ namespace ugdiss
this->corpusSize = crp->size();
this->corpus = crp;
this->index.resize(vsize+1);
-
+
size_t i = 0;
typename vector<cpos>::iterator k = this->sufa.begin();
- // cerr << newToks << " new items at "
+ // cerr << newToks << " new items at "
// << __FILE__ << ":" << __LINE__ << endl;
for (size_t n = 0; n < nidx.size();)
{
@@ -402,7 +402,7 @@ namespace ugdiss
this->index[i] = k - this->sufa.begin();
if (++i < prior.index.size() && prior.index[i-1] < prior.index[i])
{
- k = copy(prior.sufa.begin() + prior.index[i-1],
+ k = copy(prior.sufa.begin() + prior.index[i-1],
prior.sufa.begin() + prior.index[i], k);
}
}
@@ -410,13 +410,13 @@ namespace ugdiss
if (++i < prior.index.size() && prior.index[i] > prior.index[i-1])
{
size_t j = prior.index[i-1];
- while (j < prior.index[i] && n < nidx.size()
+ while (j < prior.index[i] && n < nidx.size()
&& crp->getToken(nidx[n])->id() < i)
{
assert(k < this->sufa.end());
if (sorter(prior.sufa[j],nidx[n]))
*k++ = prior.sufa[j++];
- else
+ else
*k++ = nidx[n++];
}
while (j < prior.index[i])
@@ -436,7 +436,7 @@ namespace ugdiss
while (++i < this->index.size())
{
if (i < prior.index.size() && prior.index[i-1] < prior.index[i])
- k = copy(prior.sufa.begin() + prior.index[i-1],
+ k = copy(prior.sufa.begin() + prior.index[i-1],
prior.sufa.begin() + prior.index[i], k);
this->index[i] = k - this->sufa.begin();
}
@@ -462,5 +462,5 @@ namespace ugdiss
}
}
-
+
#endif
diff --git a/moses/TranslationModel/UG/mm/ug_im_ttrack.h b/moses/TranslationModel/UG/mm/ug_im_ttrack.h
index ac49ebcd4..20ab653f4 100644
--- a/moses/TranslationModel/UG/mm/ug_im_ttrack.h
+++ b/moses/TranslationModel/UG/mm/ug_im_ttrack.h
@@ -1,6 +1,6 @@
// -*- c++ -*-
// In-memory corpus track
-// (c) 2006-2012 Ulrich Germann.
+// (c) 2006-2012 Ulrich Germann.
#ifndef __ug_im_ttrack
#define __ug_im_ttrack
@@ -36,20 +36,20 @@ namespace ugdiss
template<typename Token> class imTtrack;
template<typename TOKEN>
- typename boost::shared_ptr<imTtrack<TOKEN> >
+ typename boost::shared_ptr<imTtrack<TOKEN> >
append(typename boost::shared_ptr<imTtrack<TOKEN> > const & crp, vector<TOKEN> const & snt);
template<typename Token>
class imTtrack : public Ttrack<Token>
{
-
+
private:
size_t numToks;
boost::shared_ptr<vector<vector<Token> > > myData; // pointer to corpus data
friend class imTSA<Token>;
- friend
- typename boost::shared_ptr<imTtrack<Token> >
+ friend
+ typename boost::shared_ptr<imTtrack<Token> >
append<Token>(typename boost::shared_ptr<imTtrack<Token> > const & crp, vector<Token> const & snt);
void m_check_token_count(); // debugging function
@@ -60,14 +60,14 @@ namespace ugdiss
imTtrack(istream& in, TokenIndex const& V, ostream* log = NULL);
imTtrack(size_t reserve = 0);
// imTtrack(istream& in, Vocab& V);
-
+
/** return pointer to beginning of sentence */
- Token const* sntStart(size_t sid) const;
+ Token const* sntStart(size_t sid) const;
/** return pointer to beginning of sentence */
- Token const* sntEnd(size_t sid) const;
+ Token const* sntEnd(size_t sid) const;
- size_t size() const;
+ size_t size() const;
size_t numTokens() const;
id_type findSid(Token const* t) const;
@@ -82,16 +82,16 @@ namespace ugdiss
size_t check = 0;
BOOST_FOREACH(vector<Token> const& s, *myData)
check += s.size();
- UTIL_THROW_IF2(check != this->numToks, "[" << HERE << "]"
+ UTIL_THROW_IF2(check != this->numToks, "[" << HERE << "]"
<< " Wrong token count after appending sentence!"
- << " Counted " << check << " but expected "
- << this->numToks << " in a total of " << myData->size()
+ << " Counted " << check << " but expected "
+ << this->numToks << " in a total of " << myData->size()
<< " sentences.");
-
+
}
template<typename Token>
- Token const*
+ Token const*
imTtrack<Token>::
sntStart(size_t sid) const // return pointer to beginning of sentence
{
@@ -99,9 +99,9 @@ namespace ugdiss
if ((*myData)[sid].size() == 0) return NULL;
return &((*myData)[sid].front());
}
-
+
template<typename Token>
- Token const*
+ Token const*
imTtrack<Token>::
sntEnd(size_t sid) const // return pointer to end of sentence
{
@@ -109,9 +109,9 @@ namespace ugdiss
if ((*myData)[sid].size() == 0) return NULL;
return &(*myData)[sid].back()+1;
}
-
+
template<typename Token>
- size_t
+ size_t
imTtrack<Token>::
size() const // return size of corpus (in number of sentences)
{
@@ -120,15 +120,15 @@ namespace ugdiss
// offset in the myIndex than there are sentences
return myData->size();
}
-
+
template<typename Token>
- size_t
+ size_t
imTtrack<Token>::
numTokens() const // return size of corpus (in number of words)
{
return numToks;
}
-
+
template<typename Token>
imTtrack<Token>::
imTtrack(istream& in, TokenIndex const& V, ostream* log)
@@ -140,19 +140,19 @@ namespace ugdiss
boost::unordered_map<string,id_type> H;
for (id_type i = 0; i < V.knownVocabSize(); ++i)
H[V[i]] = i;
- while (getline(in,line))
+ while (getline(in,line))
{
myData->push_back(vector<Token>());
- if (log && ++linectr%1000000==0)
+ if (log && ++linectr%1000000==0)
*log << linectr/1000000 << "M lines of input processed" << endl;
istringstream buf(line);
- while (buf>>w)
+ while (buf>>w)
myData->back().push_back(Token(H[w]));
myData->back().resize(myData.back().size());
numToks += myData->back().size();
}
}
-
+
template<typename Token>
imTtrack<Token>::
imTtrack(size_t reserve)
@@ -171,7 +171,7 @@ namespace ugdiss
BOOST_FOREACH(vector<Token> const& v, *d)
numToks += v.size();
}
-
+
template<typename Token>
id_type
imTtrack<Token>::
@@ -182,7 +182,7 @@ namespace ugdiss
{
vector<Token> const& v = (*myData)[i];
if (v.size() == 0) continue;
- if (&v.front() <= t && &v.back() >= t)
+ if (&v.front() <= t && &v.back() >= t)
break;
}
return i;
@@ -190,7 +190,7 @@ namespace ugdiss
/// add a sentence to the database
template<typename TOKEN>
- boost::shared_ptr<imTtrack<TOKEN> >
+ boost::shared_ptr<imTtrack<TOKEN> >
append(boost::shared_ptr<imTtrack<TOKEN> > const& crp, vector<TOKEN> const & snt)
{
#if 1
diff --git a/moses/TranslationModel/UG/mm/ug_lexical_phrase_scorer1.h b/moses/TranslationModel/UG/mm/ug_lexical_phrase_scorer1.h
index 53628e3b3..742e0dd4e 100644
--- a/moses/TranslationModel/UG/mm/ug_lexical_phrase_scorer1.h
+++ b/moses/TranslationModel/UG/mm/ug_lexical_phrase_scorer1.h
@@ -15,14 +15,14 @@ using namespace std;
namespace ugdiss
{
- template<typename TKN>
- class
+ template<typename TKN>
+ class
LexicalPhraseScorer1
{
typedef boost::unordered_map<id_type, float> inner_map_t;
vector<inner_map_t> L1_given_L2;
vector<inner_map_t> L2_given_L1;
- void load_lex (string const& fname, TokenIndex & V1, TokenIndex & V2,
+ void load_lex (string const& fname, TokenIndex & V1, TokenIndex & V2,
vector<inner_map_t> & lex);
public:
void open(string const& bname, string const& L1, string const& L2,
@@ -34,14 +34,14 @@ namespace ugdiss
TKN const* snt2, size_t const s2, size_t const e2,
char const* const aln_start, char const* const aln_end,
float & fwd_score, float& bwd_score);
- float permissive_lookup(vector<inner_map_t> const& lex,
+ float permissive_lookup(vector<inner_map_t> const& lex,
id_type const s, id_type const t) const;
};
-
+
template<typename TKN>
void
LexicalPhraseScorer1<TKN>::
- load_lex (string const& fname, TokenIndex & V1, TokenIndex & V2,
+ load_lex (string const& fname, TokenIndex & V1, TokenIndex & V2,
vector<inner_map_t> & lex)
{
boost::iostreams::filtering_istream in;
@@ -52,20 +52,20 @@ namespace ugdiss
while (in >> w1 >> w2 >> p)
{
id_type id1 = V1[w1];
- while (lex.size() <= id1)
+ while (lex.size() <= id1)
lex.push_back(inner_map_t());
lex[id1][V2[w2]] = p;
}
}
-
+
template<typename TKN>
void
LexicalPhraseScorer1<TKN>::
open(string const& bname, string const& L1, string const& L2,
TokenIndex & V1, TokenIndex & V2)
{
- string lex1 = bname+L1+"-"+L2+"."+L1+"-given-"+L2+".lex.gz";
- string lex2 = bname+L1+"-"+L2+"."+L2+"-given-"+L1+".lex.gz";
+ string lex1 = bname+L1+"-"+L2+"."+L1+"-given-"+L2+".lex.gz";
+ string lex2 = bname+L1+"-"+L2+"."+L2+"-given-"+L1+".lex.gz";
cout << lex1 << endl;
cout << lex2 << endl;
load_lex(lex1,V1,V2,L1_given_L2);
@@ -86,9 +86,9 @@ namespace ugdiss
{
i1 = aln[k]; i2 = aln[++k];
if (i1 < s1 || i1 >= e1 || i2 < s2 || i2 >= e2) continue;
- p1[i1] += permissive_lookup(L2_given_L1, snt2[i2].id(), snt1[i1].id());
+ p1[i1] += permissive_lookup(L2_given_L1, snt2[i2].id(), snt1[i1].id());
++c1[i1];
- p2[i2] += permissive_lookup(L1_given_L2, snt1[i1].id(), snt2[i2].id());
+ p2[i2] += permissive_lookup(L1_given_L2, snt1[i1].id(), snt2[i2].id());
++c2[i2];
}
fwd_score = 0;
@@ -110,7 +110,7 @@ namespace ugdiss
template<typename TKN>
float
LexicalPhraseScorer1<TKN>::
- permissive_lookup(vector<inner_map_t> const& lex,
+ permissive_lookup(vector<inner_map_t> const& lex,
id_type const s, id_type const t) const
{
if (s >= lex.size()) return 1.0;
@@ -135,9 +135,9 @@ namespace ugdiss
// assert(snt1[i2].id() < L1_given_L2.size());
// assert(snt2[i2].id() < L2_given_L1.size());
if (i1 < s1 || i1 >= e1 || i2 < s2 || i2 >= e2) continue;
- p1[i1] += permissive_lookup(L1_given_L2, snt1[i1].id(), snt2[i2].id());
+ p1[i1] += permissive_lookup(L1_given_L2, snt1[i1].id(), snt2[i2].id());
++c1[i1];
- p2[i2] += permissive_lookup(L2_given_L1, snt2[i2].id(), snt1[i1].id());
+ p2[i2] += permissive_lookup(L2_given_L1, snt2[i2].id(), snt1[i1].id());
++c2[i2];
}
fwd_score = 0;
diff --git a/moses/TranslationModel/UG/mm/ug_lexical_phrase_scorer2.h b/moses/TranslationModel/UG/mm/ug_lexical_phrase_scorer2.h
index b7e359223..fdd0366df 100644
--- a/moses/TranslationModel/UG/mm/ug_lexical_phrase_scorer2.h
+++ b/moses/TranslationModel/UG/mm/ug_lexical_phrase_scorer2.h
@@ -18,8 +18,8 @@ using namespace std;
namespace ugdiss
{
- template<typename TKN>
- class
+ template<typename TKN>
+ class
LexicalPhraseScorer2
{
vector<string> ftag;
@@ -28,28 +28,28 @@ namespace ugdiss
table_t COOC;
void open(string const& fname);
template<typename someint>
- void
+ void
score(TKN const* snt1, size_t const s1, size_t const e1,
TKN const* snt2, size_t const s2, size_t const e2,
vector<someint> const & aln, float const alpha,
float & fwd_score, float& bwd_score) const;
- void
+ void
score(TKN const* snt1, size_t const s1, size_t const e1,
TKN const* snt2, size_t const s2, size_t const e2,
char const* const aln_start, char const* const aln_end,
float const alpha, float & fwd_score, float& bwd_score) const;
// plup: permissive lookup
- float plup_fwd(id_type const s,id_type const t, float const alpha) const;
+ float plup_fwd(id_type const s,id_type const t, float const alpha) const;
float plup_bwd(id_type const s,id_type const t, float const alpha) const;
- // to be done:
- // - on-the-fly smoothing ?
- // - better (than permissive-lookup) treatment of unknown combinations
+ // to be done:
+ // - on-the-fly smoothing ?
+ // - better (than permissive-lookup) treatment of unknown combinations
// permissive lookup is currently used for compatibility reasons
// - zens-ney smoothed scoring via noisy-or combination
};
-
+
template<typename TKN>
void
LexicalPhraseScorer2<TKN>::
@@ -64,7 +64,7 @@ namespace ugdiss
LexicalPhraseScorer2<TKN>::
score(TKN const* snt1, size_t const s1, size_t const e1,
TKN const* snt2, size_t const s2, size_t const e2,
- vector<someint> const & aln, float const alpha,
+ vector<someint> const & aln, float const alpha,
float & fwd_score, float& bwd_score) const
{
vector<float> p1(e1,0), p2(e2,0);
@@ -74,9 +74,9 @@ namespace ugdiss
{
i1 = aln[k]; i2 = aln[++k];
if (i1 < s1 || i1 >= e1 || i2 < s2 || i2 >= e2) continue;
- p1[i1] += plup_fwd(snt1[i1].id(),snt2[i2].id(),alpha);
+ p1[i1] += plup_fwd(snt1[i1].id(),snt2[i2].id(),alpha);
++c1[i1];
- p2[i2] += plup_bwd(snt1[i1].id(),snt2[i2].id(),alpha);
+ p2[i2] += plup_bwd(snt1[i1].id(),snt2[i2].id(),alpha);
++c2[i2];
}
fwd_score = 0;
@@ -105,19 +105,19 @@ namespace ugdiss
<< ": alpha parameter must be >= 0");
float ret = COOC[s][t]+alpha;
ret = (ret?ret:1.)/(COOC.m1(s)+alpha);
- UTIL_THROW_IF2(ret <= 0 || ret > 1, "At " << __FILE__ << ":" << __LINE__
+ UTIL_THROW_IF2(ret <= 0 || ret > 1, "At " << __FILE__ << ":" << __LINE__
<< ": result not > 0 and <= 1. alpha = " << alpha << "; "
<< COOC[s][t] << "/" << COOC.m1(s));
#if 0
- cerr << "[" << s << "," << t << "] "
- << COOC.m1(s) << "/"
- << COOC[s][t] << "/"
+ cerr << "[" << s << "," << t << "] "
+ << COOC.m1(s) << "/"
+ << COOC[s][t] << "/"
<< COOC.m2(t) << endl;
#endif
return ret;
}
-
+
template<typename TKN>
float
LexicalPhraseScorer2<TKN>::
@@ -128,11 +128,11 @@ namespace ugdiss
<< ": alpha parameter must be >= 0");
float ret = float(COOC[s][t]+alpha);
ret = (ret?ret:1.)/(COOC.m2(t)+alpha);
- UTIL_THROW_IF2(ret <= 0 || ret > 1, "At " << __FILE__ << ":" << __LINE__
+ UTIL_THROW_IF2(ret <= 0 || ret > 1, "At " << __FILE__ << ":" << __LINE__
<< ": result not > 0 and <= 1.");
return ret;
}
-
+
template<typename TKN>
void
LexicalPhraseScorer2<TKN>::
@@ -148,9 +148,9 @@ namespace ugdiss
{
x = binread(binread(x,i1),i2);
if (i1 < s1 || i1 >= e1 || i2 < s2 || i2 >= e2) continue;
- p1[i1] += plup_fwd(snt1[i1].id(), snt2[i2].id(),alpha);
+ p1[i1] += plup_fwd(snt1[i1].id(), snt2[i2].id(),alpha);
++c1[i1];
- p2[i2] += plup_bwd(snt1[i1].id(), snt2[i2].id(),alpha);
+ p2[i2] += plup_bwd(snt1[i1].id(), snt2[i2].id(),alpha);
++c2[i2];
}
fwd_score = 0;
diff --git a/moses/TranslationModel/UG/mm/ug_lexical_reordering.cc b/moses/TranslationModel/UG/mm/ug_lexical_reordering.cc
index 495501bd6..d0522c528 100644
--- a/moses/TranslationModel/UG/mm/ug_lexical_reordering.cc
+++ b/moses/TranslationModel/UG/mm/ug_lexical_reordering.cc
@@ -10,26 +10,26 @@ namespace Moses
// bounds LFT and RGT and update the actual bounds L and R; update
// the total count of alignment links in the underlying phrase
// pair
- bool
+ bool
check(vector<ushort> const& v, // alignment row/column
size_t const LFT, size_t const RGT, // hard limits
ushort& L, ushort& R, size_t& count) // current bounds, count
{
if (v.size() == 0) return 0;
- if (L > v.front() && (L=v.front()) < LFT) return false;
+ if (L > v.front() && (L=v.front()) < LFT) return false;
if (R < v.back() && (R=v.back()) > RGT) return false;
count += v.size();
return true;
}
-
+
/// return number of alignment points in box, -1 on failure
- int
+ int
expand_block(vector<vector<ushort> > const& row2col,
vector<vector<ushort> > const& col2row,
size_t row, size_t col, // seed coordinates
- size_t const TOP, size_t const LFT, // hard limits
- size_t const BOT, size_t const RGT, // hard limits
- ushort* top = NULL, ushort* lft = NULL,
+ size_t const TOP, size_t const LFT, // hard limits
+ size_t const BOT, size_t const RGT, // hard limits
+ ushort* top = NULL, ushort* lft = NULL,
ushort* bot = NULL, ushort* rgt = NULL) // store results
{
if (row < TOP || row > BOT || col < LFT || col > RGT) return -1;
@@ -37,7 +37,7 @@ namespace Moses
UTIL_THROW_IF2(col >= col2row.size(), "out of bounds");
// ====================================================
- // tables grow downwards, so TOP is smaller than BOT!
+ // tables grow downwards, so TOP is smaller than BOT!
// ====================================================
ushort T, L, B, R; // box dimensions
@@ -45,7 +45,7 @@ namespace Moses
// if we start on an empty cell, search for the first alignment point
if (row2col[row].size() == 0 && col2row[col].size() == 0)
{
- if (row == TOP) while (row < BOT && !row2col[++row].size());
+ if (row == TOP) while (row < BOT && !row2col[++row].size());
else if (row == BOT) while (row > TOP && !row2col[--row].size());
if (col == LFT) while (col < RGT && !col2row[++col].size());
@@ -54,7 +54,7 @@ namespace Moses
if (row2col[row].size() == 0 && col2row[col].size() == 0)
return 0;
}
- if (row2col[row].size() == 0)
+ if (row2col[row].size() == 0)
row = col2row[col].front();
if (col2row[col].size() == 0)
col = row2col[row].front();
@@ -65,9 +65,9 @@ namespace Moses
if ((R = row2col[row].back()) > RGT) return -1;
if (B == T && R == L) return 1;
-
+
// start/end of row / column coverage:
- ushort rs = row, re = row, cs = col, ce = col;
+ ushort rs = row, re = row, cs = col, ce = col;
int ret = row2col[row].size();
for (size_t tmp = 1; tmp; ret += tmp)
{
@@ -127,7 +127,7 @@ namespace Moses
if (expand_block(a1,a2,x,y,T,L,B,R) >= 0)
return Moses::LRModel::S;
while (s2-- && a2[s2].size() == 0);
-
+
Moses::LRModel::ReorderingType ret;
ret = (a2[s2].size() == 0 ? po_other :
a2[s2].back() < s1 ? Moses::LRModel::DR :
diff --git a/moses/TranslationModel/UG/mm/ug_lexical_reordering.h b/moses/TranslationModel/UG/mm/ug_lexical_reordering.h
index d432ea37e..9004b757e 100644
--- a/moses/TranslationModel/UG/mm/ug_lexical_reordering.h
+++ b/moses/TranslationModel/UG/mm/ug_lexical_reordering.h
@@ -7,13 +7,13 @@ namespace Moses { namespace bitext {
typedef Moses::LRModel::ReorderingType PhraseOrientation;
-PhraseOrientation
+PhraseOrientation
find_po_fwd(std::vector<std::vector<ushort> >& a1,
std::vector<std::vector<ushort> >& a2,
size_t b1, size_t e1,
size_t b2, size_t e2);
-PhraseOrientation
+PhraseOrientation
find_po_bwd(std::vector<std::vector<ushort> >& a1,
std::vector<std::vector<ushort> >& a2,
size_t b1, size_t e1,
@@ -21,5 +21,5 @@ find_po_bwd(std::vector<std::vector<ushort> >& a1,
-
+
}} // close namespaces
diff --git a/moses/TranslationModel/UG/mm/ug_load_primer.h b/moses/TranslationModel/UG/mm/ug_load_primer.h
index 1cd167a68..961c45da1 100644
--- a/moses/TranslationModel/UG/mm/ug_load_primer.h
+++ b/moses/TranslationModel/UG/mm/ug_load_primer.h
@@ -1,7 +1,7 @@
//-*- c++ -*-
#pragma once
#include <boost/iostreams/device/mapped_file.hpp>
-//
+//
namespace Moses
{
class FastLoader
@@ -14,5 +14,5 @@ namespace Moses
void prime(boost::iostreams::mapped_file_source const& f);
-
+
};
diff --git a/moses/TranslationModel/UG/mm/ug_lru_cache.h b/moses/TranslationModel/UG/mm/ug_lru_cache.h
index d1c9a9767..0000b194f 100644
--- a/moses/TranslationModel/UG/mm/ug_lru_cache.h
+++ b/moses/TranslationModel/UG/mm/ug_lru_cache.h
@@ -30,25 +30,25 @@ namespace lru_cache
// timeval tstamp; // time stamp
typename boost::shared_ptr<VAL> ptr; // cached shared ptr
};
-
+
mutable boost::shared_mutex m_lock;
uint32_t m_qfront, m_qback;
- vector<Record> m_recs;
+ vector<Record> m_recs;
map_t m_idx;
- void
+ void
update_queue(KEY const& key, uint32_t const p)
{
// CALLER MUST LOCK!
- // "remove" item in slot p from it's current position of the
- // queue (which is different from the slot position) and move it
+ // "remove" item in slot p from it's current position of the
+ // queue (which is different from the slot position) and move it
// to the end
Record& r = m_recs[p];
if (m_recs.size() == 1)
r.next = r.prev = m_qback = m_qfront = 0;
-
+
if (r.key != key || p == m_qback) return;
-
+
if (m_qfront == p)
m_qfront = m_recs[r.next].prev = r.next;
else
@@ -65,8 +65,8 @@ namespace lru_cache
size_t capacity() const { return m_recs.capacity(); }
void reserve(size_t s) { m_recs.reserve(s); }
- sptr<VAL>
- get(KEY const& key)
+ sptr<VAL>
+ get(KEY const& key)
{
uint32_t p;
{ // brackets needed for lock scoping
@@ -86,13 +86,13 @@ namespace lru_cache
boost::lock_guard<boost::shared_mutex> lock(m_lock);
pair<typename map_t::iterator,bool> foo;
foo = m_idx.insert(make_pair(key,m_recs.size()));
-
+
uint32_t p = foo.first->second;
if (foo.second) // was not in the cache
{
if (m_recs.size() < m_recs.capacity())
m_recs.push_back(Record());
- else
+ else
{
foo.first->second = p = m_qfront;
m_idx.erase(m_recs[p].key);
diff --git a/moses/TranslationModel/UG/mm/ug_mm_2d_table.h b/moses/TranslationModel/UG/mm/ug_mm_2d_table.h
index cfc86b8fc..2455ca603 100644
--- a/moses/TranslationModel/UG/mm/ug_mm_2d_table.h
+++ b/moses/TranslationModel/UG/mm/ug_mm_2d_table.h
@@ -24,12 +24,12 @@ namespace ugdiss
ID id;
VAL val;
- bool
+ bool
operator<(ID const otherId) const
{
return id < otherId;
}
-
+
bool
operator<(Cell const& other) const
{
@@ -60,14 +60,14 @@ namespace ugdiss
ID numCols;
boost::shared_ptr<bio::mapped_file_source> file;
- VAL m1(ID key) const
- {
- return (key < numRows) ? M1[key] : INIT(0);
+ VAL m1(ID key) const
+ {
+ return (key < numRows) ? M1[key] : INIT(0);
}
VAL m2(ID key) const
{
- return (key < numCols) ? M2[key] : INIT(0);
+ return (key < numCols) ? M2[key] : INIT(0);
}
@@ -106,7 +106,7 @@ namespace ugdiss
Cell const* c = lower_bound(start,stop,key);
return (c != stop && c->id == key ? c->val : INIT(0));
}
-
+
template<typename OFFSET, typename ID, typename VAL, typename INIT>
void
mm2dTable<OFFSET,ID,VAL,INIT>::
@@ -140,10 +140,10 @@ namespace ugdiss
// cout << numRows << " rows; " << numCols << " columns " << endl;
M1 = reinterpret_cast<VAL const*>(index+numRows+1);
M2 = M1+numRows;
- // cout << "Table " << fname << " has " << numRows << " rows and "
+ // cout << "Table " << fname << " has " << numRows << " rows and "
// << numCols << " columns." << endl;
- // cout << "File size is " << file.size()*1024 << " bytes; ";
- // cout << "M2 starts " << (reinterpret_cast<char const*>(M2) - file.data())
+ // cout << "File size is " << file.size()*1024 << " bytes; ";
+ // cout << "M2 starts " << (reinterpret_cast<char const*>(M2) - file.data())
// << " bytes into the file" << endl;
// cout << M2[0] << endl;
}
@@ -156,8 +156,8 @@ namespace ugdiss
typename ICONT // inner container type
>
void
- write_mm_2d_table(ostream& out, vector<ICONT> const& T,
- vector<VAL> const* m1 = NULL,
+ write_mm_2d_table(ostream& out, vector<ICONT> const& T,
+ vector<VAL> const* m1 = NULL,
vector<VAL> const* m2 = NULL)
{
assert(T.size());
@@ -223,7 +223,7 @@ namespace ugdiss
OFFSET o = index[i]; // (index[i]-index[0])/sizeof(VAL);
out.write(reinterpret_cast<char*>(&o),sizeof(OFFSET));
}
-
+
// write marginals
out.write(reinterpret_cast<char const*>(&(*m1)[0]),m1->size()*sizeof(VAL));
out.write(reinterpret_cast<char const*>(&(*m2)[0]),m2->size()*sizeof(VAL));
diff --git a/moses/TranslationModel/UG/mm/ug_mm_bitext.h b/moses/TranslationModel/UG/mm/ug_mm_bitext.h
index 5b18ff1fa..be3fdfce8 100644
--- a/moses/TranslationModel/UG/mm/ug_mm_bitext.h
+++ b/moses/TranslationModel/UG/mm/ug_mm_bitext.h
@@ -3,7 +3,7 @@
namespace Moses
{
- namespace bitext
+ namespace bitext
{
template<typename TKN>
class mmBitext : public Bitext<TKN>
@@ -17,18 +17,18 @@ namespace Moses
template<typename TKN>
mmBitext<TKN>::
mmBitext()
- : Bitext<TKN>(new mmTtrack<TKN>(), new mmTtrack<TKN>(), new mmTtrack<char>(),
- new TokenIndex(), new TokenIndex(),
+ : Bitext<TKN>(new mmTtrack<TKN>(), new mmTtrack<TKN>(), new mmTtrack<char>(),
+ new TokenIndex(), new TokenIndex(),
new mmTSA<TKN>(), new mmTSA<TKN>())
{};
-
+
template<typename TKN>
void
mmBitext<TKN>::
load_document_map(string const& fname)
{
ifstream docmap(fname.c_str());
- // the docmap file should list the documents in the corpus
+ // the docmap file should list the documents in the corpus
// in the order in which they appear with one line per document:
// <docname> <number of lines / sentences>
//
@@ -38,22 +38,22 @@ namespace Moses
this->m_sid2docid.reset(new vector<id_type>(this->T1->size()));
while(getline(docmap,buffer))
{
- istringstream line(buffer);
+ istringstream line(buffer);
if (!(line>>docname)) continue; // empty line
if (docname.size() && docname[0] == '#') continue; // comment
size_t docid = this->m_docname2docid.size();
this->m_docname2docid[docname] = docid;
this->m_docname.push_back(docname);
line >> b;
- VERBOSE(1, "DOCUMENT MAP " << docname
+ VERBOSE(1, "DOCUMENT MAP " << docname
<< " " << a << "-" << b+a << endl);
for (b += a; a < b; ++a)
(*this->m_sid2docid)[a] = docid;
}
- UTIL_THROW_IF2(b != this->T1->size(),
+ UTIL_THROW_IF2(b != this->T1->size(),
"Document map doesn't match corpus!");
}
-
+
template<typename TKN>
void
mmBitext<TKN>::
@@ -77,6 +77,6 @@ namespace Moses
if (!access(docmapfile.c_str(),F_OK))
load_document_map(docmapfile);
}
-
+
}
}
diff --git a/moses/TranslationModel/UG/mm/ug_mm_tsa.h b/moses/TranslationModel/UG/mm/ug_mm_tsa.h
index 9d5038e26..ff2d4c693 100644
--- a/moses/TranslationModel/UG/mm/ug_mm_tsa.h
+++ b/moses/TranslationModel/UG/mm/ug_mm_tsa.h
@@ -40,7 +40,7 @@ namespace ugdiss
char const* index_jump(char const* a, char const* z, float ratio) const;
char const* getLowerBound(id_type t) const;
char const* getUpperBound(id_type t) const;
-
+
public:
mmTSA();
mmTSA(string fname, Ttrack<TOKEN> const* c);
@@ -53,24 +53,24 @@ namespace ugdiss
rawCnt(char const* p, char const * const q) const;
void
- getCounts(char const* p, char const * const q,
+ getCounts(char const* p, char const * const q,
count_type& sids, count_type& raw) const;
- char const*
+ char const*
readSid(char const* p, char const* q, id_type& sid) const;
- char const*
+ char const*
readSid(char const* p, char const* q, ::uint64_t& sid) const;
- char const*
+ char const*
readOffset(char const* p, char const* q, uint16_t& offset) const;
- char const*
+ char const*
readOffset(char const* p, char const* q, ::uint64_t& offset) const;
void sanityCheck() const;
- };
+ };
// ======================================================================
@@ -78,13 +78,13 @@ namespace ugdiss
* assumes that keys are flagged with '1', values with '0'
*/
template<typename TOKEN>
- char const*
+ char const*
mmTSA<TOKEN>::
index_jump(char const* a, char const* z, float ratio) const
{
assert(ratio >= 0 && ratio < 1);
char const* m = a+int(ratio*(z-a));
- if (m > a)
+ if (m > a)
{
while (m > a && *m < 0) --m;
while (m > a && *m >= 0) --m;
@@ -98,7 +98,7 @@ namespace ugdiss
template<typename TOKEN>
mmTSA<TOKEN>::
- mmTSA()
+ mmTSA()
{
this->startArray = NULL;
this->endArray = NULL;
@@ -136,9 +136,9 @@ namespace ugdiss
filepos_type idxOffset;
p = numread(p,idxOffset);
p = numread(p,this->indexSize);
-
+
// cerr << fname << ": " << idxOffset << " " << this->indexSize << endl;
-
+
this->startArray = p;
this->index = reinterpret_cast<filepos_type const*>(file.data()+idxOffset);
this->endArray = reinterpret_cast<char const*>(index);
@@ -153,7 +153,7 @@ namespace ugdiss
mmTSA<TOKEN>::
getLowerBound(id_type id) const
{
- if (id >= this->indexSize)
+ if (id >= this->indexSize)
return NULL;
return this->startArray + this->index[id];
}
@@ -165,7 +165,7 @@ namespace ugdiss
mmTSA<TOKEN>::
getUpperBound(id_type id) const
{
- if (id >= this->indexSize)
+ if (id >= this->indexSize)
return NULL;
// if (index[id] == index[id+1])
// return NULL;
@@ -232,13 +232,13 @@ namespace ugdiss
}
return ret;
}
-
+
// ======================================================================
template<typename TOKEN>
- void
+ void
mmTSA<TOKEN>::
- getCounts(char const* p, char const* const q,
+ getCounts(char const* p, char const* const q,
count_type& sids, count_type& raw) const
{
raw = 0;
diff --git a/moses/TranslationModel/UG/mm/ug_mm_ttrack.h b/moses/TranslationModel/UG/mm/ug_mm_ttrack.h
index 51ba21778..bfee14e3e 100644
--- a/moses/TranslationModel/UG/mm/ug_mm_ttrack.h
+++ b/moses/TranslationModel/UG/mm/ug_mm_ttrack.h
@@ -26,7 +26,7 @@ namespace ugdiss
{
using namespace std;
namespace bio=boost::iostreams;
-
+
template<typename TKN=id_type>
class mmTtrack : public Ttrack<TKN>
{
@@ -38,21 +38,21 @@ namespace ugdiss
id_type numWords;
bio::mapped_file_source file;
Token const* data; // pointer to first word of first sentence
- id_type const* index; /* pointer to index (change data type for corpora
+ id_type const* index; /* pointer to index (change data type for corpora
* of more than four billion words)
*/
public:
mmTtrack(string fname);
mmTtrack();
- // return pointer to beginning of sentence
- Token const* sntStart(size_t sid) const;
+ // return pointer to beginning of sentence
+ Token const* sntStart(size_t sid) const;
- // return pointer to end of sentence
- Token const* sntEnd(size_t sid) const;
+ // return pointer to end of sentence
+ Token const* sntEnd(size_t sid) const;
// return size of corpus (in number of sentences)
- size_t size() const;
+ size_t size() const;
// return size of corpus (in number of sentences)
size_t numTokens() const;
@@ -60,23 +60,23 @@ namespace ugdiss
// open an mmTtrack file
void open(string fname);
- // FUNCTIONS FOR BUILDING CORPUS TRACKS
- // write a blank file header at the beginning of a new ttrack file
+ // FUNCTIONS FOR BUILDING CORPUS TRACKS
+ // write a blank file header at the beginning of a new ttrack file
void write_blank_file_header(ostream& out) const;
// write the sentence index /idx/ and fill the file header
- void write_index_and_finalize(ostream& out,
+ void write_index_and_finalize(ostream& out,
vector<id_type> const& idx,
count_type tokenCount) const;
// copy a contiguous sequence of sentences to another stream
// return the number of tokens copied
id_type copySentences(ostream& trg, id_type start, id_type stop) const;
-
+
/** find the sentence id of a given token */
- id_type findSid(TKN const* t) const;
+ id_type findSid(TKN const* t) const;
- id_type findSid(id_type tokenOffset) const;
+ id_type findSid(id_type tokenOffset) const;
/// re-assign ids based on the id maps in /f/
void remap(string const fname, vector<id_type const*> const & f) const;
@@ -88,7 +88,7 @@ namespace ugdiss
void
mmTtrack<TKN>::
remap(string const fname, vector<id_type const*> const & f) const
- {
+ {
bio::mapped_file myfile(fname);
assert(myfile.is_open());
Moses::prime(myfile);
@@ -110,7 +110,7 @@ namespace ugdiss
mmTtrack<TKN>::
size() const
{
- return this->numSent;
+ return this->numSent;
}
template<typename TKN>
@@ -118,17 +118,17 @@ namespace ugdiss
mmTtrack<TKN>::
numTokens() const
{
- return this->numWords;
+ return this->numWords;
}
template<typename TKN>
- TKN const*
+ TKN const*
mmTtrack<TKN>::
sntStart(size_t sid) const // return pointer to beginning of sentence
{
if (sid >= this->numSent)
{
- cerr << "Fatal error: requested sentence #"<<sid<<" is beyond corpus size ("
+ cerr << "Fatal error: requested sentence #"<<sid<<" is beyond corpus size ("
<< this->numSent <<")" << endl;
}
assert(sid < this->numSent);
@@ -136,14 +136,14 @@ namespace ugdiss
}
template<typename TKN>
- TKN const*
+ TKN const*
mmTtrack<TKN>::
sntEnd(size_t sid) const // return pointer to end of sentence
{
assert(sid < this->numSent);
return data+index[sid+1];
}
-
+
template<typename TKN>
mmTtrack<TKN>::
mmTtrack()
@@ -161,7 +161,7 @@ namespace ugdiss
}
template<typename TKN>
- void
+ void
mmTtrack<TKN>::
open(string fname)
{
@@ -235,7 +235,7 @@ namespace ugdiss
}
template<typename TKN>
- id_type
+ id_type
mmTtrack<TKN>::
copySentences(ostream& trg, id_type start, id_type stop) const
{
diff --git a/moses/TranslationModel/UG/mm/ug_mmbitext.cc b/moses/TranslationModel/UG/mm/ug_mmbitext.cc
index 2c00665bb..34e3f1b1e 100644
--- a/moses/TranslationModel/UG/mm/ug_mmbitext.cc
+++ b/moses/TranslationModel/UG/mm/ug_mmbitext.cc
@@ -21,7 +21,7 @@
// ++this->in_progress;
// this->lock.unlock();
// }
-
+
// void
// pstats::
// release()
@@ -52,7 +52,7 @@
// mmbitext()
// : ag(NULL)
// {
-
+
// }
// bool
@@ -78,13 +78,13 @@
// {
// if (flip) { p = binread(p,trg); assert(p<x); p = binread(p,src); }
// else { p = binread(p,src); assert(p<x); p = binread(p,trg); }
-// if (src < start || src >= stop)
+// if (src < start || src >= stop)
// forbidden.set(trg);
// else
// {
// lft = min(lft,trg);
// rgt = max(rgt,trg);
-// if (core_alignment)
+// if (core_alignment)
// {
// if (flip) aln[trg].push_back(src);
// else aln[src].push_back(trg);
@@ -101,16 +101,16 @@
// }
// cout << endl;
// #endif
-
+
// for (size_t i = lft; i <= rgt; ++i)
-// if (forbidden[i])
+// if (forbidden[i])
// return false;
-
+
// s2 = lft; for (s1 = s2; s1 && !forbidden[s1-1]; --s1);
// e1 = rgt+1; for (e2 = e1; e2 < forbidden.size() && !forbidden[e2]; ++e2);
-
+
// if (lft > rgt) return false;
-// if (core_alignment)
+// if (core_alignment)
// {
// core_alignment->clear();
// if (flip)
@@ -147,11 +147,11 @@
// prep2(phrase);
// }
-// sptr<mmbitext::pstats>
+// sptr<mmbitext::pstats>
// mmbitext::
// prep2(iter const& phrase)
// {
-// if (!ag)
+// if (!ag)
// {
// ag = new agenda(*this);
// ag->add_workers(20);
@@ -197,11 +197,11 @@
// continue;
// }
-// stats->lock.lock();
-// stats->good += 1;
+// stats->lock.lock();
+// stats->good += 1;
// stats->lock.unlock();
-// for (size_t k = 0; k < aln.size(); k += 2)
+// for (size_t k = 0; k < aln.size(); k += 2)
// aln[k] += s2 - s1;
// Token const* o = (fwd ? ag.bitext.T2 : ag.bitext.T1).sntStart(sid);
// float sample_weight = 1./((s2-s1+1)*(e2-e1+1));
@@ -215,14 +215,14 @@
// stats->add(b,sample_weight,aln);
// if (i < e2) assert(b.extend(o[i].id()));
// }
-// if (fwd && s < s2)
-// for (size_t k = 0; k < aln.size(); k += 2)
+// if (fwd && s < s2)
+// for (size_t k = 0; k < aln.size(); k += 2)
// --aln[k];
// }
// stats->release();
// }
// }
-
+
// void
// mmbitext::
// pstats::
@@ -239,7 +239,7 @@
// agenda(mmbitext const& thebitext)
// : shutdown(false), doomed(0), bitext(thebitext)
// {
-
+
// }
// mmbitext::
@@ -259,13 +259,13 @@
// {
// if (ag) delete ag;
// }
-
+
// sptr<mmbitext::pstats>
// mmbitext::
// agenda::
// add_job(mmbitext::iter const& phrase, size_t const max_samples)
// {
-// static boost::posix_time::time_duration nodelay(0,0,0,0);
+// static boost::posix_time::time_duration nodelay(0,0,0,0);
// job j;
// j.stats.reset(new mmbitext::pstats());
@@ -296,11 +296,11 @@
// bool
// mmbitext::
// agenda::
-// get_task(uint64_t & sid, uint64_t & offset, uint64_t & len,
+// get_task(uint64_t & sid, uint64_t & offset, uint64_t & len,
// bool & fwd, sptr<mmbitext::pstats> & stats)
// {
// boost::unique_lock<boost::mutex> lock(this->lock);
-// if (this->doomed || this->shutdown)
+// if (this->doomed || this->shutdown)
// {
// if (this->doomed) --this->doomed;
// return false;
@@ -309,7 +309,7 @@
// // {
// // cerr << "no jobs" << endl;
// // this->ready.wait(lock);
-// // if (this->doomed || this->shutdown)
+// // if (this->doomed || this->shutdown)
// // {
// // if (this->doomed) --this->doomed;
// // return false;
@@ -346,7 +346,7 @@
// boost::lock_guard<boost::mutex> lock(stats->lock);
// if (stats->raw_cnt == ctr) ++stats->raw_cnt;
// size_t rnum = util::rand_excl(stats->raw_cnt - ctr++);
-// // cout << stats->raw_cnt << " " << ctr-1 << " "
+// // cout << stats->raw_cnt << " " << ctr-1 << " "
// // << rnum << " " << max_samples - stats->good << endl;
// if (rnum < max_samples - stats->good)
// {
@@ -364,7 +364,7 @@
// agenda::
// add_workers(int n)
// {
-// static boost::posix_time::time_duration nodelay(0,0,0,0);
+// static boost::posix_time::time_duration nodelay(0,0,0,0);
// boost::lock_guard<boost::mutex> lock(this->lock);
// // house keeping: remove all workers that have finished
// for (size_t i = 0; i < workers.size(); )
@@ -377,7 +377,7 @@
// }
// else ++i;
// }
-// if (n < 0)
+// if (n < 0)
// {
// this->doomed -= n;
// }
@@ -394,8 +394,8 @@
// mmbitext::
// jstats::
// jstats()
-// {
-// my_aln.reserve(1);
+// {
+// my_aln.reserve(1);
// }
// mmbitext::
@@ -406,8 +406,8 @@
// my_wcnt = other.wcnt();
// my_aln = other.aln();
// }
-
-// void
+
+// void
// mmbitext::
// jstats::
// add(float w, vector<uchar> const& a)
@@ -419,7 +419,7 @@
// {
// size_t i = 0;
// while (i < my_aln.size() && my_aln[i].second != a) ++i;
-// if (i == my_aln.size())
+// if (i == my_aln.size())
// my_aln.push_back(pair<size_t,vector<uchar> >(1,a));
// else
// my_aln[i].first++;
@@ -431,7 +431,7 @@
// uint32_t
// mmbitext::
// jstats::
-// rcnt() const
+// rcnt() const
// { return my_rcnt; }
// float
@@ -443,7 +443,7 @@
// vector<pair<size_t, vector<uchar> > > const&
// mmbitext::
// jstats::
-// aln() const
+// aln() const
// { return my_aln; }
// }
diff --git a/moses/TranslationModel/UG/mm/ug_mmbitext.h b/moses/TranslationModel/UG/mm/ug_mmbitext.h
index e7378e7f6..3837abc59 100644
--- a/moses/TranslationModel/UG/mm/ug_mmbitext.h
+++ b/moses/TranslationModel/UG/mm/ug_mmbitext.h
@@ -4,10 +4,10 @@
// Written by Ulrich Germann
// things we can do to speed up things:
-// - set up threads at startup time that force the
+// - set up threads at startup time that force the
// data in to memory sequentially
//
-// - use multiple agendas for better load balancing and to avoid
+// - use multiple agendas for better load balancing and to avoid
// competition for locks
#include <string>
@@ -46,8 +46,8 @@ namespace Moses {
class jstats; // phrase pair ("joint") statistics
class agenda
{
- boost::mutex lock;
- boost::condition_variable ready;
+ boost::mutex lock;
+ boost::condition_variable ready;
class job;
class worker;
list<job> joblist;
@@ -59,9 +59,9 @@ namespace Moses {
agenda(mmbitext const& bitext);
~agenda();
void add_workers(int n);
- sptr<pstats> add_job(mmbitext::iter const& phrase,
+ sptr<pstats> add_job(mmbitext::iter const& phrase,
size_t const max_samples);
- bool get_task(uint64_t & sid, uint64_t & offset, uint64_t & len,
+ bool get_task(uint64_t & sid, uint64_t & offset, uint64_t & len,
bool & fwd, sptr<mmbitext::pstats> & stats);
};
@@ -72,22 +72,22 @@ namespace Moses {
mmTtrack<char> Tx; // word alignments
mmTtrack<Token> T1,T2; // token tracks
TokenIndex V1,V2; // vocabs
- mmTSA<Token> I1,I2; // suffix arrays
+ mmTSA<Token> I1,I2; // suffix arrays
/// given the source phrase sid[start:stop]
- // find the possible start (s1 .. s2) and end (e1 .. e2)
+ // find the possible start (s1 .. s2) and end (e1 .. e2)
// points of the target phrase; if non-NULL, store word
- // alignments in *core_alignment. If /flip/, source phrase is
+ // alignments in *core_alignment. If /flip/, source phrase is
// L2.
- bool
+ bool
find_trg_phr_bounds
- (size_t const sid, size_t const start, size_t const stop,
- size_t & s1, size_t & s2, size_t & e1, size_t & e2,
+ (size_t const sid, size_t const start, size_t const stop,
+ size_t & s1, size_t & s2, size_t & e1, size_t & e2,
vector<uchar> * core_alignment, bool const flip) const;
boost::unordered_map<uint64_t,sptr<pstats> > cache1,cache2;
private:
- sptr<pstats>
+ sptr<pstats>
prep2(iter const& phrase);
public:
mmbitext();
@@ -105,8 +105,8 @@ namespace Moses {
jstats
{
uint32_t my_rcnt; // unweighted count
- float my_wcnt; // weighted count
- vector<pair<size_t, vector<uchar> > > my_aln;
+ float my_wcnt; // weighted count
+ vector<pair<size_t, vector<uchar> > > my_aln;
boost::mutex lock;
public:
jstats();
@@ -117,7 +117,7 @@ namespace Moses {
void add(float w, vector<uchar> const& a);
};
- // struct
+ // struct
// mmbitext:
// phrasepair
// {
@@ -125,32 +125,32 @@ namespace Moses {
// size_t len;
// size_t cnt;
// float fwd, bwd;
-
+
// map<uint32_t,uint32_t> aln;
// string toString(TokenIndex const& V) const;
// bool operator<(phrase const& other) const;
// bool operator>(phrase const& other) const;
// phrase(pair<pair<Token const*, size_t>,jstats> const & foo);
-
+
// };
- struct
+ struct
mmbitext::
pstats
{
boost::mutex lock; // for parallel gathering of stats
boost::condition_variable ready; // consumers can wait for this data structure to be ready.
- size_t raw_cnt; // (approximate) raw occurrence count
+ size_t raw_cnt; // (approximate) raw occurrence count
size_t sample_cnt; // number of instances selected during sampling
size_t good; // number of selected instances with valid word alignments
size_t sum_pairs;
- // size_t snt_cnt;
+ // size_t snt_cnt;
// size_t sample_snt;
size_t in_progress; // keeps track of how many threads are currently working on this
boost::unordered_map<uint64_t, jstats> trg;
- pstats();
+ pstats();
// vector<phrase> nbest;
// void select_nbest(size_t const N=10);
void release();
@@ -167,7 +167,7 @@ namespace Moses {
public:
worker(agenda& a);
void operator()();
-
+
};
class
diff --git a/moses/TranslationModel/UG/mm/ug_phrasepair.cc b/moses/TranslationModel/UG/mm/ug_phrasepair.cc
index ec3423fdc..d533dafa3 100644
--- a/moses/TranslationModel/UG/mm/ug_phrasepair.cc
+++ b/moses/TranslationModel/UG/mm/ug_phrasepair.cc
@@ -3,10 +3,10 @@
namespace Moses {
namespace bitext {
-void
+void
fill_lr_vec2
-( LRModel::ModelType mdl, float const* const cnt,
- float const total, float* v)
+( LRModel::ModelType mdl, float const* const cnt,
+ float const total, float* v)
{
if (mdl == LRModel::Monotonic)
{
@@ -23,17 +23,17 @@ fill_lr_vec2
else if (mdl == LRModel::MSD)
{
float denom = log(total + 3);
- v[LRModel::M] = log(cnt[LRModel::M] + 1) - denom;
- v[LRModel::S] = log(cnt[LRModel::S] + 1) - denom;
- v[LRModel::D] = log(cnt[LRModel::DR] +
+ v[LRModel::M] = log(cnt[LRModel::M] + 1) - denom;
+ v[LRModel::S] = log(cnt[LRModel::S] + 1) - denom;
+ v[LRModel::D] = log(cnt[LRModel::DR] +
cnt[LRModel::DL] + 1) - denom;
}
else if (mdl == LRModel::MSLR)
{
float denom = log(total + 4);
- v[LRModel::M] = log(cnt[LRModel::M] + 1) - denom;
+ v[LRModel::M] = log(cnt[LRModel::M] + 1) - denom;
v[LRModel::S] = log(cnt[LRModel::S] + 1) - denom;
- v[LRModel::DL] = log(cnt[LRModel::DL] + 1) - denom;
+ v[LRModel::DL] = log(cnt[LRModel::DL] + 1) - denom;
v[LRModel::DR] = log(cnt[LRModel::DR] + 1) - denom;
}
else UTIL_THROW2("Reordering type not recognized!");
diff --git a/moses/TranslationModel/UG/mm/ug_phrasepair.h b/moses/TranslationModel/UG/mm/ug_phrasepair.h
index 70d4b0d82..53a9f761c 100644
--- a/moses/TranslationModel/UG/mm/ug_phrasepair.h
+++ b/moses/TranslationModel/UG/mm/ug_phrasepair.h
@@ -11,7 +11,7 @@ namespace Moses
namespace bitext
{
template<typename Token>
- class
+ class
PhrasePair
{
public:
@@ -36,24 +36,24 @@ namespace Moses
bool operator<(PhrasePair const& other) const;
bool operator>(PhrasePair const& other) const;
- bool operator<=(PhrasePair const& other) const;
+ bool operator<=(PhrasePair const& other) const;
bool operator>=(PhrasePair const& other) const;
void init();
- void init(uint64_t const pid1, bool is_inverse,
+ void init(uint64_t const pid1, bool is_inverse,
Token const* x, uint32_t const len,
pstats const* ps = NULL, size_t const numfeats=0);
- PhrasePair const&
- update(uint64_t const pid2, Token const* x,
+ PhrasePair const&
+ update(uint64_t const pid2, Token const* x,
uint32_t const len, jstats const& js);
void
- fill_lr_vec(LRModel::Direction const& dir,
- LRModel::ModelType const& mdl,
+ fill_lr_vec(LRModel::Direction const& dir,
+ LRModel::ModelType const& mdl,
vector<float>& v) const;
void
- print(ostream& out, TokenIndex const& V1, TokenIndex const& V2,
+ print(ostream& out, TokenIndex const& V1, TokenIndex const& V2,
LRModel const& LR) const;
class SortByTargetIdSeq
@@ -62,7 +62,7 @@ namespace Moses
int cmp(PhrasePair const& a, PhrasePair const& b) const;
bool operator()(PhrasePair const& a, PhrasePair const& b) const;
};
-
+
class SortDescendingByJointCount
{
public:
@@ -73,8 +73,8 @@ namespace Moses
template<typename Token>
void PhrasePair<Token>
- ::init(uint64_t const pid1, bool is_inverse,
- Token const* x, uint32_t const len,
+ ::init(uint64_t const pid1, bool is_inverse,
+ Token const* x, uint32_t const len,
pstats const* ps, size_t const numfeats)
{
inverse = is_inverse;
@@ -98,15 +98,15 @@ namespace Moses
template<typename Token>
PhrasePair<Token> const&
PhrasePair<Token>
- ::update(uint64_t const pid2,
- Token const* x, uint32_t const len, jstats const& js)
+ ::update(uint64_t const pid2,
+ Token const* x, uint32_t const len, jstats const& js)
{
p2 = pid2;
start2 = x; len2 = len;
raw2 = js.cnt2();
joint = js.rcnt();
assert(js.aln().size());
- if (js.aln().size())
+ if (js.aln().size())
aln = js.aln()[0].second;
// float total_fwd = 0, total_bwd = 0;
// for (int i = 0; i <= Moses::LRModel::NONE; i++)
@@ -123,48 +123,48 @@ namespace Moses
dfwd[i] = js.dcnt_fwd(po);
dbwd[i] = js.dcnt_bwd(po);
}
-
+
indoc = js.indoc;
return *this;
}
template<typename Token>
- bool
+ bool
PhrasePair<Token>
- ::operator<(PhrasePair const& other) const
- {
- return this->score < other.score;
+ ::operator<(PhrasePair const& other) const
+ {
+ return this->score < other.score;
}
-
+
template<typename Token>
- bool
+ bool
PhrasePair<Token>
::operator>(PhrasePair const& other) const
- {
- return this->score > other.score;
+ {
+ return this->score > other.score;
}
template<typename Token>
- bool
+ bool
PhrasePair<Token>
- ::operator<=(PhrasePair const& other) const
- {
- return this->score <= other.score;
+ ::operator<=(PhrasePair const& other) const
+ {
+ return this->score <= other.score;
}
-
+
template<typename Token>
- bool
+ bool
PhrasePair<Token>
::operator>=(PhrasePair const& other) const
- {
- return this->score >= other.score;
+ {
+ return this->score >= other.score;
}
template<typename Token>
PhrasePair<Token> const&
PhrasePair<Token>
- ::operator+=(PhrasePair const& o)
- {
+ ::operator+=(PhrasePair const& o)
+ {
raw1 += o.raw1;
raw2 += o.raw2;
good1 += o.good1;
@@ -178,16 +178,16 @@ namespace Moses
template<typename Token>
PhrasePair<Token>
- ::PhrasePair(PhrasePair<Token> const& o)
+ ::PhrasePair(PhrasePair<Token> const& o)
: start1(o.start1) , start2(o.start2)
, len1(o.len1) , len2(o.len2)
, p1(o.p1) , p2(o.p2)
- , raw1(o.raw1) , raw2(o.raw2)
+ , raw1(o.raw1) , raw2(o.raw2)
, sample1(o.sample1) , sample2(o.sample2)
, good1(o.good1) , good2(o.good2)
- , joint(o.joint)
+ , joint(o.joint)
, fvals(o.fvals)
- , aln(o.aln)
+ , aln(o.aln)
, score(o.score)
, inverse(o.inverse)
, indoc(o.indoc)
@@ -198,7 +198,7 @@ namespace Moses
dbwd[i] = o.dbwd[i];
}
}
-
+
template<typename Token>
int PhrasePair<Token>
::SortByTargetIdSeq
@@ -207,7 +207,7 @@ namespace Moses
size_t i = 0;
Token const* x = a.start2;
Token const* y = b.start2;
- while (i < a.len2 && i < b.len2 && x->id() == y->id())
+ while (i < a.len2 && i < b.len2 && x->id() == y->id())
{
x = x->next();
y = y->next();
@@ -218,7 +218,7 @@ namespace Moses
if (i == b.len2) return 1;
return x->id() < y->id() ? -1 : 1;
}
-
+
template<typename Token>
bool PhrasePair<Token>
::SortByTargetIdSeq
@@ -237,16 +237,16 @@ namespace Moses
}
template<typename Token>
- bool
+ bool
PhrasePair<Token>
::SortDescendingByJointCount
::operator()(PhrasePair const& a, PhrasePair const& b) const
{
return this->cmp(a,b) < 0;
}
-
+
template<typename Token>
- void
+ void
PhrasePair<Token>
::init()
{
@@ -257,21 +257,21 @@ namespace Moses
}
- void
- fill_lr_vec2(LRModel::ModelType mdl, float const* const cnt,
+ void
+ fill_lr_vec2(LRModel::ModelType mdl, float const* const cnt,
float const total, float* v);
-
+
template<typename Token>
void
PhrasePair<Token>
- ::fill_lr_vec(LRModel::Direction const& dir,
- LRModel::ModelType const& mdl,
+ ::fill_lr_vec(LRModel::Direction const& dir,
+ LRModel::ModelType const& mdl,
vector<float>& v) const
{
// how many distinct scores do we have?
size_t num_scores = (mdl == LRModel::MSLR ? 4 : mdl == LRModel::MSD ? 3 : 2);
size_t offset;
- if (dir == LRModel::Bidirectional)
+ if (dir == LRModel::Bidirectional)
{
offset = num_scores;
num_scores *= 2;
@@ -281,32 +281,32 @@ namespace Moses
v.resize(num_scores);
// determine the denominator
- float total = 0;
- for (size_t i = 0; i <= LRModel::NONE; ++i)
+ float total = 0;
+ for (size_t i = 0; i <= LRModel::NONE; ++i)
total += dfwd[i];
if (dir != LRModel::Forward) // i.e., Backward or Bidirectional
fill_lr_vec2(mdl, dbwd, total, &v[0]);
if (dir != LRModel::Backward) // i.e., Forward or Bidirectional
fill_lr_vec2(mdl, dfwd, total, &v[offset]);
- }
-
+ }
+
template<typename Token>
void
PhrasePair<Token>
- ::print(ostream& out, TokenIndex const& V1, TokenIndex const& V2,
+ ::print(ostream& out, TokenIndex const& V1, TokenIndex const& V2,
LRModel const& LR) const
{
- out << toString (V1, this->start1, this->len1) << " ::: "
- << toString (V2, this->start2, this->len2) << " "
+ out << toString (V1, this->start1, this->len1) << " ::: "
+ << toString (V2, this->start2, this->len2) << " "
<< this->joint << " [";
for (size_t i = 0; i < this->indoc.size(); ++i)
- {
- if (i) out << " ";
- out << this->indoc[i];
+ {
+ if (i) out << " ";
+ out << this->indoc[i];
}
- out << "] [";
+ out << "] [";
vector<float> lrscores;
this->fill_lr_vec(LR.GetDirection(), LR.GetModelType(), lrscores);
for (size_t i = 0; i < lrscores.size(); ++i)
@@ -322,7 +322,7 @@ namespace Moses
if (i) *log << " ";
*log << p.dfwd[i];
}
- *log << "] [";
+ *log << "] [";
for (int i = 0; i <= Moses::LRModel::NONE; i++)
{
// PhraseOrientation po = static_cast<PhraseOrientation>(i);
diff --git a/moses/TranslationModel/UG/mm/ug_sampling_bias.cc b/moses/TranslationModel/UG/mm/ug_sampling_bias.cc
index fea57e719..95b93ec7b 100644
--- a/moses/TranslationModel/UG/mm/ug_sampling_bias.cc
+++ b/moses/TranslationModel/UG/mm/ug_sampling_bias.cc
@@ -1,7 +1,7 @@
#include "ug_sampling_bias.h"
#include <iostream>
#include <boost/foreach.hpp>
-
+
#ifdef HAVE_CURLPP
#include <curlpp/Options.hpp>
#include <curlpp/cURLpp.hpp>
@@ -15,11 +15,11 @@ namespace Moses
using ugdiss::id_type;
#ifdef HAVE_CURLPP
- std::string
+ std::string
query_bias_server(std::string const& url, std::string const& text)
{
// communicate with the bias server; resuts will be in ...
- std::ostringstream os;
+ std::ostringstream os;
curlpp::Easy myRequest;
std::string query = url+curlpp::escape(text);
myRequest.setOpt(new curlpp::options::Url(query));
@@ -32,7 +32,7 @@ namespace Moses
DocumentBias
::DocumentBias
- ( std::vector<id_type> const& sid2doc,
+ ( std::vector<id_type> const& sid2doc,
std::map<std::string,id_type> const& docname2docid,
std::string const& server_url, std::string const& text,
std::ostream* log)
@@ -45,15 +45,15 @@ namespace Moses
#endif
}
- void
+ void
DocumentBias
::init_from_json
( std::string const& json, std::map<std::string,id_type> const& docname2docid,
std::ostream* log)
- { // poor man's special purpose json parser for responses from the
+ { // poor man's special purpose json parser for responses from the
// MMT bias server
-
- std::string d; float total = 0; std::map<std::string,float> bias;
+
+ std::string d; float total = 0; std::map<std::string,float> bias;
size_t i = 0; while (i < json.size() && json[i] != '"') ++i;
while (++i < json.size())
{
@@ -61,34 +61,34 @@ namespace Moses
if (i >= json.size()) break;
float& f = bias[json.substr(k,i-k)];
while (++i < json.size() && json[i] != ':');
- k = ++i;
+ k = ++i;
while (++i < json.size() && json[i] != ',' && json[i] != '}');
total += (f = atof(json.substr(k, i-k).c_str()));
k = ++i; while (i < json.size() && json[i] != '"') ++i;
}
-
+
typedef std::pair<std::string const,float> item;
- if (total) { BOOST_FOREACH(item& x, bias) { x.second /= total; } }
+ if (total) { BOOST_FOREACH(item& x, bias) { x.second /= total; } }
if (log)
{
- BOOST_FOREACH(item& x, bias)
+ BOOST_FOREACH(item& x, bias)
{
std::map<std::string,id_type>::const_iterator m;
m = docname2docid.find(x.first);
int docid = m != docname2docid.end() ? m->second : -1;
- *log << "CONTEXT SERVER RESPONSE "
+ *log << "CONTEXT SERVER RESPONSE "
<< "[" << docid << "] "
- << x.first << " " << x.second << std::endl;
+ << x.first << " " << x.second << std::endl;
}
}
init(bias, docname2docid);
-
+
// using xmlrpc_parse_json didn't always work (parser errors)
// xmlrpc_value* b = xmlrpc_parse_json(env ,buf.str().c_str());
- // std::cerr << "|" << buf.str() << "|" << std::endl;
- // // if (b == NULL) std::cerr << "OOpS" << std::endl;
+ // std::cerr << "|" << buf.str() << "|" << std::endl;
+ // // if (b == NULL) std::cerr << "OOpS" << std::endl;
// xmlrpc_c::value_struct v(b); // = *b;
- // std::map<std::string, xmlrpc_c::value> const
+ // std::map<std::string, xmlrpc_c::value> const
// bmap = static_cast<map<std::string, xmlrpc_c::value> >(v);
// std::map<std::string, float> bias;
// typedef std::map<std::string, xmlrpc_c::value>::value_type item;
@@ -99,11 +99,11 @@ namespace Moses
// }
// typedef std::map<std::string, float>::value_type fitem;
// BOOST_FOREACH(fitem const& x, bias)
- // std::cerr << x.first << " " << x.second/total << std::endl;
+ // std::cerr << x.first << " " << x.second/total << std::endl;
// // delete b;
}
- void
+ void
DocumentBias
::init(std::map<std::string,float> const& biasmap,
std::map<std::string,id_type> const& docname2docid)
@@ -119,60 +119,60 @@ namespace Moses
BOOST_FOREACH(doc_record const& d, docname2docid)
std::cerr << "BIAS " << d.first << " " << m_bias[d.second] << std::endl;
}
-
- id_type
+
+ id_type
DocumentBias
::GetClass(id_type const idx) const
- {
- return m_sid2docid.at(idx);
+ {
+ return m_sid2docid.at(idx);
}
-
- float
+
+ float
DocumentBias
- ::operator[](id_type const idx) const
- {
- UTIL_THROW_IF2(idx >= m_sid2docid.size(),
+ ::operator[](id_type const idx) const
+ {
+ UTIL_THROW_IF2(idx >= m_sid2docid.size(),
"Out of bounds: " << idx << "/" << m_sid2docid.size());
return m_bias[m_sid2docid[idx]];
}
- size_t
+ size_t
DocumentBias
- ::size() const
+ ::size() const
{ return m_sid2docid.size(); }
SentenceBias
- ::SentenceBias(std::vector<float> const& bias)
+ ::SentenceBias(std::vector<float> const& bias)
: m_bias(bias) { }
SentenceBias
::SentenceBias(size_t const s) : m_bias(s) { }
- id_type
+ id_type
SentenceBias
::GetClass(id_type idx) const { return idx; }
- float&
+ float&
SentenceBias
- ::operator[](id_type const idx)
+ ::operator[](id_type const idx)
{
UTIL_THROW_IF2(idx >= m_bias.size(), "Out of bounds");
return m_bias[idx];
}
- float
+ float
SentenceBias
- ::operator[](id_type const idx) const
- {
+ ::operator[](id_type const idx) const
+ {
UTIL_THROW_IF2(idx >= m_bias.size(), "Out of bounds");
return m_bias[idx];
}
-
- size_t
+
+ size_t
SentenceBias
::size() const { return m_bias.size(); }
-
+
}
}
diff --git a/moses/TranslationModel/UG/mm/ug_sampling_bias.h b/moses/TranslationModel/UG/mm/ug_sampling_bias.h
index faed69e63..f540ddc76 100644
--- a/moses/TranslationModel/UG/mm/ug_sampling_bias.h
+++ b/moses/TranslationModel/UG/mm/ug_sampling_bias.h
@@ -15,54 +15,54 @@ namespace Moses
std::string query_bias_server(std::string const& url, std::string const& text);
- class SamplingBias
+ class SamplingBias
{
public:
int loglevel;
std::ostream* log;
- virtual float
+ virtual float
operator[](id_type const ID) const = 0;
// returns (unnormalized bias) for the class of item ID
- virtual size_t size() const = 0;
+ virtual size_t size() const = 0;
// number of classes
-
- virtual id_type
+
+ virtual id_type
GetClass(id_type const ID) const = 0;
// returns class of item ID
};
-
+
class
DocumentBias : public SamplingBias
{
std::vector<id_type> const& m_sid2docid;
std::vector<float> m_bias;
-
+
public:
-
+
DocumentBias(std::vector<id_type> const& sid2doc,
std::map<std::string,id_type> const& docname2docid,
std::string const& server_url, std::string const& text,
std::ostream* log);
- void
- init_from_json
- ( std::string const& json,
+ void
+ init_from_json
+ ( std::string const& json,
std::map<std::string,id_type> const& docname2docid,
std::ostream* log );
-
- void
+
+ void
init
( std::map<std::string,float> const& biasmap,
std::map<std::string,id_type> const& docname2docid);
-
- id_type
+
+ id_type
GetClass(id_type const idx) const;
- float
+ float
operator[](id_type const idx) const;
- size_t
+ size_t
size() const;
};
@@ -76,10 +76,10 @@ namespace Moses
id_type GetClass(id_type idx) const;
- float& operator[](id_type const idx);
- float operator[](id_type const idx) const;
+ float& operator[](id_type const idx);
+ float operator[](id_type const idx) const;
size_t size() const;
-
+
};
}
diff --git a/moses/TranslationModel/UG/mm/ug_tsa_array_entry.h b/moses/TranslationModel/UG/mm/ug_tsa_array_entry.h
index 034a74bd9..3af929644 100644
--- a/moses/TranslationModel/UG/mm/ug_tsa_array_entry.h
+++ b/moses/TranslationModel/UG/mm/ug_tsa_array_entry.h
@@ -1,13 +1,13 @@
// -*- c++ -*-
// (c) 2007-2010 Ulrich Germann
// implementation of stuff related to ArrayEntries
-// this file should only be included via ug_tsa_base.h,
+// this file should only be included via ug_tsa_base.h,
// never by itself
#ifndef __ug_tsa_array_entry_h
#define __ug_tsa_array_entry_h
#include "ug_ttrack_position.h"
-namespace ugdiss
+namespace ugdiss
{
namespace tsa
{
@@ -20,7 +20,7 @@ namespace ugdiss
ArrayEntry();
ArrayEntry(char const* p);
-
+
template<typename TSA_TYPE>
ArrayEntry(TSA_TYPE const* S, char const* p);
@@ -34,7 +34,7 @@ namespace ugdiss
}
// template<typename TSA_TYPE>
- // class SamplingArrayEntryIterator
+ // class SamplingArrayEntryIterator
// : public tsa::ArrayEntry
// {
// size_t const N; // (approximate) total number of occurrences
@@ -46,7 +46,7 @@ namespace ugdiss
// public:
// SamplingArrayEntryIterator(TSA_TYPE::tree_iterator const& m, size_t const s);
// bool step(); // returns false when at end of range
- // bool done(); //
+ // bool done(); //
// };
// template<typename TSA_TYPE>
@@ -60,7 +60,7 @@ namespace ugdiss
// , root(m.root)
// , stop(m.upper_bound(-1))
// { }
-
+
// template<typename TSA_TYPE>
// bool
// SamplingArrayEntryIterator::
diff --git a/moses/TranslationModel/UG/mm/ug_tsa_base.h b/moses/TranslationModel/UG/mm/ug_tsa_base.h
index 83593c79c..8a4117910 100644
--- a/moses/TranslationModel/UG/mm/ug_tsa_base.h
+++ b/moses/TranslationModel/UG/mm/ug_tsa_base.h
@@ -26,7 +26,7 @@ namespace ugdiss
namespace bio=boost::iostreams;
template<typename TKN>
- TKN const*
+ TKN const*
next(TKN const* x)
{
return static_cast<TKN const*>(x ? x->next() : NULL);
@@ -42,20 +42,20 @@ namespace ugdiss
* ordering of sequences. Both are decleared/defined in
* ug_corpus_token.{h|cc}
*/
- template<typename TKN>
- class TSA
+ template<typename TKN>
+ class TSA
{
public:
virtual ~TSA() {};
- typedef TSA_tree_iterator<TKN> tree_iterator;
+ typedef TSA_tree_iterator<TKN> tree_iterator;
// allows iteration over the array as if it were a trie
- typedef tsa::ArrayEntry ArrayEntry;
+ typedef tsa::ArrayEntry ArrayEntry;
/* an entry in the array, for iteration over all occurrences of a
* particular sequence */
- // typedef boost::dynamic_bitset<uint64_t> bitset;
+ // typedef boost::dynamic_bitset<uint64_t> bitset;
typedef boost::shared_ptr<bitvector> bitset_pointer;
typedef TKN Token;
- typedef BitSetCache<TSA<TKN> > BSC_t;
+ typedef BitSetCache<TSA<TKN> > BSC_t;
/* to allow caching of bit vectors that are expensive to create on
* the fly */
@@ -67,7 +67,7 @@ namespace ugdiss
char const* endArray; // ... and end ...
// of memory block storing the actual TSA
- size_t corpusSize;
+ size_t corpusSize;
/** size of the corpus (in number of sentences) of the corpus
* underlying the sequence array.
*
@@ -76,37 +76,37 @@ namespace ugdiss
* suffix array is based on a subset
* of the sentences of /corpus/.
*/
-
- id_type numTokens;
+
+ id_type numTokens;
/** size of the corpus (in number of tokens) of the corpus underlying the
- * sequence array.
+ * sequence array.
*
* ATTENTION: This number may differ from corpus->numTokens(), namely when
- * the suffix array is based on a subset of the sentences of
+ * the suffix array is based on a subset of the sentences of
* /corpus/.
*/
- id_type indexSize;
- // (number of entries +1) in the index of root-level nodes
+ id_type indexSize;
+ // (number of entries +1) in the index of root-level nodes
size_t BitSetCachingThreshold;
-
+
////////////////////////////////////////////////////////////////
// private member functions:
- /** @return an index position approximately /fraction/ between
+ /** @return an index position approximately /fraction/ between
* /startRange/ and /endRange/.
- */
- virtual
- char const*
- index_jump(char const* startRange,
- char const* stopRange,
+ */
+ virtual
+ char const*
+ index_jump(char const* startRange,
+ char const* stopRange,
float fraction) const = 0;
-
- /** return the index position of the first item that
+
+ /** return the index position of the first item that
* is equal to or includes [refStart,refStart+refLen) as a prefix
*/
- char const*
+ char const*
find_start(char const* lo, char const* const upX,
TKN const* const refStart, int refLen,
size_t d) const;
@@ -114,19 +114,19 @@ namespace ugdiss
/** return the index position of the first item that is greater than
* [refStart,refStart+refLen) and does not include it as a prefix
*/
- char const*
+ char const*
find_end(char const* lo, char const* const upX,
TKN const* const refStart, int refLen,
size_t d) const;
-
+
/** return the index position of the first item that is longer than
* [refStart,refStart+refLen) and includes it as a prefix
*/
- char const*
+ char const*
find_longer(char const* lo, char const* const upX,
TKN const* const refStart, int refLen,
size_t d) const;
-
+
/** Returns a char const* pointing to the position in the data block
* where the first item starting with token /id/ is located.
*/
@@ -140,37 +140,37 @@ namespace ugdiss
public:
boost::shared_ptr<BSC_t> bsc;
-
+
char const* arrayStart() const { return startArray; }
char const* arrayEnd() const { return endArray; }
- /** @return a pointer to the beginning of the index entry range covering
+ /** @return a pointer to the beginning of the index entry range covering
* [keyStart,keyStop)
*/
- char const*
+ char const*
lower_bound(typename vector<TKN>::const_iterator const& keyStart,
typename vector<TKN>::const_iterator const& keyStop) const;
- char const*
+ char const*
lower_bound(TKN const* keyStart, TKN const* keyStop) const;
- char const*
+ char const*
lower_bound(TKN const* keyStart, int keyLen) const;
- /** @return a pointer to the end point of the index entry range covering
+ /** @return a pointer to the end point of the index entry range covering
* [keyStart,keyStop)
*/
- char const*
- upper_bound(typename vector<TKN>::const_iterator const& keyStart,
+ char const*
+ upper_bound(typename vector<TKN>::const_iterator const& keyStart,
typename vector<TKN>::const_iterator const& keyStop) const;
- char const*
+ char const*
upper_bound(TKN const* keyStart, int keyLength) const;
/** dump all suffixes in order to /out/ */
void dump(ostream& out, TokenIndex const& T) const;
-
- /** fill the dynamic bit set with true for all sentences that contain
+
+ /** fill the dynamic bit set with true for all sentences that contain
* /phrase/.
* @return the raw number of occurrences.
*/
@@ -188,70 +188,70 @@ namespace ugdiss
setTokenBits(char const* startRange, char const* endRange, size_t len,
bitvector& bs) const;
- /** read the sentence ID into /sid/
- * @return position of associated offset.
+ /** read the sentence ID into /sid/
+ * @return position of associated offset.
*
* The function provides an abstraction that uses the right
* interpretation of the position based on the subclass
* (memory-mapped or in-memory).
*/
virtual
- char const*
+ char const*
readSid(char const* p, char const* q, id_type& sid) const = 0;
virtual
- char const*
+ char const*
readSid(char const* p, char const* q, ::uint64_t& sid) const = 0;
- /** read the offset part of the index entry into /offset/
- * @return position of the next entry in the index.
+ /** read the offset part of the index entry into /offset/
+ * @return position of the next entry in the index.
*
* The function provides an abstraction that uses the right
* interpretation of the position based on the subclass
* (memory-mapped or in-memory).
*/
virtual
- char const*
+ char const*
readOffset(char const* p, char const* q, uint16_t& offset) const = 0;
virtual
- char const*
+ char const*
readOffset(char const* p, char const* q, ::uint64_t& offset) const = 0;
- /** @return sentence count
+ /** @return sentence count
*/
count_type
- sntCnt(char const* p, char const* const q) const;
-
+ sntCnt(char const* p, char const* const q) const;
+
count_type
- rawCnt2(TKN const* keyStart, size_t keyLen) const;
+ rawCnt2(TKN const* keyStart, size_t keyLen) const;
/** @return raw occurrence count
- *
+ *
* depending on the subclass, this is constant time (imTSA) or
* linear in in the number of occurrences (mmTSA).
*/
virtual
count_type
- rawCnt(char const* p, char const* const q) const = 0;
+ rawCnt(char const* p, char const* const q) const = 0;
- /** get both sentence and word counts.
+ /** get both sentence and word counts.
*
* Avoids having to go over the byte range representing the range
* of suffixes in question twice when dealing with memory-mapped
* suffix arrays.
- */
+ */
virtual
- void
- getCounts(char const* p, char const* const q,
- count_type& sids, count_type& raw) const = 0;
+ void
+ getCounts(char const* p, char const* const q,
+ count_type& sids, count_type& raw) const = 0;
- string
- suffixAt(char const* p, TokenIndex const* V=NULL, size_t maxlen=0)
+ string
+ suffixAt(char const* p, TokenIndex const* V=NULL, size_t maxlen=0)
const;
- string
- suffixAt(ArrayEntry const& I, TokenIndex const* V=NULL, size_t maxlen=0)
+ string
+ suffixAt(ArrayEntry const& I, TokenIndex const* V=NULL, size_t maxlen=0)
const;
tsa::ArrayEntry& readEntry(char const* p, tsa::ArrayEntry& I) const;
@@ -260,36 +260,36 @@ namespace ugdiss
char const* dataEnd() const;
bool sanityCheck1() const;
-
- /** Return an ID that represents a given phrase;
+
+ /** Return an ID that represents a given phrase;
This should NEVER be 0!
- Structure of a phrase ID:
+ Structure of a phrase ID:
leftmost 32 bits: sentence ID in the corpus
next 16 bits: offset from the start of the sentence
next 16 bits: length of the phrase
*/
- ::uint64_t
+ ::uint64_t
getSequenceId(typename vector<TKN>::const_iterator const& pstart,
typename vector<TKN>::const_iterator const& pstop) const;
-
- ::uint64_t
+
+ ::uint64_t
getSequenceId(TKN const* t, ushort plen) const;
-
+
/** Return the phrase represented by phrase ID pid_ */
string
getSequence(::uint64_t pid, TokenIndex const& V) const;
-
+
/** Return the phrase represented by phrase ID pid_ */
vector<TKN>
getSequence(::uint64_t pid) const;
- TKN const*
+ TKN const*
getSequenceStart(::uint64_t) const;
ushort
getSequenceLength(::uint64_t) const;
- size_t
+ size_t
getCorpusSize() const;
Ttrack<TKN> const*
@@ -297,13 +297,13 @@ namespace ugdiss
bitset_pointer
getBitSet(TKN const* startKey, size_t keyLen) const;
-
+
boost::shared_ptr<bitvector>
- findTree(TKN const* treeStart, TKN const* treeEnd,
+ findTree(TKN const* treeStart, TKN const* treeEnd,
bitvector const* filter) const;
-
+
size_t markOccurrences(char const* lo, char const* up, size_t len,
- bitvector& bitset,
+ bitvector& bitset,
bool markOnlyStartPosition) const;
bool
@@ -311,13 +311,13 @@ namespace ugdiss
vector<tree_iterator>& dest) const;
double aveIndexEntrySize() const
- {
- return (endArray-startArray)/double(numTokens);
+ {
+ return (endArray-startArray)/double(numTokens);
}
public:
- // virtual
- sptr<TSA_tree_iterator<TKN> >
+ // virtual
+ sptr<TSA_tree_iterator<TKN> >
find(TKN const* start, size_t len) const
{
typedef TSA_tree_iterator<TKN> iter;
@@ -333,7 +333,7 @@ namespace ugdiss
// ======================================================================
// template<typename TOKEN>
- // sptr<TSA_tree_iterator<TOKEN> >
+ // sptr<TSA_tree_iterator<TOKEN> >
// TSA<TOKEN>::
// find(TOKEN const* start, size_t len) const
// {
@@ -354,7 +354,7 @@ namespace ugdiss
* @return number of total occurrences of the phrase in the corpus
*/
template<typename TKN>
- count_type
+ count_type
TSA<TKN>::
fillBitSet(vector<TKN> const& key,
bitvector& bitset) const
@@ -362,7 +362,7 @@ namespace ugdiss
if (!key.size()) return 0;
return fillBitset(&(key[0]),key.size(),bitset);
}
-
+
// ---------------------------------------------------------------------------
/** fill the dynamic bitset with information as to which sentences
@@ -370,7 +370,7 @@ namespace ugdiss
* @return number of total occurrences of the phrase in the corpus
*/
template<typename TKN>
- count_type
+ count_type
TSA<TKN>::
fillBitSet(TKN const* key, size_t keyLen,
bitvector& bitset) const
@@ -385,7 +385,7 @@ namespace ugdiss
// ---------------------------------------------------------------------------
template<typename TKN>
- count_type
+ count_type
TSA<TKN>::
setBits(char const* startRange, char const* endRange,
bitvector& bs) const
@@ -452,7 +452,7 @@ namespace ugdiss
* of the token range matching [startKey,endKey)
*/
template<typename TKN>
- char const*
+ char const*
TSA<TKN>::
find_start(char const* lo, char const* const upX,
TKN const* const refStart, int refLen,
@@ -485,12 +485,12 @@ namespace ugdiss
* of the token range matching [startKey,endKey)
*/
template<typename TKN>
- char const*
+ char const*
TSA<TKN>::
find_end(char const* lo, char const* const upX,
TKN const* const refStart, int refLen,
size_t d) const
-
+
{
char const* up = upX;
if (lo >= up) return NULL;
@@ -520,7 +520,7 @@ namespace ugdiss
* but continues on
*/
template<typename TKN>
- char const*
+ char const*
TSA<TKN>::
find_longer(char const* lo, char const* const upX,
TKN const* const refStart, int refLen,
@@ -553,7 +553,7 @@ namespace ugdiss
* given search phrase
*/
template<typename TKN>
- char const*
+ char const*
TSA<TKN>::
lower_bound(typename vector<TKN>::const_iterator const& keyStart,
typename vector<TKN>::const_iterator const& keyStop) const
@@ -570,7 +570,7 @@ namespace ugdiss
* given search phrase
*/
template<typename TKN>
- char const*
+ char const*
TSA<TKN>::
lower_bound(TKN const* const keyStart,
TKN const* const keyStop) const
@@ -579,7 +579,7 @@ namespace ugdiss
}
template<typename TKN>
- char const*
+ char const*
TSA<TKN>::
lower_bound(TKN const* const keyStart, int keyLen) const
{
@@ -595,7 +595,7 @@ namespace ugdiss
* given search phrase (i.e., points just beyond the range)
*/
template<typename TKN>
- char const*
+ char const*
TSA<TKN>::
upper_bound(typename vector<TKN>::const_iterator const& keyStart,
typename vector<TKN>::const_iterator const& keyStop) const
@@ -612,7 +612,7 @@ namespace ugdiss
* given search phrase (i.e., points just beyond the range)
*/
template<typename TKN>
- char const*
+ char const*
TSA<TKN>::
upper_bound(TKN const* keyStart, int keyLength) const
{
@@ -645,7 +645,7 @@ namespace ugdiss
{
return getSequenceId(&(*pstart),pstop-pstart);
}
-
+
//---------------------------------------------------------------------------
template<typename TKN>
@@ -667,14 +667,14 @@ namespace ugdiss
//---------------------------------------------------------------------------
- template<typename TKN>
+ template<typename TKN>
vector<TKN>
TSA<TKN>::
getSequence(::uint64_t pid) const
{
size_t plen = pid % 65536;
size_t offset = (pid >> 16) % 65536;
- TKN const* w = corpus->sntStart(pid >> 32)+offset;
+ TKN const* w = corpus->sntStart(pid >> 32)+offset;
vector<TKN> ret(plen);
for (size_t i = 0; i < plen; i++, w = w->next())
{
@@ -684,7 +684,7 @@ namespace ugdiss
return ret;
}
- template<typename TKN>
+ template<typename TKN>
string
TSA<TKN>::
getSequence(::uint64_t pid, TokenIndex const& V) const
@@ -698,21 +698,21 @@ namespace ugdiss
return buf.str();
}
-
+
//---------------------------------------------------------------------------
- template<typename TKN>
+ template<typename TKN>
TKN const*
TSA<TKN>::
getSequenceStart(::uint64_t pid) const
{
size_t offset = (pid >> 16) % 65536;
- return corpus->sntStart(pid >> 32)+offset;
+ return corpus->sntStart(pid >> 32)+offset;
}
-
+
//---------------------------------------------------------------------------
- template<typename TKN>
+ template<typename TKN>
ushort
TSA<TKN>::
getSequenceLength(::uint64_t pid) const
@@ -729,7 +729,7 @@ namespace ugdiss
{
return corpusSize;
}
-
+
//---------------------------------------------------------------------------
template<typename TKN>
@@ -756,7 +756,7 @@ namespace ugdiss
};
//---------------------------------------------------------------------------
-
+
/// find all instances of the tree described by [treeStart, treeEnd)
template<typename TKN>
typename TSA<TKN>::bitset_pointer
@@ -764,7 +764,7 @@ namespace ugdiss
getBitSet(TKN const* startKey, size_t keyLen) const
{
bitset_pointer ret;
- if (bsc != NULL)
+ if (bsc != NULL)
ret = bsc->get(startKey,keyLen);
else
{
@@ -773,7 +773,7 @@ namespace ugdiss
}
return ret;
}
-
+
//---------------------------------------------------------------------------
template<typename TKN>
@@ -809,12 +809,12 @@ namespace ugdiss
vector<tree_iterator>& dest) const
{
dest.assign(terminals.count(),tree_iterator(this));
- for (size_t i = terminals.find_first(), k = 0;
- i < terminals.size();
+ for (size_t i = terminals.find_first(), k = 0;
+ i < terminals.size();
i = terminals.find_next(i),++k)
{
for (TKN const* x = base+i; x && x->id(); x = x->next())
- if (!dest[k].extend(x->id()))
+ if (!dest[k].extend(x->id()))
return false;
}
typename tree_iterator::SortByApproximateCount sorter;
diff --git a/moses/TranslationModel/UG/mm/ug_tsa_bitset_cache.h b/moses/TranslationModel/UG/mm/ug_tsa_bitset_cache.h
index 3111f1c1d..d13449e36 100644
--- a/moses/TranslationModel/UG/mm/ug_tsa_bitset_cache.h
+++ b/moses/TranslationModel/UG/mm/ug_tsa_bitset_cache.h
@@ -20,7 +20,7 @@ namespace ugdiss
{
using namespace std;
template<typename TSA>
- class
+ class
BitSetCache
{
public:
@@ -33,15 +33,15 @@ namespace ugdiss
myMap cached1,cached2;
int threshold;
public:
-
+
BitSetCache() : tsa(NULL), threshold(0) {};
- BitSetCache(TSA const* t, size_t th=4194304)
+ BitSetCache(TSA const* t, size_t th=4194304)
{
init(t,th);
};
- void
- init(TSA const* t, size_t th=4194304)
+ void
+ init(TSA const* t, size_t th=4194304)
{
tsa = t;
threshold = th;
@@ -84,7 +84,7 @@ namespace ugdiss
if (up-lo > threshold)
{
pair<char const*,ushort> k(lo,keyLen);
- // cout << "bla " << keyStart->id() << " "
+ // cout << "bla " << keyStart->id() << " "
// << cached2.size() << " " << up-lo << " " << k.second << endl;
myMapIter m = cached2.find(k);
if (m != cached2.end())
diff --git a/moses/TranslationModel/UG/mm/ug_tsa_tree_iterator.h b/moses/TranslationModel/UG/mm/ug_tsa_tree_iterator.h
index 508f09304..053ff2445 100644
--- a/moses/TranslationModel/UG/mm/ug_tsa_tree_iterator.h
+++ b/moses/TranslationModel/UG/mm/ug_tsa_tree_iterator.h
@@ -23,24 +23,24 @@ namespace ugdiss
template<typename T>
void display(T const* x, string label)
{
- cout << label << ":";
- for (;x;x=next(x)) cout << " " << x->lemma;
- cout << endl;
+ cout << label << ":";
+ for (;x;x=next(x)) cout << " " << x->lemma;
+ cout << endl;
}
#endif
template<typename T> class TSA;
// CLASS DEFINITION
- // The TSA_tree_iterator allows traversal of a Token Sequence Array
+ // The TSA_tree_iterator allows traversal of a Token Sequence Array
// as if it was a trie.
//
// down(): go to first child
- // over(): go to next sibling
+ // over(): go to next sibling
// up(): go to parent
// extend(id): go to a specific child node
// all four functions return true if successful, false otherwise
- // lower_bound() and upper_bound() give the range of entries in the
+ // lower_bound() and upper_bound() give the range of entries in the
// array covered by the "virtual trie node".
template<typename TKN>
class
@@ -49,7 +49,7 @@ namespace ugdiss
protected:
vector<char const*> lower;
vector<char const*> upper;
-
+
// for debugging ...
void showBounds(ostream& out) const;
public:
@@ -57,7 +57,7 @@ namespace ugdiss
virtual ~TSA_tree_iterator() {};
- TSA<Token> const* root;
+ TSA<Token> const* root;
// TO BE DONE: make the pointer private and add a const function
// to return the pointer
@@ -66,16 +66,16 @@ namespace ugdiss
TSA_tree_iterator(TSA<Token> const* s, TSA_tree_iterator<Token> const& other);
TSA_tree_iterator(TSA<Token> const* r, id_type const* s, size_t const len);
// TSA_tree_iterator(TSA<Token> const* s, Token const& t);
- TSA_tree_iterator(TSA<Token> const* s,
- Token const* kstart,
- size_t const len,
+ TSA_tree_iterator(TSA<Token> const* s,
+ Token const* kstart,
+ size_t const len,
bool full_match_only=true);
- TSA_tree_iterator(TSA<Token> const* s,
- Token const* kstart,
- Token const* kend,
+ TSA_tree_iterator(TSA<Token> const* s,
+ Token const* kstart,
+ Token const* kend,
bool full_match_only=true);
- TSA_tree_iterator(TSA<Token> const* s,
- TokenIndex const& V,
+ TSA_tree_iterator(TSA<Token> const* s,
+ TokenIndex const& V,
string const& key);
char const* lower_bound(int p) const;
@@ -104,49 +104,49 @@ namespace ugdiss
bool match(id_type sid) const;
// fillBitSet: deprecated; use markSentences() instead
- count_type
+ count_type
fillBitSet(boost::dynamic_bitset<typename ::uint64_t>& bitset) const;
-
- count_type
+
+ count_type
markEndOfSequence(Token const* start, Token const* stop,
boost::dynamic_bitset<typename ::uint64_t>& dest) const;
- count_type
+ count_type
markSequence(Token const* start, Token const* stop, bitvector& dest) const;
-
- count_type
+
+ count_type
markSentences(boost::dynamic_bitset<typename ::uint64_t>& bitset) const;
-
- count_type
+
+ count_type
markOccurrences(boost::dynamic_bitset<typename ::uint64_t>& bitset,
bool markOnlyStartPosition=false) const;
-
- count_type
+
+ count_type
markOccurrences(vector<ushort>& dest) const;
-
- ::uint64_t
+
+ ::uint64_t
getSequenceId() const;
-
- // equivalent but more efficient than
+
+ // equivalent but more efficient than
// bitvector tmp; markSentences(tmp); foo &= tmp;
bitvector& filterSentences(bitvector& foo) const;
-
+
/// a special auxiliary function for finding trees
- void
- tfAndRoot(bitvector const& ref, // reference root positions
+ void
+ tfAndRoot(bitvector const& ref, // reference root positions
bitvector const& snt, // relevant sentences
bitvector& dest) const;
-
+
size_t arrayByteSpanSize(int p = -1) const
- {
+ {
if (lower.size()==0) return 0; // or endArray-startArray???
if (p < 0) p = lower.size()+p;
assert(p >=0 && p < int(lower.size()));
return lower.size() ? upper[p]-lower[p] : 0;
}
-
+
struct SortByApproximateCount
{
- bool operator()(TSA_tree_iterator const& a,
+ bool operator()(TSA_tree_iterator const& a,
TSA_tree_iterator const& b) const
{
if (a.size()==0) return b.size() ? true : false;
@@ -175,7 +175,7 @@ namespace ugdiss
size_t grow(Token const* snt, bitvector const& cov)
{
- size_t x = cov.find_first();
+ size_t x = cov.find_first();
while (x < cov.size() && extend(snt[x]))
x = cov.find_next(x);
return this->size();
@@ -183,7 +183,7 @@ namespace ugdiss
sptr<vector<typename ttrack::Position> >
randomSample(int level, size_t N) const;
-
+
};
//---------------------------------------------------------------------------
@@ -205,7 +205,7 @@ namespace ugdiss
assert(root->corpus->getToken(A));
assert(lo < root->getUpperBound(root->corpus->getToken(A)->id()));
lower.push_back(lo);
- Token const* foo = this->getToken(0);
+ Token const* foo = this->getToken(0);
upper.push_back(root->upper_bound(foo,lower.size()));
return lower.size();
}
@@ -217,7 +217,7 @@ namespace ugdiss
Token const* z = next(a);
for (size_t i = 1; i < size(); ++i) z = next(z);
if (z < root->corpus->sntStart(A.sid) || z >= root->corpus->sntEnd(A.sid))
- {
+ {
char const* up = upper.back();
lo = root->find_longer(lo,up,a,lower.size(),0);
if (!lo) return false;
@@ -244,7 +244,7 @@ namespace ugdiss
TSA_tree_iterator<Token>::
over()
{
- if (lower.size() == 0)
+ if (lower.size() == 0)
return false;
if (lower.size() == 1)
{
@@ -254,7 +254,7 @@ namespace ugdiss
if (upper[0] < hi)
{
lower[0] = upper[0];
- Token const* foo = this->getToken(0);
+ Token const* foo = this->getToken(0);
upper.back() = root->upper_bound(foo,lower.size());
}
else
@@ -264,11 +264,11 @@ namespace ugdiss
char const* lo = root->getLowerBound(wid);
if (lo == root->endArray) return false;
char const* hi = root->getUpperBound(wid);
- if (!hi) return false;
+ if (!hi) return false;
if (lo == hi) continue;
assert(lo);
lower[0] = lo;
- Token const* foo = this->getToken(0);
+ Token const* foo = this->getToken(0);
upper.back() = root->upper_bound(foo,lower.size());
break;
}
@@ -293,7 +293,7 @@ namespace ugdiss
// display(root->corpus->getToken(U),"L2");
- Token const* foo = this->getToken(0);
+ Token const* foo = this->getToken(0);
// display(foo,"F!");
upper.back() = root->upper_bound(foo,lower.size());
return true;
@@ -326,17 +326,17 @@ namespace ugdiss
template<typename Token>
TSA_tree_iterator<Token>::
TSA_tree_iterator(TSA<Token> const* s)
- : root(s)
+ : root(s)
{};
template<typename Token>
TSA_tree_iterator<Token>::
TSA_tree_iterator(TSA<Token> const* s, TSA_tree_iterator<Token> const& other)
- : root(s)
+ : root(s)
{
Token const* x = other.getToken(0);
for (size_t i = 0; i < other.size() && this->extend(x->id()); ++i)
- x = x->next();
+ x = x->next();
};
@@ -345,9 +345,9 @@ namespace ugdiss
TSA_tree_iterator<Token>::
TSA_tree_iterator
(TSA<Token> const* r,
- id_type const* s,
+ id_type const* s,
size_t const len)
- : root(r)
+ : root(r)
{
for (id_type const* e = s + len; s < e && extend(*s); ++s);
};
@@ -357,16 +357,16 @@ namespace ugdiss
#if 1
template<typename Token>
TSA_tree_iterator<Token>::
- TSA_tree_iterator(TSA<Token> const* s,
- TokenIndex const& V,
+ TSA_tree_iterator(TSA<Token> const* s,
+ TokenIndex const& V,
string const& key)
: root(s)
{
istringstream buf(key); string w;
while (buf >> w)
{
- if (this->extend(V[w]))
- continue;
+ if (this->extend(V[w]))
+ continue;
else
{
lower.clear();
@@ -377,7 +377,7 @@ namespace ugdiss
};
#endif
-#if 0
+#if 0
// ---------------------------------------------------------------------------
template<typename Token>
@@ -394,7 +394,7 @@ namespace ugdiss
template<typename Token>
TSA_tree_iterator<Token>::
TSA_tree_iterator(TSA<Token> const* s, Token const& t)
- : root(s)
+ : root(s)
{
if (!root) return;
char const* up = root->getUpperBound(t.id());
@@ -409,33 +409,33 @@ namespace ugdiss
template<typename Token>
TSA_tree_iterator<Token>::
- TSA_tree_iterator(TSA<Token> const* s, Token const* kstart,
+ TSA_tree_iterator(TSA<Token> const* s, Token const* kstart,
size_t const len, bool full_match_only)
- : root(s)
+ : root(s)
{
if (!root) return;
size_t i = 0;
for (; i < len && kstart && extend(*kstart); ++i)
kstart = kstart->next();
- if (full_match_only && i != len)
+ if (full_match_only && i != len)
{
lower.clear();
upper.clear();
}
};
- // DEPRECATED: DO NOT USE. Use the one that takes the length
+ // DEPRECATED: DO NOT USE. Use the one that takes the length
// instead of kend.
template<typename Token>
TSA_tree_iterator<Token>::
- TSA_tree_iterator(TSA<Token> const* s, Token const* kstart,
+ TSA_tree_iterator(TSA<Token> const* s, Token const* kstart,
Token const* kend, bool full_match_only)
- : root(s)
+ : root(s)
{
- for (;kstart != kend; kstart = kstart->next())
- if (!extend(*kstart))
+ for (;kstart != kend; kstart = kstart->next())
+ if (!extend(*kstart))
break;
- if (full_match_only && kstart != kend)
+ if (full_match_only && kstart != kend)
{
lower.clear();
upper.clear();
@@ -445,7 +445,7 @@ namespace ugdiss
// ---------------------------------------------------------------------------
// EXTEND
// ---------------------------------------------------------------------------
-
+
template<typename Token>
bool
TSA_tree_iterator<Token>::
@@ -496,9 +496,9 @@ namespace ugdiss
template<typename Token>
size_t
TSA_tree_iterator<Token>::
- size() const
- {
- return lower.size();
+ size() const
+ {
+ return lower.size();
}
// ---------------------------------------------------------------------------
@@ -506,8 +506,8 @@ namespace ugdiss
template<typename Token>
id_type
TSA_tree_iterator<Token>::
- getSid() const
- {
+ getSid() const
+ {
char const* p = (lower.size() ? lower.back() : root->startArray);
char const* q = (upper.size() ? upper.back() : root->endArray);
id_type sid;
@@ -520,8 +520,8 @@ namespace ugdiss
template<typename Token>
::uint64_t
TSA_tree_iterator<Token>::
- getPid(int p) const
- {
+ getPid(int p) const
+ {
if (this->size() == 0) return 0;
if (p < 0) p += upper.size();
char const* lb = lower_bound(p);
@@ -531,7 +531,7 @@ namespace ugdiss
::uint64_t ret = (sid<<32) + (off<<16) + ::uint64_t(p+1);
return ret;
}
-
+
// ---------------------------------------------------------------------------
template<typename Token>
@@ -614,7 +614,7 @@ namespace ugdiss
//---------------------------------------------------------------------------
template<typename Token>
- count_type
+ count_type
TSA_tree_iterator<Token>::
fillBitSet(boost::dynamic_bitset<typename ::uint64_t>& bitset) const
{
@@ -624,7 +624,7 @@ namespace ugdiss
//---------------------------------------------------------------------------
template<typename Token>
- count_type
+ count_type
TSA_tree_iterator<Token>::
markSentences(boost::dynamic_bitset<typename ::uint64_t>& bitset) const
{
@@ -651,7 +651,7 @@ namespace ugdiss
//---------------------------------------------------------------------------
template<typename Token>
- count_type
+ count_type
TSA_tree_iterator<Token>::
markOccurrences(boost::dynamic_bitset<typename ::uint64_t>& bitset, bool markOnlyStartPosition) const
{
@@ -667,7 +667,7 @@ namespace ugdiss
//---------------------------------------------------------------------------
template<typename Token>
- count_type
+ count_type
TSA_tree_iterator<Token>::
markOccurrences(vector<ushort>& dest) const
{
@@ -694,10 +694,10 @@ namespace ugdiss
}
//---------------------------------------------------------------------------
- // mark all endpoints of instances of the path represented by this
+ // mark all endpoints of instances of the path represented by this
// iterator in the sentence [start,stop)
template<typename Token>
- count_type
+ count_type
TSA_tree_iterator<Token>::
markEndOfSequence(Token const* start, Token const* stop,
boost::dynamic_bitset<typename ::uint64_t>& dest) const
@@ -726,10 +726,10 @@ namespace ugdiss
}
//---------------------------------------------------------------------------
- // mark all occurrences of the sequence represented by this
+ // mark all occurrences of the sequence represented by this
// iterator in the sentence [start,stop)
template<typename Token>
- count_type
+ count_type
TSA_tree_iterator<Token>::
markSequence(Token const* start,
Token const* stop,
@@ -784,7 +784,7 @@ namespace ugdiss
{
assert(x);
buf << (i > start ? " " : "");
- if (V) buf << (*V)[x->id()];
+ if (V) buf << (*V)[x->id()];
else buf << x->id();
}
return buf.str();
@@ -807,13 +807,13 @@ namespace ugdiss
{
assert(x);
buf << (i > start ? " " : "");
- buf << V[x->id()].str;
+ buf << V[x->id()].str;
}
return buf.str();
}
#endif
- /// @return true if the sentence [start,stop) contains the sequence
+ /// @return true if the sentence [start,stop) contains the sequence
template<typename Token>
bool
TSA_tree_iterator<Token>::
@@ -823,7 +823,7 @@ namespace ugdiss
for (Token const* t = start; t < stop; ++t)
{
if (*t != *a) continue;
- Token const* b = a;
+ Token const* b = a;
Token const* y = t;
size_t i;
for (i = 1; i < lower.size(); ++i)
@@ -838,7 +838,7 @@ namespace ugdiss
return false;
}
- /// @return true if the sentence /sid/ contains the sequence
+ /// @return true if the sentence /sid/ contains the sequence
template<typename Token>
bool
TSA_tree_iterator<Token>::
@@ -851,9 +851,9 @@ namespace ugdiss
// @param sntcheck: number of roots in the respective sentence
// @param dest: bitvector to keep track of the exact root location
template<typename Token>
- void
+ void
TSA_tree_iterator<Token>::
- tfAndRoot(bitvector const& ref, // reference root positions
+ tfAndRoot(bitvector const& ref, // reference root positions
bitvector const& snt, // relevant sentences
bitvector& dest) const
{
@@ -880,12 +880,12 @@ namespace ugdiss
filterSentences(bitvector& bv) const
{
float aveSntLen = root->corpus->numTokens()/root->corpus->size();
- size_t ANDcost = bv.size()/8; // cost of dest&=ref;
+ size_t ANDcost = bv.size()/8; // cost of dest&=ref;
float aveEntrySize = ((root->endArray-root->startArray)
/root->corpus->numTokens());
if (arrayByteSpanSize()+ANDcost < aveEntrySize*aveSntLen*bv.count())
{
- bitvector tmp(bv.size());
+ bitvector tmp(bv.size());
markSentences(tmp);
bv &= tmp;
}
@@ -906,9 +906,9 @@ namespace ugdiss
if (level < 0) level += lower.size();
assert(level >=0);
- sptr<vector<typename ttrack::Position> >
+ sptr<vector<typename ttrack::Position> >
ret(new vector<typename ttrack::Position>(N));
-
+
size_t m=0; // number of samples selected so far
typename Token::ArrayEntry I(lower.at(level));
@@ -916,7 +916,7 @@ namespace ugdiss
while (m < N && (I.next) < stop)
{
root->readEntry(I.next,I);
-
+
// t: expected number of remaining samples
const double t = (stop - I.pos)/root->aveIndexEntrySize();
const double r = util::rand_excl(t);
@@ -930,6 +930,6 @@ namespace ugdiss
return ret;
}
-
+
} // end of namespace ugdiss
#endif
diff --git a/moses/TranslationModel/UG/mm/ug_ttrack_base.cc b/moses/TranslationModel/UG/mm/ug_ttrack_base.cc
index 644c53c3a..60d20a5f9 100644
--- a/moses/TranslationModel/UG/mm/ug_ttrack_base.cc
+++ b/moses/TranslationModel/UG/mm/ug_ttrack_base.cc
@@ -9,12 +9,12 @@
namespace ugdiss
{
using namespace std;
-
+
#if 0
template<>
id_type
Ttrack<id_type>::
- toID(id_type const& t)
+ toID(id_type const& t)
{
return t;
}
diff --git a/moses/TranslationModel/UG/mm/ug_ttrack_base.h b/moses/TranslationModel/UG/mm/ug_ttrack_base.h
index f9864bda6..d087a9e58 100644
--- a/moses/TranslationModel/UG/mm/ug_ttrack_base.h
+++ b/moses/TranslationModel/UG/mm/ug_ttrack_base.h
@@ -2,7 +2,7 @@
// Base class for corpus tracks. mmTtrack (memory-mapped Ttrack) and imTtrack (in-memory Ttrack)
// are derived from this class.
-// This code is part of a refactorization of the earlier Ttrack class as a template class for
+// This code is part of a refactorization of the earlier Ttrack class as a template class for
// tokens of arbitrary fixed-length size.
// (c) 2007-2009 Ulrich Germann. All rights reserved.
@@ -27,8 +27,8 @@ namespace ugdiss
typedef boost::dynamic_bitset<uint64_t> bdBitset;
template<typename sid_t, typename off_t, typename len_t>
- void
- parse_pid(uint64_t const pid, sid_t & sid,
+ void
+ parse_pid(uint64_t const pid, sid_t & sid,
off_t & off, len_t& len)
{
static uint64_t two32 = uint64_t(1)<<32;
@@ -39,12 +39,12 @@ namespace ugdiss
}
template<typename Token>
- string
+ string
toString(TokenIndex const& V, Token const* x, size_t const len)
{
if (!len) return "";
UTIL_THROW_IF2(!x, HERE << ": Unexpected end of phrase!");
- ostringstream buf;
+ ostringstream buf;
buf << V[x->id()];
size_t i = 1;
for (x = x->next(); x && i < len; ++i, x = x->next())
@@ -63,66 +63,66 @@ namespace ugdiss
typedef TKN Token;
/** @return a pointer to beginning of sentence /sid/ */
- virtual
- TKN const*
- sntStart(size_t sid) const = 0;
+ virtual
+ TKN const*
+ sntStart(size_t sid) const = 0;
/** @return end point of sentence /sid/ */
- virtual
- TKN const*
- sntEnd(size_t sid) const = 0;
+ virtual
+ TKN const*
+ sntEnd(size_t sid) const = 0;
TKN const*
getToken(Position const& p) const;
template<typename T>
- T const*
- getTokenAs(Position const& p) const
+ T const*
+ getTokenAs(Position const& p) const
{ return reinterpret_cast<T const*>(getToken(p)); }
template<typename T>
T const*
- sntStartAs(id_type sid) const
+ sntStartAs(id_type sid) const
{ return reinterpret_cast<T const*>(sntStart(sid)); }
template<typename T>
T const*
- sntEndAs(id_type sid) const
+ sntEndAs(id_type sid) const
{ return reinterpret_cast<T const*>(sntEnd(sid)); }
/** @return length of sentence /sid/ */
size_t sntLen(size_t sid) const { return sntEnd(sid) - sntStart(sid); }
- size_t
+ size_t
startPos(id_type sid) const { return sntStart(sid)-sntStart(0); }
-
- size_t
+
+ size_t
endPos(id_type sid) const { return sntEnd(sid)-sntStart(0); }
/** Don't use this unless you want a copy of the sentence */
- vector<TKN>
- operator[](id_type sid) const
- {
- return vector<TKN>(sntStart(sid),sntEnd(sid));
+ vector<TKN>
+ operator[](id_type sid) const
+ {
+ return vector<TKN>(sntStart(sid),sntEnd(sid));
}
/** @return size of corpus in number of sentences */
- virtual size_t size() const = 0;
+ virtual size_t size() const = 0;
/** @return size of corpus in number of words/tokens */
- virtual size_t numTokens() const = 0;
+ virtual size_t numTokens() const = 0;
- /** @return string representation of sentence /sid/
+ /** @return string representation of sentence /sid/
* Currently only defined for Ttrack<id_type> */
string str(id_type sid, TokenIndex const& T) const;
string pid2str(TokenIndex const* V, uint64_t pid) const;
- // /** @return string representation of sentence /sid/
+ // /** @return string representation of sentence /sid/
// * Currently only defined for Ttrack<id_type> */
// string str(id_type sid, Vocab const& V) const;
-
- /** counts the tokens in the corpus; used for example in the construction of
+
+ /** counts the tokens in the corpus; used for example in the construction of
* token sequence arrays */
count_type count_tokens(vector<count_type>& cnt, bdBitset const* filter,
int lengthCutoff=0, ostream* log=NULL) const;
@@ -130,7 +130,7 @@ namespace ugdiss
// static id_type toID(TKN const& t);
int cmp(Position const& A, Position const& B, int keyLength) const;
- int cmp(Position const& A, TKN const* keyStart, int keyLength=-1,
+ int cmp(Position const& A, TKN const* keyStart, int keyLength=-1,
int depth=0) const;
virtual id_type findSid(TKN const* t) const = 0; // find the sentence id of a given token
@@ -139,18 +139,18 @@ namespace ugdiss
// the following three functions are currently not used by any program ... (deprecate?)
TKN const*
- find_next_within_sentence(TKN const* startKey,
- int keyLength,
+ find_next_within_sentence(TKN const* startKey,
+ int keyLength,
Position startHere) const;
Position
- find_first(TKN const* startKey, int keyLength,
+ find_first(TKN const* startKey, int keyLength,
bdBitset const* filter=NULL) const;
Position
- find_next(TKN const* startKey, int keyLength, Position startAfter,
+ find_next(TKN const* startKey, int keyLength, Position startAfter,
bdBitset const* filter=NULL) const;
-
+
virtual size_t offset(TKN const* t) const { return t-sntStart(0); }
};
@@ -171,11 +171,11 @@ namespace ugdiss
template<typename TKN>
count_type
Ttrack<TKN>::
- count_tokens(vector<count_type>& cnt, bdBitset const* filter,
+ count_tokens(vector<count_type>& cnt, bdBitset const* filter,
int lengthCutoff, ostream* log) const
{
- bdBitset filter2;
- if (!filter)
+ bdBitset filter2;
+ if (!filter)
{
filter2.resize(this->size());
filter2.set();
@@ -184,21 +184,21 @@ namespace ugdiss
cnt.clear();
cnt.reserve(500000);
count_type totalCount=0;
-
+
int64_t expectedTotal=0;
for (size_t sid = 0; sid < this->size(); ++sid)
expectedTotal += this->sntLen(sid);
-
+
for (size_t sid = filter->find_first();
sid < filter->size();
sid = filter->find_next(sid))
{
TKN const* k = sntStart(sid);
TKN const* const stop = sntEnd(sid);
- if (lengthCutoff && stop-k >= lengthCutoff)
+ if (lengthCutoff && stop-k >= lengthCutoff)
{
- if (log)
- *log << "WARNING: skipping sentence #" << sid
+ if (log)
+ *log << "WARNING: skipping sentence #" << sid
<< " with more than 65536 tokens" << endl;
expectedTotal -= stop-k;
}
@@ -217,7 +217,7 @@ namespace ugdiss
if (this->size() == filter->count())
{
if (totalCount != expectedTotal)
- cerr << "OOPS: expected " << expectedTotal
+ cerr << "OOPS: expected " << expectedTotal
<< " tokens but counted " << totalCount << endl;
assert(totalCount == expectedTotal);
}
@@ -256,16 +256,16 @@ namespace ugdiss
a = next(a);
b = next(b);
// cerr << keyLength << "b. " << (a ? a->lemma : 0) << " " << (b ? b->lemma : 0) << endl;
- if (--keyLength==0 || b < bosB || b >= eosB)
- {
+ if (--keyLength==0 || b < bosB || b >= eosB)
+ {
ret = (a < bosA || a >= eosA) ? 0 : 1;
break;
}
}
// cerr << "RETURNING " << ret << endl;
- return ret;
+ return ret;
}
-
+
template<typename TKN>
int
Ttrack<TKN>::
@@ -287,17 +287,17 @@ namespace ugdiss
if (*x > *key) return 2;
key = key->next();
x = x->next();
- if (--keyLength==0) // || !key)
+ if (--keyLength==0) // || !key)
return (x == stopx) ? 0 : 1;
assert(key);
}
- return -1;
+ return -1;
}
template<typename TKN>
- TKN const*
+ TKN const*
Ttrack<TKN>::
- find_next_within_sentence(TKN const* startKey, int keyLength,
+ find_next_within_sentence(TKN const* startKey, int keyLength,
Position startHere) const
{
for (TKN const* t = getToken(startHere); t; t = getToken(startHere))
@@ -308,12 +308,12 @@ namespace ugdiss
{
TKN const* k = startKey->next();
TKN const* t2 = t->next();
- if (t2)
+ if (t2)
{
- cout << t2->lemma << "." << int(t2->minpos) << " "
+ cout << t2->lemma << "." << int(t2->minpos) << " "
<< k->lemma << "." << int(k->minpos) << " "
<< t2->cmp(*k) << endl;
- }
+ }
}
#endif
int x = cmp(startHere,startKey,keyLength,0);
@@ -330,8 +330,8 @@ namespace ugdiss
{
if (filter)
{
- for (size_t sid = filter->find_first();
- sid < filter->size();
+ for (size_t sid = filter->find_first();
+ sid < filter->size();
sid = filter->find_next(sid))
{
TKN const* x = find_next_within_sentence(startKey,keyLength,Position(sid,0));
@@ -348,7 +348,7 @@ namespace ugdiss
}
return Position(this->size(),0);
}
-
+
template<typename TKN>
typename Ttrack<TKN>::Position
Ttrack<TKN>::
@@ -411,6 +411,6 @@ namespace ugdiss
}
return buf.str();
}
-
+
}
#endif
diff --git a/moses/TranslationModel/UG/mm/ug_ttrack_position.h b/moses/TranslationModel/UG/mm/ug_ttrack_position.h
index 64fab3afb..6d473f263 100644
--- a/moses/TranslationModel/UG/mm/ug_ttrack_position.h
+++ b/moses/TranslationModel/UG/mm/ug_ttrack_position.h
@@ -6,7 +6,7 @@
#include "ug_typedefs.h"
// A token position in a Ttrack, with a LESS functor for comparing token
-// positions in whatever sorting order the underlying token type implies.
+// positions in whatever sorting order the underlying token type implies.
//
// (c) 2007-2010 Ulrich Germann. All rights reserved.
@@ -26,19 +26,19 @@ namespace ugdiss
Position(id_type _sid, ushort _off);
template<typename TTRACK_TYPE> class LESS; // probably abandoned
}; // end of deklaration of Position
-
-#if 1
+
+#if 1
template<typename TTRACK_TYPE>
- class
+ class
Position::
LESS
{
TTRACK_TYPE const* c;
public:
typedef typename TTRACK_TYPE::Token Token;
-
+
LESS(TTRACK_TYPE const* crp) : c(crp) {};
-
+
bool operator()(Position const& A, Position const& B) const
{
Token const* a = c->getToken(A); assert(a);
@@ -48,30 +48,30 @@ namespace ugdiss
Token const* bosA = c->sntStart(A.sid);
Token const* eosA = c->sntEnd(A.sid);
-
+
Token const* bosB = c->sntStart(B.sid);
Token const* eosB = c->sntEnd(B.sid);
-
+
#if 0
- Token const* z = a;
+ Token const* z = a;
cout << "A: " << z->id();
for (z = next(z); z >= bosA && z < eosA; z = next(z))
- cout << "-" << z->id();
+ cout << "-" << z->id();
cout << endl;
-
- z = b;
+
+ z = b;
cout << "B: " << z->id();
for (z = next(z); z >= bosB && z < eosB; z = next(z))
- cout << "-" << z->id();
+ cout << "-" << z->id();
cout << endl;
#endif
while (*a == *b)
{
a = next(a);
b = next(b);
- if (a < bosA || a >= eosA)
+ if (a < bosA || a >= eosA)
return (b >= bosB && b < eosB);
- if (b < bosB || b >= eosB)
+ if (b < bosB || b >= eosB)
return false;
}
int x = a->cmp(*b);
@@ -86,4 +86,4 @@ namespace ugdiss
} // end of namespace ttrack
} // end of namespace ugdiss
#endif
-
+
diff --git a/moses/TranslationModel/UG/mm/ug_typedefs.h b/moses/TranslationModel/UG/mm/ug_typedefs.h
index 83c8684e0..0181bef9e 100644
--- a/moses/TranslationModel/UG/mm/ug_typedefs.h
+++ b/moses/TranslationModel/UG/mm/ug_typedefs.h
@@ -24,7 +24,7 @@ namespace ugdiss
typedef vector<vector<short> > short_2d_table;
typedef vector<short_2d_table> short_3d_table;
typedef vector<short_3d_table> short_4d_table;
-
+
typedef vector<vector<int> > int_2d_table;
typedef vector<int_2d_table> int_3d_table;
typedef vector<int_3d_table> int_4d_table;
diff --git a/moses/TranslationModel/UG/mmsapt.cpp b/moses/TranslationModel/UG/mmsapt.cpp
index 4e9e97766..6e680bbc5 100644
--- a/moses/TranslationModel/UG/mmsapt.cpp
+++ b/moses/TranslationModel/UG/mmsapt.cpp
@@ -19,7 +19,7 @@ namespace Moses
using namespace std;
using namespace boost;
- void
+ void
fillIdSeq(Phrase const& mophrase, size_t const ifactor,
TokenIndex const& V, vector<id_type>& dest)
{
@@ -30,8 +30,8 @@ namespace Moses
dest[i] = V[f->ToString()];
}
}
-
- void
+
+ void
parseLine(string const& line, map<string,string> & param)
{
char_separator<char> sep("; ");
@@ -79,13 +79,13 @@ namespace Moses
, context_key(((char*)this)+1)
// , m_tpc_ctr(0)
, ofactor(1,0)
- {
- init(line);
+ {
+ init(line);
setup_local_feature_functions();
Register();
}
- void
+ void
Mmsapt::
read_config_file(string fname, map<string,string>& param)
{
@@ -99,9 +99,9 @@ namespace Moses
tokenizer<char_separator<char> >::const_iterator t = tokens.begin();
if (t == tokens.end()) continue;
string& foo = param[*t++];
- if (t == tokens.end() || foo.size()) continue;
+ if (t == tokens.end() || foo.size()) continue;
// second condition: do not overwrite settings from the line in moses.ini
- UTIL_THROW_IF2(*t++ != "=" || t == tokens.end(),
+ UTIL_THROW_IF2(*t++ != "=" || t == tokens.end(),
"Syntax error in Mmsapt config file '" << fname << "'.");
for (foo = *t++; t != tokens.end(); foo += " " + *t++);
}
@@ -120,7 +120,7 @@ namespace Moses
m_is_integer.push_back(ff->isIntegerValued(i));
}
}
-
+
bool Mmsapt::isLogVal(int i) const { return m_is_logval.at(i); }
bool Mmsapt::isInteger(int i) const { return m_is_integer.at(i); }
@@ -130,7 +130,7 @@ namespace Moses
parseLine(line,this->param);
this->m_numScoreComponents = atoi(param["num-features"].c_str());
-
+
m = param.find("config");
if (m != param.end())
read_config_file(m->second,param);
@@ -138,17 +138,17 @@ namespace Moses
m = param.find("base");
if (m != param.end())
{
- m_bname = m->second;
+ m_bname = m->second;
m = param.find("path");
UTIL_THROW_IF2((m != param.end() && m->second != m_bname),
- "Conflicting aliases for path:\n"
+ "Conflicting aliases for path:\n"
<< "path=" << string(m->second) << "\n"
<< "base=" << m_bname.c_str() );
}
else m_bname = param["path"];
L1 = param["L1"];
L2 = param["L2"];
-
+
UTIL_THROW_IF2(m_bname.size() == 0, "Missing corpus base name at " << HERE);
UTIL_THROW_IF2(L1.size() == 0, "Missing L1 tag at " << HERE);
UTIL_THROW_IF2(L2.size() == 0, "Missing L2 tag at " << HERE);
@@ -157,11 +157,11 @@ namespace Moses
pair<string,string> dflt("input-factor","0");
input_factor = atoi(param.insert(dflt).first->second.c_str());
// shouldn't that be a string?
-
+
dflt = pair<string,string> ("output-factor","0");
output_factor = atoi(param.insert(dflt).first->second.c_str());
ofactor.assign(1,output_factor);
-
+
dflt = pair<string,string> ("smooth",".01");
m_lbop_conf = atof(param.insert(dflt).first->second.c_str());
@@ -177,7 +177,7 @@ namespace Moses
dflt = pair<string,string>("bias-loglevel","0");
m_bias_loglevel = atoi(param.insert(dflt).first->second.c_str());
-
+
dflt = pair<string,string>("table-limit","20");
m_tableLimit = atoi(param.insert(dflt).first->second.c_str());
@@ -188,25 +188,25 @@ namespace Moses
// in plain language: cache size is at least 1000, and 10,000 by default
// this cache keeps track of the most frequently used target
// phrase collections even when not actively in use
-
+
// Feature functions are initialized in function Load();
- param.insert(pair<string,string>("pfwd", "g"));
- param.insert(pair<string,string>("pbwd", "g"));
- param.insert(pair<string,string>("logcnt", "0"));
- param.insert(pair<string,string>("coh", "0"));
- param.insert(pair<string,string>("rare", "1"));
- param.insert(pair<string,string>("prov", "1"));
-
+ param.insert(pair<string,string>("pfwd", "g"));
+ param.insert(pair<string,string>("pbwd", "g"));
+ param.insert(pair<string,string>("logcnt", "0"));
+ param.insert(pair<string,string>("coh", "0"));
+ param.insert(pair<string,string>("rare", "1"));
+ param.insert(pair<string,string>("prov", "1"));
+
poolCounts = true;
-
+
// this is for pre-comuted sentence-level bias; DEPRECATED!
- if ((m = param.find("bias")) != param.end())
+ if ((m = param.find("bias")) != param.end())
m_bias_file = m->second;
- if ((m = param.find("bias-server")) != param.end())
+ if ((m = param.find("bias-server")) != param.end())
m_bias_server = m->second;
- if ((m = param.find("bias-logfile")) != param.end())
+ if ((m = param.find("bias-logfile")) != param.end())
{
m_bias_logfile = m->second;
if (m_bias_logfile == "/dev/stderr")
@@ -220,10 +220,10 @@ namespace Moses
}
}
- if ((m = param.find("lr-func")) != param.end())
+ if ((m = param.find("lr-func")) != param.end())
m_lr_func_name = m->second;
- if ((m = param.find("extra")) != param.end())
+ if ((m = param.find("extra")) != param.end())
m_extra_data = m->second;
dflt = pair<string,string>("tuneable","true");
@@ -239,7 +239,7 @@ namespace Moses
known_parameters.push_back("L1");
known_parameters.push_back("L2");
known_parameters.push_back("Mmsapt");
- known_parameters.push_back("PhraseDictionaryBitextSampling");
+ known_parameters.push_back("PhraseDictionaryBitextSampling");
// alias for Mmsapt
known_parameters.push_back("base"); // alias for path
known_parameters.push_back("bias");
@@ -259,7 +259,7 @@ namespace Moses
known_parameters.push_back("name");
known_parameters.push_back("num-features");
known_parameters.push_back("output-factor");
- known_parameters.push_back("path");
+ known_parameters.push_back("path");
known_parameters.push_back("pbwd");
known_parameters.push_back("pfwd");
known_parameters.push_back("prov");
@@ -275,12 +275,12 @@ namespace Moses
{
UTIL_THROW_IF2(!binary_search(known_parameters.begin(),
known_parameters.end(), m->first),
- HERE << ": Unknown parameter specification for Mmsapt: "
+ HERE << ": Unknown parameter specification for Mmsapt: "
<< m->first);
}
}
- void
+ void
Mmsapt::
load_bias(string const fname)
{
@@ -298,7 +298,7 @@ namespace Moses
// - sane word alignment?
vector<string> text1,text2,symal;
string line;
- filtering_istream in1,in2,ina;
+ filtering_istream in1,in2,ina;
open_input_stream(bname+L1+".txt.gz",in1);
open_input_stream(bname+L2+".txt.gz",in2);
@@ -314,7 +314,7 @@ namespace Moses
assert(btdyn);
cerr << "Loaded " << btdyn->T1->size() << " sentence pairs" << endl;
}
-
+
template<typename fftype>
void
Mmsapt::
@@ -334,7 +334,7 @@ namespace Moses
ff.reset(new fftype(spec));
register_ff(ff, m_active_ff_dyn);
}
- else
+ else
{
sptr<fftype> ff(new fftype(spec));
register_ff(ff, m_active_ff_common);
@@ -344,7 +344,7 @@ namespace Moses
template<typename fftype>
void
Mmsapt::
- check_ff(string const ffname, float const xtra,
+ check_ff(string const ffname, float const xtra,
vector<sptr<pscorer> >* registry)
{
string const& spec = param[ffname];
@@ -361,7 +361,7 @@ namespace Moses
ff.reset(new fftype(xtra,spec));
register_ff(ff, m_active_ff_dyn);
}
- else
+ else
{
sptr<fftype> ff(new fftype(xtra,spec));
register_ff(ff, m_active_ff_common);
@@ -394,28 +394,28 @@ namespace Moses
// standard (default) feature set
if (fsname == "standard")
{
- // lexical scores
+ // lexical scores
string lexfile = m_bname + L1 + "-" + L2 + ".lex";
- sptr<PScoreLex1<Token> >
+ sptr<PScoreLex1<Token> >
ff(new PScoreLex1<Token>(param["lex_alpha"],lexfile));
register_ff(ff,m_active_ff_common);
-
+
// these are always computed on pooled data
check_ff<PScoreRareness<Token> > ("rare", &m_active_ff_common);
check_ff<PScoreUnaligned<Token> >("unal", &m_active_ff_common);
check_ff<PScoreCoherence<Token> >("coh", &m_active_ff_common);
-
- // for these ones either way is possible (specification ends with '+'
- // if corpus-specific
+
+ // for these ones either way is possible (specification ends with '+'
+ // if corpus-specific
check_ff<PScorePfwd<Token> >("pfwd", m_lbop_conf);
check_ff<PScorePbwd<Token> >("pbwd", m_lbop_conf);
check_ff<PScoreLogCnt<Token> >("logcnt");
-
+
// These are always corpus-specific
check_ff<PScoreProvenance<Token> >("prov", &m_active_ff_fix);
check_ff<PScoreProvenance<Token> >("prov", &m_active_ff_dyn);
}
-
+
// data source features (copies of phrase and word count specific to
// this translation model)
else if (fsname == "datasource")
@@ -456,14 +456,14 @@ namespace Moses
btfix.m_num_workers = this->m_workers;
btfix.open(m_bname, L1, L2);
btfix.setDefaultSampleSize(m_default_sample_size);
-
+
btdyn.reset(new imbitext(btfix.V1, btfix.V2, m_default_sample_size, m_workers));
if (m_bias_file.size())
load_bias(m_bias_file);
-
- if (m_extra_data.size())
+
+ if (m_extra_data.size())
load_extra_data(m_extra_data, false);
-
+
#if 0
// currently not used
LexicalPhraseScorer2<Token>::table_t & COOC = calc_lex.scorer.COOC;
@@ -490,18 +490,18 @@ namespace Moses
}
- TargetPhrase*
+ TargetPhrase*
Mmsapt::
mkTPhrase(Phrase const& src,
- PhrasePair<Token>* fix,
- PhrasePair<Token>* dyn,
+ PhrasePair<Token>* fix,
+ PhrasePair<Token>* dyn,
sptr<Bitext<Token> > const& dynbt) const
{
- UTIL_THROW_IF2(!fix && !dyn, HERE <<
+ UTIL_THROW_IF2(!fix && !dyn, HERE <<
": Can't create target phrase from nothing.");
vector<float> fvals(this->m_numScoreComponents);
PhrasePair<Token> pool = fix ? *fix : *dyn;
- if (fix)
+ if (fix)
{
BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_fix)
(*ff)(btfix, *fix, &fvals);
@@ -511,7 +511,7 @@ namespace Moses
BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_dyn)
(*ff)(*dynbt, *dyn, &fvals);
}
-
+
if (fix && dyn) { pool += *dyn; }
else if (fix)
{
@@ -533,7 +533,7 @@ namespace Moses
BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_fix)
(*ff)(*dynbt, ff->allowPooling() ? pool : zilch, &fvals);
}
- if (fix)
+ if (fix)
{
BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_common)
(*ff)(btfix, pool, &fvals);
@@ -574,39 +574,39 @@ namespace Moses
const InputPathList &inputPathQueue) const
{
InputPathList::const_iterator iter;
- for (iter = inputPathQueue.begin(); iter != inputPathQueue.end(); ++iter)
+ for (iter = inputPathQueue.begin(); iter != inputPathQueue.end(); ++iter)
{
InputPath &inputPath = **iter;
const Phrase &phrase = inputPath.GetPhrase();
PrefixExists(ttask, phrase); // launches parallel lookup
}
- for (iter = inputPathQueue.begin(); iter != inputPathQueue.end(); ++iter)
+ for (iter = inputPathQueue.begin(); iter != inputPathQueue.end(); ++iter)
{
InputPath &inputPath = **iter;
const Phrase &phrase = inputPath.GetPhrase();
- const TargetPhraseCollection *targetPhrases
+ const TargetPhraseCollection *targetPhrases
= this->GetTargetPhraseCollectionLEGACY(ttask,phrase);
inputPath.SetTargetPhrases(*this, targetPhrases, NULL);
}
}
-
- TargetPhraseCollection const*
+
+ TargetPhraseCollection const*
Mmsapt::
GetTargetPhraseCollectionLEGACY(const Phrase& src) const
{
UTIL_THROW2("Don't call me without the translation task.");
}
- // This is not the most efficient way of phrase lookup!
- TargetPhraseCollection const*
+ // This is not the most efficient way of phrase lookup!
+ TargetPhraseCollection const*
Mmsapt::
GetTargetPhraseCollectionLEGACY(ttasksptr const& ttask, const Phrase& src) const
{
// map from Moses Phrase to internal id sequence
- vector<id_type> sphrase;
+ vector<id_type> sphrase;
fillIdSeq(src,input_factor,*(btfix.V1),sphrase);
if (sphrase.size() == 0) return NULL;
-
+
// Reserve a local copy of the dynamic bitext in its current form. /btdyn/
// is set to a new copy of the dynamic bitext every time a sentence pair
// is added. /dyn/ keeps the old bitext around as long as we need it.
@@ -631,11 +631,11 @@ namespace Moses
<< mdyn.size() << " " << mdyn.getPid() << endl;
#endif
- if (mdyn.size() != sphrase.size() && mfix.size() != sphrase.size())
+ if (mdyn.size() != sphrase.size() && mfix.size() != sphrase.size())
return NULL; // phrase not found in either bitext
// do we have cached results for this phrase?
- uint64_t phrasekey = (mfix.size() == sphrase.size()
+ uint64_t phrasekey = (mfix.size() == sphrase.size()
? (mfix.getPid()<<1) : (mdyn.getPid()<<1)+1);
// get context-specific cache of items previously looked up
@@ -647,25 +647,25 @@ namespace Moses
// was stored as the time stamp. For each word in the
// vocabulary, we also store its most recent occurrence in the
// bitext. Only if the timestamp of each word in the phrase is
- // newer than the timestamp of the phrase itself we must update
- // the entry.
+ // newer than the timestamp of the phrase itself we must update
+ // the entry.
if (ret) return ret; // yes, was cached => DONE
-
+
// OK: pt entry NOT found or NOT up to date
- // lookup and expansion could be done in parallel threads,
+ // lookup and expansion could be done in parallel threads,
// but ppdyn is probably small anyway
// TO DO: have Bitexts return lists of PhrasePairs instead of pstats
- // no need to expand pstats at every single lookup again, especially
+ // no need to expand pstats at every single lookup again, especially
// for btfix.
sptr<pstats> sfix,sdyn;
-
+
if (mfix.size() == sphrase.size()) sfix = btfix.lookup(ttask, mfix);
if (mdyn.size() == sphrase.size()) sdyn = dyn->lookup(ttask, mdyn);
vector<PhrasePair<Token> > ppfix,ppdyn;
PhrasePair<Token>::SortByTargetIdSeq sort_by_tgt_id;
- if (sfix)
+ if (sfix)
{
expand(mfix, btfix, *sfix, ppfix, m_bias_log);
sort(ppfix.begin(), ppfix.end(),sort_by_tgt_id);
@@ -706,8 +706,8 @@ namespace Moses
#if 0
- if (combine_pstats(src,
- mfix.getPid(), sfix.get(), btfix,
+ if (combine_pstats(src,
+ mfix.getPid(), sfix.get(), btfix,
mdyn.getPid(), sdyn.get(), *dyn, ret))
{
#if 0
@@ -733,7 +733,7 @@ namespace Moses
return ret;
}
- size_t
+ size_t
Mmsapt::
SetTableLimit(size_t limit)
{
@@ -762,14 +762,14 @@ namespace Moses
throw "CreateRuleLookupManager is currently not supported in Mmsapt!";
}
- void
+ void
Mmsapt::
InitializeForInput(ttasksptr const& ttask)
{
sptr<ContextScope> const& scope = ttask->GetScope();
- sptr<ContextForQuery> context
+ sptr<ContextForQuery> context
= scope->get<ContextForQuery>(&btfix, true);
- if (m_bias_server.size() && context->bias == NULL)
+ if (m_bias_server.size() && context->bias == NULL)
{ // we need to create the bias
boost::unique_lock<boost::shared_mutex> lock(context->lock);
string const& context_words = ttask->GetContextString();
@@ -778,18 +778,18 @@ namespace Moses
if (m_bias_log)
{
*m_bias_log << HERE << endl
- << "BIAS LOOKUP CONTEXT: "
- << context_words << endl;
+ << "BIAS LOOKUP CONTEXT: "
+ << context_words << endl;
context->bias_log = m_bias_log;
}
- context->bias
+ context->bias
= btfix.SetupDocumentBias(m_bias_server, context_words, m_bias_log);
context->bias->loglevel = m_bias_loglevel;
context->bias->log = m_bias_log;
}
if (!context->cache1) context->cache1.reset(new pstats::cache_t);
if (!context->cache2) context->cache2.reset(new pstats::cache_t);
- }
+ }
boost::unique_lock<boost::shared_mutex> mylock(m_lock);
sptr<TPCollCache> localcache = scope->get<TPCollCache>(cache_key);
if (!localcache)
@@ -798,12 +798,12 @@ namespace Moses
else localcache = m_cache;
scope->set<TPCollCache>(cache_key, localcache);
}
-
+
if (m_lr_func_name.size() && m_lr_func == NULL)
{
FeatureFunction* lr = &FeatureFunction::FindFeatureFunction(m_lr_func_name);
m_lr_func = dynamic_cast<LexicalReordering*>(lr);
- UTIL_THROW_IF2(lr == NULL, "FF " << m_lr_func_name
+ UTIL_THROW_IF2(lr == NULL, "FF " << m_lr_func_name
<< " does not seem to be a lexical reordering function!");
// todo: verify that lr_func implements a hierarchical reordering model
}
@@ -813,7 +813,7 @@ namespace Moses
// Mmsapt::
// PrefixExists(Moses::Phrase const& phrase) const
// {
- // return PrefixExists(phrase,NULL);
+ // return PrefixExists(phrase,NULL);
// }
bool
@@ -821,11 +821,11 @@ namespace Moses
PrefixExists(ttasksptr const& ttask, Moses::Phrase const& phrase) const
{
if (phrase.GetSize() == 0) return false;
- vector<id_type> myphrase;
+ vector<id_type> myphrase;
fillIdSeq(phrase,input_factor,*btfix.V1,myphrase);
-
+
TSA<Token>::tree_iterator mfix(btfix.I1.get(),&myphrase[0],myphrase.size());
- if (mfix.size() == myphrase.size())
+ if (mfix.size() == myphrase.size())
{
btfix.prep(ttask, mfix);
// cerr << phrase << " " << mfix.approxOccurrenceCount() << endl;
@@ -872,7 +872,7 @@ namespace Moses
// return btfix.SetupDocumentBias(bias);
// }
- vector<float>
+ vector<float>
Mmsapt
::DefaultWeights() const
{ return vector<float>(this->GetNumScoreComponents(), 1.); }
diff --git a/moses/TranslationModel/UG/mmsapt.h b/moses/TranslationModel/UG/mmsapt.h
index 4552ea8d2..5f688cfd8 100644
--- a/moses/TranslationModel/UG/mmsapt.h
+++ b/moses/TranslationModel/UG/mmsapt.h
@@ -38,13 +38,13 @@
// TO DO:
// - make lexical phrase scorer take addition to the "dynamic overlay" into account
// - switch to pool of sapts, where each sapt has its own provenance feature
-// RESEARCH QUESTION: is this more effective than having multiple phrase tables,
+// RESEARCH QUESTION: is this more effective than having multiple phrase tables,
// each with its own set of features?
namespace Moses
{
using namespace bitext;
- class Mmsapt
+ class Mmsapt
#ifndef NO_MOSES
: public PhraseDictionary
#endif
@@ -54,7 +54,7 @@ namespace Moses
friend class Alignment;
std::map<std::string,std::string> param;
std::string m_name;
- public:
+ public:
typedef L2R_Token<SimpleWordId> Token;
typedef mmBitext<Token> mmbitext;
typedef imBitext<Token> imbitext;
@@ -63,21 +63,21 @@ namespace Moses
typedef PhraseScorer<Token> pscorer;
private:
// vector<sptr<bitext> > shards;
- mmbitext btfix;
- sptr<imbitext> btdyn;
+ mmbitext btfix;
+ sptr<imbitext> btdyn;
std::string m_bname, m_extra_data, m_bias_file,m_bias_server;
std::string L1;
std::string L2;
float m_lbop_conf; // confidence level for lbop smoothing
float m_lex_alpha; // alpha paramter (j+a)/(m+a) for lexical smoothing
// alpha parameter for lexical smoothing (joint+alpha)/(marg + alpha)
- // must be > 0 if dynamic
+ // must be > 0 if dynamic
size_t m_default_sample_size;
size_t m_workers; // number of worker threads for sampling the bitexts
std::vector<std::string> m_feature_set_names; // one or more of: standard, datasource
std::string m_bias_logfile;
boost::scoped_ptr<ofstream> m_bias_logger; // for logging to a file
- ostream* m_bias_log;
+ ostream* m_bias_log;
int m_bias_loglevel;
LexicalReordering* m_lr_func; // associated lexical reordering function
string m_lr_func_name; // name of associated lexical reordering function
@@ -88,47 +88,47 @@ namespace Moses
boost::shared_ptr<SamplingBias> m_bias; // for global default bias
boost::shared_ptr<TPCollCache> m_cache; // for global default bias
size_t m_cache_size; //
- size_t input_factor; //
+ size_t input_factor; //
size_t output_factor; // we can actually return entire Tokens!
// for display for human inspection (ttable dumps):
std::vector<std::string> m_feature_names; // names of features activated
- std::vector<bool> m_is_logval; // keeps track of which features are log valued
- std::vector<bool> m_is_integer; // keeps track of which features are integer valued
+ std::vector<bool> m_is_logval; // keeps track of which features are log valued
+ std::vector<bool> m_is_integer; // keeps track of which features are integer valued
std::vector<sptr<pscorer > > m_active_ff_fix; // activated feature functions (fix)
std::vector<sptr<pscorer > > m_active_ff_dyn; // activated feature functions (dyn)
- std::vector<sptr<pscorer > > m_active_ff_common;
+ std::vector<sptr<pscorer > > m_active_ff_common;
// activated feature functions (dyn)
- void
+ void
register_ff(sptr<pscorer> const& ff, std::vector<sptr<pscorer> > & registry);
template<typename fftype>
- void
+ void
check_ff(std::string const ffname,std::vector<sptr<pscorer> >* registry = NULL);
- // add feature function if specified
-
+ // add feature function if specified
+
template<typename fftype>
- void
- check_ff(std::string const ffname, float const xtra,
+ void
+ check_ff(std::string const ffname, float const xtra,
std::vector<sptr<pscorer> >* registry = NULL);
// add feature function if specified
void
add_corpus_specific_features(std::vector<sptr<pscorer > >& ffvec);
-
+
// built-in feature functions
// PScorePfwd<Token> calc_pfwd_fix, calc_pfwd_dyn;
// PScorePbwd<Token> calc_pbwd_fix, calc_pbwd_dyn;
- // PScoreLex<Token> calc_lex;
+ // PScoreLex<Token> calc_lex;
// this one I'd like to see as an external ff eventually
- // PScorePC<Token> apply_pp; // apply phrase penalty
+ // PScorePC<Token> apply_pp; // apply phrase penalty
// PScoreLogCounts<Token> add_logcounts_fix;
// PScoreLogCounts<Token> add_logcounts_dyn;
void init(std::string const& line);
mutable boost::shared_mutex m_lock;
- // mutable boost::shared_mutex m_cache_lock;
+ // mutable boost::shared_mutex m_cache_lock;
// for more complex operations on the cache
bool withPbwd;
bool poolCounts;
@@ -141,25 +141,25 @@ namespace Moses
void read_config_file(std::string fname, std::map<std::string,std::string>& param);
// phrase table feature weights for alignment:
- std::vector<float> feature_weights;
+ std::vector<float> feature_weights;
- std::vector<std::vector<id_type> > wlex21;
+ std::vector<std::vector<id_type> > wlex21;
// word translation lexicon (without counts, get these from calc_lex.COOC)
typedef mm2dTable<id_type,id_type,uint32_t,uint32_t> mm2dtable_t;
mm2dtable_t COOCraw;
- TargetPhrase*
- mkTPhrase(Phrase const& src,
- Moses::bitext::PhrasePair<Token>* fix,
- Moses::bitext::PhrasePair<Token>* dyn,
+ TargetPhrase*
+ mkTPhrase(Phrase const& src,
+ Moses::bitext::PhrasePair<Token>* fix,
+ Moses::bitext::PhrasePair<Token>* dyn,
sptr<Bitext<Token> > const& dynbt) const;
void
process_pstats
(Phrase const& src,
- uint64_t const pid1,
- pstats const& stats,
- Bitext<Token> const & bt,
+ uint64_t const pid1,
+ pstats const& stats,
+ Bitext<Token> const & bt,
TargetPhraseCollection* tpcoll
) const;
@@ -169,16 +169,16 @@ namespace Moses
uint64_t const pid1a, pstats * statsa, Bitext<Token> const & bta,
uint64_t const pid1b, pstats const* statsb, Bitext<Token> const & btb,
TargetPhraseCollection* tpcoll) const;
-
+
bool
combine_pstats
- (Phrase const& src,
+ (Phrase const& src,
uint64_t const pid1a, pstats* statsa, Bitext<Token> const & bta,
- uint64_t const pid1b, pstats const* statsb, Bitext<Token> const & btb,
+ uint64_t const pid1b, pstats const* statsb, Bitext<Token> const & btb,
TargetPhraseCollection* tpcoll) const;
void load_extra_data(std::string bname, bool locking);
- void load_bias(std::string bname);
+ void load_bias(std::string bname);
public:
// Mmsapt(std::string const& description, std::string const& line);
@@ -190,22 +190,22 @@ namespace Moses
std::string const& GetName() const;
#ifndef NO_MOSES
- TargetPhraseCollection const*
+ TargetPhraseCollection const*
GetTargetPhraseCollectionLEGACY(ttasksptr const& ttask, const Phrase& src) const;
- TargetPhraseCollection const*
+ TargetPhraseCollection const*
GetTargetPhraseCollectionLEGACY(const Phrase& src) const;
- void
+ void
GetTargetPhraseCollectionBatch(ttasksptr const& ttask,
const InputPathList &inputPathQueue) const;
-
+
//! Create a sentence-specific manager for SCFG rule lookup.
ChartRuleLookupManager*
CreateRuleLookupManager(const ChartParser &, const ChartCellCollectionBase &);
-
+
ChartRuleLookupManager*
- CreateRuleLookupManager(const ChartParser &, const ChartCellCollectionBase &,
+ CreateRuleLookupManager(const ChartParser &, const ChartCellCollectionBase &,
std::size_t);
#endif
@@ -222,7 +222,7 @@ namespace Moses
bool ProvidesPrefixCheck() const; // return true if prefix /phrase/ check exists
// bool PrefixExists(Phrase const& phrase, SamplingBias const* const bias) const;
bool PrefixExists(ttasksptr const& ttask, Phrase const& phrase) const;
-
+
bool isLogVal(int i) const;
bool isInteger(int i) const;
@@ -232,7 +232,7 @@ namespace Moses
void CleanUpAfterSentenceProcessing(ttasksptr const& ttask);
// align two new sentences
- sptr<std::vector<int> >
+ sptr<std::vector<int> >
align(std::string const& src, std::string const& trg) const;
std::vector<std::string> const&
diff --git a/moses/TranslationModel/UG/mmsapt_align.cc b/moses/TranslationModel/UG/mmsapt_align.cc
index 65cf979e1..13d8387d2 100644
--- a/moses/TranslationModel/UG/mmsapt_align.cc
+++ b/moses/TranslationModel/UG/mmsapt_align.cc
@@ -6,7 +6,7 @@
// using namespace bitext;
// using namespace std;
// using namespace boost;
-
+
// struct PPgreater
// {
// bool operator()(PhrasePair const& a, PhrasePair const& b)
@@ -28,7 +28,7 @@
// PhrasePair pp;
// ushort s1,e1,s2,e2; // start and end positions
// int prev; // preceding alignment hypothesis
-// float score;
+// float score;
// bitvector scov; // source coverage
// PhraseAlnHyp(PhrasePair const& ppx, int slen,
// pair<uint32_t,uint32_t> const& sspan,
@@ -37,7 +37,7 @@
// {
// s1 = sspan.first; e1 = sspan.second;
// s2 = tspan.first; e2 = tspan.second;
-// for (size_t i = s1; i < e1; ++i)
+// for (size_t i = s1; i < e1; ++i)
// scov.set(i);
// }
@@ -78,13 +78,13 @@
// return po_other;
// }
-// float
+// float
// dprob_fwd(PhraseAlnHyp const& next)
// {
// return pp.dfwd[po_fwd(&next)];
// }
-// float
+// float
// dprob_bwd(PhraseAlnHyp const& prev)
// {
// return pp.dbwd[po_bwd(&prev)];
@@ -102,15 +102,15 @@
// typedef pstats::trg_map_t jStatsTable;
// Mmsapt const& PT;
-// vector<id_type> s,t;
+// vector<id_type> s,t;
// pidmap_t sspan2pid, tspan2pid; // span -> phrase ID
// pid2span_t spid2span,tpid2span;
// vector<vector<sptr<pstats> > > spstats;
-// vector<PhrasePair> PP;
+// vector<PhrasePair> PP;
// // position-independent phrase pair info
// public:
-// vector<PhraseAlnHyp> PAH;
+// vector<PhraseAlnHyp> PAH;
// vector<vector<int> > tpos2ahyp;
// // maps from target start positions to PhraseAlnHyps starting at
// // that position
@@ -120,8 +120,8 @@
// void fill_sspan_maps();
// public:
// Alignment(Mmsapt const& pt, string const& src, string const& trg);
-// void show(ostream& out);
-// void show(ostream& out, PhraseAlnHyp const& ah);
+// void show(ostream& out);
+// void show(ostream& out, PhraseAlnHyp const& ah);
// };
// void
@@ -129,11 +129,11 @@
// show(ostream& out, PhraseAlnHyp const& ah)
// {
// #if 0
-// LexicalPhraseScorer2<Token>::table_t const&
+// LexicalPhraseScorer2<Token>::table_t const&
// COOCjnt = PT.calc_lex.scorer.COOC;
// out << setw(10) << exp(ah.score) << " "
-// << PT.btfix.T2->pid2str(PT.btfix.V2.get(), ah.pp.p2)
+// << PT.btfix.T2->pid2str(PT.btfix.V2.get(), ah.pp.p2)
// << " <=> "
// << PT.btfix.T1->pid2str(PT.btfix.V1.get(), ah.pp.p1);
// vector<uchar> const& a = ah.pp.aln;
@@ -168,7 +168,7 @@
// // << "]" << endl;
// #endif
// }
-
+
// void
// Alignment::
// show(ostream& out)
@@ -192,7 +192,7 @@
// return spstats[sspan.first][k];
// else return sptr<pstats>();
// }
-
+
// void
// Alignment::
// fill_tspan_maps()
@@ -207,7 +207,7 @@
// tpid2span[pid].push_back(pair<uint32_t,uint32_t>(i,k+1));
// tspan2pid[i][k] = pid;
// }
-// }
+// }
// }
// void
@@ -230,11 +230,11 @@
// int y = p->second[0].second-1;
// spstats[i].push_back(spstats[x][y-x]);
// }
-// else
+// else
// {
// spstats[i].push_back(PT.btfix.lookup(m));
// cout << PT.btfix.T1->pid2str(PT.btfix.V1.get(),pid) << " "
-// << spstats[i].back()->good << "/" << spstats[i].back()->sample_cnt
+// << spstats[i].back()->good << "/" << spstats[i].back()->sample_cnt
// << endl;
// }
// spid2span[pid].push_back(pair<uint32_t,uint32_t>(i,k+1));
@@ -262,14 +262,14 @@
// // size_t m2 = COOC.m2(i);
// // if (j*1000 > m1 && j*1000 > m2)
// // cout << " " << (*PT.btfix.V1)[k];
-// // }
+// // }
// // }
// // cout << endl;
// // }
-
+
// fill_tspan_maps();
// fill_sspan_maps();
-// tpos2ahyp.resize(t.size());
+// tpos2ahyp.resize(t.size());
// // now fill the association score table
// PAH.reserve(1000000);
// typedef pid2span_t::iterator psiter;
@@ -301,12 +301,12 @@
// }
// }
-
+
// int
// extend(vector<PhraseAlnHyp> & PAH, int edge, int next)
// {
-// if ((PAH[edge].scov & PAH[next].scov).count())
+// if ((PAH[edge].scov & PAH[next].scov).count())
// return -1;
// int ret = PAH.size();
// PAH.push_back(PAH[next]);
diff --git a/moses/TranslationModel/UG/ptable-describe-features.cc b/moses/TranslationModel/UG/ptable-describe-features.cc
index dbd5accb9..c9dd3abd1 100644
--- a/moses/TranslationModel/UG/ptable-describe-features.cc
+++ b/moses/TranslationModel/UG/ptable-describe-features.cc
@@ -19,7 +19,7 @@ int main()
{
if (line.empty()) continue;
size_t k = line.find_first_not_of(" ");
- if (line.find("Mmsapt") != k &&
+ if (line.find("Mmsapt") != k &&
line.find("PhraseDictionaryBitextSampling") != k)
continue;
Mmsapt PT(line);
@@ -32,6 +32,6 @@ int main()
}
exit(0);
}
-
-
+
+
diff --git a/moses/TranslationModel/UG/ptable-lookup.cc b/moses/TranslationModel/UG/ptable-lookup.cc
index e165011c7..94627a02c 100644
--- a/moses/TranslationModel/UG/ptable-lookup.cc
+++ b/moses/TranslationModel/UG/ptable-lookup.cc
@@ -19,13 +19,13 @@ class SimplePhrase : public Moses::Phrase
vector<FactorType> const m_fo; // factor order
public:
SimplePhrase(): m_fo(1,FactorType(0)) {}
-
- void init(string const& s)
+
+ void init(string const& s)
{
istringstream buf(s); string w;
- while (buf >> w)
+ while (buf >> w)
{
- Word wrd;
+ Word wrd;
this->AddWord().CreateFromString(Input,m_fo,StringPiece(w),false,false);
}
}
@@ -63,15 +63,15 @@ int main(int argc, char* argv[])
cerr << "Phrase table implementation not supported by this utility." << endl;
exit(1);
}
-
+
string line;
while (true)
{
Sentence phrase;
if (!phrase.Read(cin,ifo)) break;
- if (pdta)
+ if (pdta)
{
- pdta->InitializeForInput(phrase);
+ pdta->InitializeForInput(phrase);
// do we also need to call CleanupAfterSentenceProcessing at the end?
}
Phrase& p = phrase;
@@ -79,13 +79,13 @@ int main(int argc, char* argv[])
cout << p << endl;
TargetPhraseCollection const* trg = PT->GetTargetPhraseCollectionLEGACY(p);
if (!trg) continue;
- vector<size_t> order(trg->GetSize());
+ vector<size_t> order(trg->GetSize());
for (size_t i = 0; i < order.size(); ++i) order[i] = i;
sort(order.begin(),order.end(),TargetPhraseIndexSorter(*trg));
size_t k = 0;
- // size_t precision =
+ // size_t precision =
cout.precision(2);
-
+
vector<string> fname;
if (mmsapt)
{
@@ -119,6 +119,6 @@ int main(int argc, char* argv[])
}
exit(0);
}
-
-
+
+
diff --git a/moses/TranslationModel/UG/sapt_phrase_key.h b/moses/TranslationModel/UG/sapt_phrase_key.h
index e1ecf1573..0caf11e43 100644
--- a/moses/TranslationModel/UG/sapt_phrase_key.h
+++ b/moses/TranslationModel/UG/sapt_phrase_key.h
@@ -8,6 +8,6 @@ namespace sapt
using namespace Moses;
using namespace std;
-
+
}
diff --git a/moses/TranslationModel/UG/sapt_phrase_scorers.h b/moses/TranslationModel/UG/sapt_phrase_scorers.h
index 9870ed7f0..ace907d73 100644
--- a/moses/TranslationModel/UG/sapt_phrase_scorers.h
+++ b/moses/TranslationModel/UG/sapt_phrase_scorers.h
@@ -1,6 +1,6 @@
// -*- c++ -*-
// Phrase scoring functions for suffix array-based phrase tables
-// written by Ulrich Germann
+// written by Ulrich Germann
#pragma once
#include "sapt_pscore_unaligned.h" // count # of unaligned words
#include "sapt_pscore_provenance.h" // reward for joint phrase occ. per corpus
diff --git a/moses/TranslationModel/UG/sapt_pscore_base.h b/moses/TranslationModel/UG/sapt_pscore_base.h
index ff705f952..388c83d9b 100644
--- a/moses/TranslationModel/UG/sapt_pscore_base.h
+++ b/moses/TranslationModel/UG/sapt_pscore_base.h
@@ -1,6 +1,6 @@
// -*- c++ -*-
// Base classes for suffix array-based phrase scorers
-// written by Ulrich Germann
+// written by Ulrich Germann
#pragma once
#include "moses/TranslationModel/UG/mm/ug_bitext.h"
#include "util/exception.hh"
@@ -21,72 +21,72 @@ namespace Moses {
string m_tag;
vector<string> m_feature_names;
public:
-
- virtual
- void
- operator()(Bitext<Token> const& pt,
- PhrasePair<Token>& pp,
- vector<float> * dest=NULL)
+
+ virtual
+ void
+ operator()(Bitext<Token> const& pt,
+ PhrasePair<Token>& pp,
+ vector<float> * dest=NULL)
const = 0;
void
setIndex(int const i) { m_index = i; }
-
+
int
getIndex() const { return m_index; }
- int
+ int
fcnt() const { return m_num_feats; }
-
+
vector<string> const &
fnames() const { return m_feature_names; }
string const &
fname(int i) const
- {
+ {
if (i < 0) i += m_num_feats;
UTIL_THROW_IF2(i < 0 || i >= m_num_feats,
"Feature name index out of range at " << HERE);
- return m_feature_names.at(i);
+ return m_feature_names.at(i);
}
virtual
bool
- isLogVal(int i) const { return true; };
- // is this feature log valued?
-
+ isLogVal(int i) const { return true; };
+ // is this feature log valued?
+
virtual
bool
- isIntegerValued(int i) const { return false; };
- // is this feature integer valued (e.g., count features)?
+ isIntegerValued(int i) const { return false; };
+ // is this feature integer valued (e.g., count features)?
virtual
bool
allowPooling() const { return true; }
- // does this feature function allow pooling of counts if
+ // does this feature function allow pooling of counts if
// there are no occurrences in the respective corpus?
-
+
virtual
void
load() { }
};
- // base class for 'families' of phrase scorers that have a single
+ // base class for 'families' of phrase scorers that have a single
template<typename Token>
class
- SingleRealValuedParameterPhraseScorerFamily
+ SingleRealValuedParameterPhraseScorerFamily
: public PhraseScorer<Token>
{
protected:
vector<float> m_x;
- virtual
- void
- init(string const specs)
- {
+ virtual
+ void
+ init(string const specs)
+ {
using namespace boost;
- UTIL_THROW_IF2(this->m_tag.size() == 0,
+ UTIL_THROW_IF2(this->m_tag.size() == 0,
"m_tag must be initialized in constructor");
UTIL_THROW_IF2(specs.size() == 0,"empty specification string!");
UTIL_THROW_IF2(this->m_feature_names.size(),
diff --git a/moses/TranslationModel/UG/sapt_pscore_coherence.h b/moses/TranslationModel/UG/sapt_pscore_coherence.h
index a3211df54..c201c9651 100644
--- a/moses/TranslationModel/UG/sapt_pscore_coherence.h
+++ b/moses/TranslationModel/UG/sapt_pscore_coherence.h
@@ -1,5 +1,5 @@
// -*- c++ -*-
-// written by Ulrich Germann
+// written by Ulrich Germann
#pragma once
#include "moses/TranslationModel/UG/mm/ug_bitext.h"
#include "util/exception.hh"
@@ -13,16 +13,16 @@ namespace Moses {
PScoreCoherence : public PhraseScorer<Token>
{
public:
- PScoreCoherence(string const dummy)
- {
+ PScoreCoherence(string const dummy)
+ {
this->m_index = -1;
this->m_num_feats = 1;
this->m_feature_names.push_back(string("coherence"));
}
-
- void
- operator()(Bitext<Token> const& bt,
- PhrasePair<Token>& pp,
+
+ void
+ operator()(Bitext<Token> const& bt,
+ PhrasePair<Token>& pp,
vector<float> * dest = NULL) const
{
if (!dest) dest = &pp.fvals;
diff --git a/moses/TranslationModel/UG/sapt_pscore_lex1.h b/moses/TranslationModel/UG/sapt_pscore_lex1.h
index a8e83da51..76ca2a9a4 100644
--- a/moses/TranslationModel/UG/sapt_pscore_lex1.h
+++ b/moses/TranslationModel/UG/sapt_pscore_lex1.h
@@ -1,6 +1,6 @@
// -*- c++ -*-
// Phrase scorer that counts the number of unaligend words in the phrase
-// written by Ulrich Germann
+// written by Ulrich Germann
#include "moses/TranslationModel/UG/mm/ug_bitext.h"
#include "sapt_pscore_base.h"
@@ -17,11 +17,11 @@ namespace Moses {
string m_lexfile;
public:
LexicalPhraseScorer2<Token> scorer;
-
- PScoreLex1(string const& alphaspec, string const& lexfile)
- {
+
+ PScoreLex1(string const& alphaspec, string const& lexfile)
+ {
this->m_index = -1;
- this->m_num_feats = 2;
+ this->m_num_feats = 2;
this->m_feature_names.reserve(2);
this->m_feature_names.push_back("lexfwd");
this->m_feature_names.push_back("lexbwd");
@@ -31,13 +31,13 @@ namespace Moses {
void
load()
- {
- scorer.open(m_lexfile);
+ {
+ scorer.open(m_lexfile);
}
-
- void
- operator()(Bitext<Token> const& bt,
- PhrasePair<Token>& pp,
+
+ void
+ operator()(Bitext<Token> const& bt,
+ PhrasePair<Token>& pp,
vector<float> * dest = NULL) const
{
if (!dest) dest = &pp.fvals;
@@ -48,27 +48,27 @@ namespace Moses {
cout << len1 << " " << len2 << endl;
Token const* t1 = bt.T1->sntStart(sid1);
for (size_t i = off1; i < off1 + len1; ++i)
- cout << (*bt.V1)[t1[i].id()] << " ";
+ cout << (*bt.V1)[t1[i].id()] << " ";
cout << __FILE__ << ":" << __LINE__ << endl;
-
+
Token const* t2 = bt.T2->sntStart(sid2);
for (size_t i = off2; i < off2 + len2; ++i)
- cout << (*bt.V2)[t2[i].id()] << " ";
+ cout << (*bt.V2)[t2[i].id()] << " ";
cout << __FILE__ << ":" << __LINE__ << endl;
-
+
BOOST_FOREACH (int a, pp.aln)
cout << a << " " ;
cout << __FILE__ << ":" << __LINE__ << "\n" << endl;
-
+
scorer.score(bt.T1->sntStart(sid1)+off1,0,len1,
bt.T2->sntStart(sid2)+off2,0,len2,
pp.aln, m_alpha,
(*dest)[this->m_index],
(*dest)[this->m_index+1]);
#endif
- scorer.score(pp.start1,0, pp.len1,
- pp.start2,0, pp.len2, pp.aln, m_alpha,
- (*dest)[this->m_index],
+ scorer.score(pp.start1,0, pp.len1,
+ pp.start2,0, pp.len2, pp.aln, m_alpha,
+ (*dest)[this->m_index],
(*dest)[this->m_index+1]);
}
};
diff --git a/moses/TranslationModel/UG/sapt_pscore_logcnt.h b/moses/TranslationModel/UG/sapt_pscore_logcnt.h
index 2790323ed..9dc5ac7ba 100644
--- a/moses/TranslationModel/UG/sapt_pscore_logcnt.h
+++ b/moses/TranslationModel/UG/sapt_pscore_logcnt.h
@@ -2,7 +2,7 @@
// Phrase scorer that rewards the number of phrase pair occurrences in a bitext
// with the asymptotic function x/(j+x) where x > 0 is a function
// parameter that determines the steepness of the rewards curve
-// written by Ulrich Germann
+// written by Ulrich Germann
#include "sapt_pscore_base.h"
#include <boost/dynamic_bitset.hpp>
@@ -10,15 +10,15 @@
using namespace std;
namespace Moses {
namespace bitext {
-
+
template<typename Token>
class
PScoreLogCnt : public PhraseScorer<Token>
{
string m_specs;
public:
- PScoreLogCnt(string const specs)
- {
+ PScoreLogCnt(string const specs)
+ {
this->m_index = -1;
this->m_specs = specs;
if (specs.find("r1") != string::npos) // raw source phrase counts
@@ -35,11 +35,11 @@ namespace Moses {
}
bool
- isIntegerValued(int i) const { return true; }
+ isIntegerValued(int i) const { return true; }
- void
- operator()(Bitext<Token> const& bt,
- PhrasePair<Token>& pp,
+ void
+ operator()(Bitext<Token> const& bt,
+ PhrasePair<Token>& pp,
vector<float> * dest = NULL) const
{
if (!dest) dest = &pp.fvals;
@@ -49,15 +49,15 @@ namespace Moses {
assert(pp.joint);
assert(pp.raw2);
size_t i = this->m_index;
- if (m_specs.find("r1") != string::npos)
+ if (m_specs.find("r1") != string::npos)
(*dest)[i++] = log(pp.raw1);
- if (m_specs.find("s1") != string::npos)
+ if (m_specs.find("s1") != string::npos)
(*dest)[i++] = log(pp.sample1);
- if (m_specs.find("g1") != string::npos)
+ if (m_specs.find("g1") != string::npos)
(*dest)[i++] = log(pp.good1);
- if (m_specs.find("j") != string::npos)
+ if (m_specs.find("j") != string::npos)
(*dest)[i++] = log(pp.joint);
- if (m_specs.find("r2") != string::npos)
+ if (m_specs.find("r2") != string::npos)
(*dest)[++i] = log(pp.raw2);
}
};
diff --git a/moses/TranslationModel/UG/sapt_pscore_pbwd.h b/moses/TranslationModel/UG/sapt_pscore_pbwd.h
index f7b4686d7..9366777ef 100644
--- a/moses/TranslationModel/UG/sapt_pscore_pbwd.h
+++ b/moses/TranslationModel/UG/sapt_pscore_pbwd.h
@@ -1,5 +1,5 @@
//-*- c++ -*-
-// written by Ulrich Germann
+// written by Ulrich Germann
#pragma once
#include "moses/TranslationModel/UG/mm/ug_bitext.h"
#include "util/exception.hh"
@@ -15,12 +15,12 @@ namespace Moses {
{
float conf;
string denom;
-
+
public:
- PScorePbwd(float const c, string d)
- {
+ PScorePbwd(float const c, string d)
+ {
this->m_index = -1;
- conf = c;
+ conf = c;
denom = d;
size_t checksum = d.size();
BOOST_FOREACH(char const& x, denom)
@@ -36,13 +36,13 @@ namespace Moses {
<< d << "' for Pbwd phrase scorer at " << HERE);
}
- void
- operator()(Bitext<Token> const& bt,
- PhrasePair<Token>& pp,
+ void
+ operator()(Bitext<Token> const& bt,
+ PhrasePair<Token>& pp,
vector<float> * dest = NULL) const
{
if (!dest) dest = &pp.fvals;
- // we use the denominator specification to scale the raw counts on the
+ // we use the denominator specification to scale the raw counts on the
// target side; the clean way would be to counter-sample
size_t i = this->m_index;
BOOST_FOREACH(char const& x, denom)
diff --git a/moses/TranslationModel/UG/sapt_pscore_pfwd.h b/moses/TranslationModel/UG/sapt_pscore_pfwd.h
index ed48a93d2..c5de210a1 100644
--- a/moses/TranslationModel/UG/sapt_pscore_pfwd.h
+++ b/moses/TranslationModel/UG/sapt_pscore_pfwd.h
@@ -1,5 +1,5 @@
// -*- c++ -*-
-// written by Ulrich Germann
+// written by Ulrich Germann
#pragma once
#include "moses/TranslationModel/UG/mm/ug_bitext.h"
#include "util/exception.hh"
@@ -18,10 +18,10 @@ namespace Moses {
public:
- PScorePfwd(float const c, string d)
- {
+ PScorePfwd(float const c, string d)
+ {
this->m_index = -1;
- conf = c;
+ conf = c;
denom = d;
size_t checksum = d.size();
BOOST_FOREACH(char const& x, denom)
@@ -32,17 +32,17 @@ namespace Moses {
this->m_feature_names.push_back(s);
}
this->m_num_feats = this->m_feature_names.size();
- UTIL_THROW_IF2(this->m_feature_names.size() != checksum,
- "Unknown parameter in specification '"
+ UTIL_THROW_IF2(this->m_feature_names.size() != checksum,
+ "Unknown parameter in specification '"
<< d << "' for Pfwd phrase scorer at " << HERE);
}
-
- void
- operator()(Bitext<Token> const& bt, PhrasePair<Token> & pp,
+
+ void
+ operator()(Bitext<Token> const& bt, PhrasePair<Token> & pp,
vector<float> * dest = NULL) const
{
if (!dest) dest = &pp.fvals;
- if (pp.joint > pp.good1)
+ if (pp.joint > pp.good1)
{
pp.joint = pp.good1;
// cerr<<bt.toString(pp.p1,0)<<" ::: "<<bt.toString(pp.p2,1)<<endl;
@@ -53,18 +53,18 @@ namespace Moses {
{
switch (c)
{
- case 'g':
- (*dest)[i++] = log(lbop(pp.good1, pp.joint, conf));
+ case 'g':
+ (*dest)[i++] = log(lbop(pp.good1, pp.joint, conf));
break;
- case 's':
- (*dest)[i++] = log(lbop(pp.sample1, pp.joint, conf));
+ case 's':
+ (*dest)[i++] = log(lbop(pp.sample1, pp.joint, conf));
break;
case 'r':
- (*dest)[i++] = log(lbop(pp.raw1, pp.joint, conf));
+ (*dest)[i++] = log(lbop(pp.raw1, pp.joint, conf));
}
}
}
};
}
}
-
+
diff --git a/moses/TranslationModel/UG/sapt_pscore_phrasecount.h b/moses/TranslationModel/UG/sapt_pscore_phrasecount.h
index e0a6eb48b..e0ce40117 100644
--- a/moses/TranslationModel/UG/sapt_pscore_phrasecount.h
+++ b/moses/TranslationModel/UG/sapt_pscore_phrasecount.h
@@ -1,5 +1,5 @@
// -*- c++ -*-
-// written by Ulrich Germann
+// written by Ulrich Germann
#pragma once
#include "moses/TranslationModel/UG/mm/ug_bitext.h"
#include "util/exception.hh"
@@ -20,15 +20,15 @@ namespace Moses {
this->m_num_feats = 1;
this->m_feature_names.push_back(string("phrasecount"));
}
-
- void
+
+ void
operator()(Bitext<Token> const& bt,
- PhrasePair<Token>& pp,
+ PhrasePair<Token>& pp,
vector<float> * dest = NULL) const
{
if (!dest) dest = &pp.fvals;
(*dest)[this->m_index] = 1;
- }
+ }
};
}
}
diff --git a/moses/TranslationModel/UG/sapt_pscore_provenance.h b/moses/TranslationModel/UG/sapt_pscore_provenance.h
index c33b98fe7..ee7b08bda 100644
--- a/moses/TranslationModel/UG/sapt_pscore_provenance.h
+++ b/moses/TranslationModel/UG/sapt_pscore_provenance.h
@@ -2,7 +2,7 @@
// Phrase scorer that rewards the number of phrase pair occurrences in a bitext
// with the asymptotic function j/(j+x) where x > 0 is a function
// parameter that determines the steepness of the rewards curve
-// written by Ulrich Germann
+// written by Ulrich Germann
#include "sapt_pscore_base.h"
#include <boost/dynamic_bitset.hpp>
@@ -10,7 +10,7 @@
using namespace std;
namespace Moses {
namespace bitext {
-
+
// asymptotic provenance feature n/(n+x)
template<typename Token>
class
@@ -18,18 +18,18 @@ namespace Moses {
{
public:
- PScoreProvenance(string const& spec)
+ PScoreProvenance(string const& spec)
{
this->m_tag = "prov";
this->init(spec);
}
-
+
bool
- isLogVal(int i) const { return false; }
+ isLogVal(int i) const { return false; }
- void
- operator()(Bitext<Token> const& bt,
- PhrasePair<Token>& pp,
+ void
+ operator()(Bitext<Token> const& bt,
+ PhrasePair<Token>& pp,
vector<float> * dest = NULL) const
{
if (!dest) dest = &pp.fvals;
@@ -39,7 +39,7 @@ namespace Moses {
}
bool
- allowPooling() const
+ allowPooling() const
{ return false; }
};
diff --git a/moses/TranslationModel/UG/sapt_pscore_rareness.h b/moses/TranslationModel/UG/sapt_pscore_rareness.h
index 58f204c88..34979243c 100644
--- a/moses/TranslationModel/UG/sapt_pscore_rareness.h
+++ b/moses/TranslationModel/UG/sapt_pscore_rareness.h
@@ -2,7 +2,7 @@
// Phrase scorer that rewards the number of phrase pair occurrences in a bitext
// with the asymptotic function x/(j+x) where x > 0 is a function
// parameter that determines the steepness of the rewards curve
-// written by Ulrich Germann
+// written by Ulrich Germann
#include "sapt_pscore_base.h"
#include <boost/dynamic_bitset.hpp>
@@ -10,25 +10,25 @@
using namespace std;
namespace Moses {
namespace bitext {
-
+
// rareness penalty: x/(n+x)
template<typename Token>
class
PScoreRareness : public SingleRealValuedParameterPhraseScorerFamily<Token>
{
public:
- PScoreRareness(string const spec)
+ PScoreRareness(string const spec)
{
this->m_tag = "rare";
this->init(spec);
}
bool
- isLogVal(int i) const { return false; }
+ isLogVal(int i) const { return false; }
- void
- operator()(Bitext<Token> const& bt,
- PhrasePair<Token>& pp,
+ void
+ operator()(Bitext<Token> const& bt,
+ PhrasePair<Token>& pp,
vector<float> * dest = NULL) const
{
if (!dest) dest = &pp.fvals;
diff --git a/moses/TranslationModel/UG/sapt_pscore_unaligned.h b/moses/TranslationModel/UG/sapt_pscore_unaligned.h
index dafc1e129..8dceb1ad0 100644
--- a/moses/TranslationModel/UG/sapt_pscore_unaligned.h
+++ b/moses/TranslationModel/UG/sapt_pscore_unaligned.h
@@ -1,6 +1,6 @@
// -*- c++ -*-
// Phrase scorer that counts the number of unaligend words in the phrase
-// written by Ulrich Germann
+// written by Ulrich Germann
#include "sapt_pscore_base.h"
#include <boost/dynamic_bitset.hpp>
@@ -14,7 +14,7 @@ namespace Moses {
{
typedef boost::dynamic_bitset<typename ::uint64_t> bitvector;
public:
- PScoreUnaligned(string const spec)
+ PScoreUnaligned(string const spec)
{
this->m_index = -1;
int f = this->m_num_feats = atoi(spec.c_str());
@@ -28,16 +28,16 @@ namespace Moses {
this->m_feature_names[1] = "unal-t";
}
}
-
+
bool
- isLogVal(int i) const { return false; }
-
+ isLogVal(int i) const { return false; }
+
bool
- isIntegerValued(int i) const { return true; }
+ isIntegerValued(int i) const { return true; }
- void
- operator()(Bitext<Token> const& bt,
- PhrasePair<Token>& pp,
+ void
+ operator()(Bitext<Token> const& bt,
+ PhrasePair<Token>& pp,
vector<float> * dest = NULL) const
{
if (!dest) dest = &pp.fvals;
@@ -46,9 +46,9 @@ namespace Moses {
// parse_pid(pp.p2, sid2, off2, len2);
bitvector check1(pp.len1),check2(pp.len2);
for (size_t i = 0; i < pp.aln.size(); )
- {
- check1.set(pp.aln[i++]);
- check2.set(pp.aln.at(i++));
+ {
+ check1.set(pp.aln[i++]);
+ check2.set(pp.aln.at(i++));
}
if (this->m_num_feats == 1)
diff --git a/moses/TranslationModel/UG/sapt_pscore_wordcount.h b/moses/TranslationModel/UG/sapt_pscore_wordcount.h
index 3227bb6ba..a5000be37 100644
--- a/moses/TranslationModel/UG/sapt_pscore_wordcount.h
+++ b/moses/TranslationModel/UG/sapt_pscore_wordcount.h
@@ -1,5 +1,5 @@
// -*- c++ -*-
-// written by Ulrich Germann
+// written by Ulrich Germann
#pragma once
#include "moses/TranslationModel/UG/mm/ug_bitext.h"
#include "util/exception.hh"
@@ -13,7 +13,7 @@ namespace Moses {
class
PScoreWC : public PhraseScorer<Token>
{
- public:
+ public:
PScoreWC(string const dummy)
{
this->m_index = -1;
@@ -21,14 +21,14 @@ namespace Moses {
this->m_feature_names.push_back(string("wordcount"));
}
- void
+ void
operator()(Bitext<Token> const& bt,
- PhrasePair<Token>& pp,
+ PhrasePair<Token>& pp,
vector<float> * dest = NULL) const
{
if (!dest) dest = &pp.fvals;
(*dest)[this->m_index] = pp.len2;
- }
+ }
};
}
}
diff --git a/moses/TranslationModel/UG/sim-pe.cc b/moses/TranslationModel/UG/sim-pe.cc
index 460d66c1f..00a705936 100644
--- a/moses/TranslationModel/UG/sim-pe.cc
+++ b/moses/TranslationModel/UG/sim-pe.cc
@@ -15,7 +15,7 @@ using namespace boost;
vector<FactorType> fo(1,FactorType(0));
-ostream&
+ostream&
operator<<(ostream& out, Hypothesis const* x)
{
vector<const Hypothesis*> H;
@@ -24,7 +24,7 @@ operator<<(ostream& out, Hypothesis const* x)
for (; H.size(); H.pop_back())
{
Phrase const& p = H.back()->GetCurrTargetPhrase();
- for (size_t pos = 0 ; pos < p.GetSize() ; pos++)
+ for (size_t pos = 0 ; pos < p.GetSize() ; pos++)
out << *p.GetFactor(pos, 0) << (H.size() ? " " : "");
}
return out;
@@ -33,19 +33,19 @@ operator<<(ostream& out, Hypothesis const* x)
vector<FactorType> ifo;
size_t lineNumber;
-string
+string
translate(string const& source)
{
StaticData const& global = StaticData::Instance();
- Sentence sentence;
- istringstream ibuf(source+"\n");
+ Sentence sentence;
+ istringstream ibuf(source+"\n");
sentence.Read(ibuf,ifo);
// Manager manager(lineNumber, sentence, global.GetSearchAlgorithm());
Manager manager(sentence, global.GetSearchAlgorithm());
manager.ProcessSentence();
-
+
ostringstream obuf;
const Hypothesis* h = manager.GetBestHypothesis();
obuf << h;
@@ -58,7 +58,7 @@ int main(int argc, char* argv[])
Parameter params;
if (!params.LoadParam(argc,argv) || !StaticData::LoadDataStatic(&params, argv[0]))
exit(1);
-
+
StaticData const& global = StaticData::Instance();
global.SetVerboseLevel(0);
ifo = global.GetInputFactorOrder();
@@ -79,6 +79,6 @@ int main(int argc, char* argv[])
}
exit(0);
}
-
-
+
+
diff --git a/moses/TranslationModel/UG/spe-check-coverage.cc b/moses/TranslationModel/UG/spe-check-coverage.cc
index 6e838ad04..378dd800f 100644
--- a/moses/TranslationModel/UG/spe-check-coverage.cc
+++ b/moses/TranslationModel/UG/spe-check-coverage.cc
@@ -24,13 +24,13 @@ class SimplePhrase : public Moses::Phrase
vector<FactorType> const m_fo; // factor order
public:
SimplePhrase(): m_fo(1,FactorType(0)) {}
-
- void init(string const& s)
+
+ void init(string const& s)
{
istringstream buf(s); string w;
- while (buf >> w)
+ while (buf >> w)
{
- Word wrd;
+ Word wrd;
this->AddWord().CreateFromString(Input,m_fo,StringPiece(w),false,false);
}
}
@@ -45,7 +45,7 @@ public:
bool operator()(size_t a, size_t b) const
{
// return cmp(*my_tpc[a], *my_tpc[b]);
- return (my_tpc[a]->GetScoreBreakdown().GetWeightedScore() >
+ return (my_tpc[a]->GetScoreBreakdown().GetWeightedScore() >
my_tpc[b]->GetScoreBreakdown().GetWeightedScore());
}
};
@@ -59,7 +59,7 @@ int main(int argc, char* argv[])
argfilter[1] = std::make_pair(string("--spe-trg"),1);
argfilter[2] = std::make_pair(string("--spe-aln"),1);
argfilter[3] = std::make_pair(string("--spe-show"),1);
-
+
char** my_args; int my_acnt;
char** mo_args; int mo_acnt;
filter_arguments(argc, argv, mo_acnt, &mo_args, my_acnt, &my_args, argfilter);
@@ -77,9 +77,9 @@ int main(int argc, char* argv[])
else if (!strcmp(my_args[i],"--spe-show"))
vlevel = my_args[i+1];
}
-
+
Parameter params;
- if (!params.LoadParam(mo_acnt,mo_args) ||
+ if (!params.LoadParam(mo_acnt,mo_args) ||
!StaticData::LoadDataStatic(&params, mo_args[0]))
exit(1);
@@ -95,15 +95,15 @@ int main(int argc, char* argv[])
exit(1);
}
mmsapt->SetTableLimit(0);
-
+
string srcline,trgline,alnline;
cout.precision(2);
vector<string> fname = mmsapt->GetFeatureNames();
while (getline(spe_src,srcline))
{
- UTIL_THROW_IF2(!getline(spe_trg,trgline), HERE
+ UTIL_THROW_IF2(!getline(spe_trg,trgline), HERE
<< ": missing data for online updates.");
- UTIL_THROW_IF2(!getline(spe_aln,alnline), HERE
+ UTIL_THROW_IF2(!getline(spe_aln,alnline), HERE
<< ": missing data for online updates.");
cout << string(80,'-') << "\n" << srcline << "\n" << trgline << "\n" << endl;
@@ -127,29 +127,29 @@ int main(int argc, char* argv[])
if (!mmsapt->PrefixExists(p)) break;
TargetPhraseCollection const* trg = PT->GetTargetPhraseCollectionLEGACY(p);
if (!trg || !trg->GetSize()) continue;
-
+
bool header_done = false;
bool has_dynamic_match = vlevel == "all" || vlevel == "ALL";
- vector<size_t> order; order.reserve(trg->GetSize());
+ vector<size_t> order; order.reserve(trg->GetSize());
size_t stop = trg->GetSize();
vector<size_t> o2(trg->GetSize());
for (size_t i = 0; i < stop; ++i) o2[i] = i;
sort(o2.begin(),o2.end(),TargetPhraseIndexSorter(*trg));
-
+
for (size_t r = 0; r < stop; ++r) // r for rank
{
if (vlevel != "ALL")
{
Phrase const& phr = static_cast<Phrase const&>(*(*trg)[o2[r]]);
- ostringstream buf; buf << phr;
- string tphrase = buf.str();
+ ostringstream buf; buf << phr;
+ string tphrase = buf.str();
tphrase.erase(tphrase.size()-1);
size_t s = trgline.find(tphrase);
if (s == string::npos) continue;
size_t e = s + tphrase.size();
if ((s && trgline[s-1] != ' ') || (e < trgline.size() && trgline[e] != ' '))
- continue;
+ continue;
}
order.push_back(r);
if (!has_dynamic_match)
@@ -170,7 +170,7 @@ int main(int argc, char* argv[])
ScoreComponentCollection::IndexPair idx = scc.GetIndexes(PT);
FVector const& scores = scc.GetScoresVector();
float wscore = scc.GetWeightedScore();
- if (vlevel == "new" && scores[idx.first + dynprovidx] == 0)
+ if (vlevel == "new" && scores[idx.first + dynprovidx] == 0)
continue;
if (!header_done)
{
@@ -201,7 +201,7 @@ int main(int argc, char* argv[])
}
cout << " " << format(fmt) % (mmsapt->isInteger(j) ? round(f) : f);
}
- cout << " " << format("%10.3e") % exp(wscore)
+ cout << " " << format("%10.3e") % exp(wscore)
<< " " << format("%10.3e") % exp((*trg)[o2[r]]->GetFutureScore()) << endl;
}
mmsapt->Release(trg);
@@ -213,6 +213,6 @@ int main(int argc, char* argv[])
// }
exit(0);
}
-#endif
-
+#endif
+
diff --git a/moses/TranslationModel/UG/spe-check-coverage2.cc b/moses/TranslationModel/UG/spe-check-coverage2.cc
index fa9ce1c85..3b4f559d2 100644
--- a/moses/TranslationModel/UG/spe-check-coverage2.cc
+++ b/moses/TranslationModel/UG/spe-check-coverage2.cc
@@ -20,7 +20,7 @@ typedef Bitext<Token>::iter iter;
mmbitext bg;
-void
+void
show(ostream& out, iter& f)
{
iter b(bg.I2.get(),f.getToken(0),f.size());
@@ -29,11 +29,11 @@ show(ostream& out, iter& f)
else
out << string(12,' ');
out << " " << setw(5) << int(round(f.approxOccurrenceCount())) << " ";
- out << f.str(bg.V1.get()) << endl;
+ out << f.str(bg.V1.get()) << endl;
}
-void
+void
dump(ostream& out, iter& f)
{
float cnt = f.size() ? f.approxOccurrenceCount() : 0;
@@ -44,12 +44,12 @@ dump(ostream& out, iter& f)
while (f.over());
f.up();
}
- if (f.size() && cnt < f.approxOccurrenceCount() && f.approxOccurrenceCount() > 1)
+ if (f.size() && cnt < f.approxOccurrenceCount() && f.approxOccurrenceCount() > 1)
show(out,f);
}
-void
+void
read_data(string fname, vector<string>& dest)
{
ifstream in(fname.c_str());
@@ -71,6 +71,6 @@ int main(int argc, char* argv[])
dump(cout,mfg);
exit(0);
}
-
-
+
+
diff --git a/moses/TranslationModel/UG/spe-check-coverage3.cc b/moses/TranslationModel/UG/spe-check-coverage3.cc
index ea8c85e99..a62daa7b8 100644
--- a/moses/TranslationModel/UG/spe-check-coverage3.cc
+++ b/moses/TranslationModel/UG/spe-check-coverage3.cc
@@ -22,7 +22,7 @@ typedef Bitext<Token>::iter iter;
mmbitext bg;
vector<string> src,trg,aln;
-void
+void
show(ostream& out, iter& f)
{
iter b(bg.I2.get(),f.getToken(0),f.size());
@@ -31,11 +31,11 @@ show(ostream& out, iter& f)
else
out << string(12,' ');
out << " " << setw(5) << int(round(f.approxOccurrenceCount())) << " ";
- out << f.str(bg.V1.get()) << endl;
+ out << f.str(bg.V1.get()) << endl;
}
-void
+void
dump(ostream& out, iter& f)
{
float cnt = f.size() ? f.approxOccurrenceCount() : 0;
@@ -46,12 +46,12 @@ dump(ostream& out, iter& f)
while (f.over());
f.up();
}
- if (f.size() && cnt < f.approxOccurrenceCount() && f.approxOccurrenceCount() > 1)
+ if (f.size() && cnt < f.approxOccurrenceCount() && f.approxOccurrenceCount() > 1)
show(out,f);
}
-void
+void
read_data(string fname, vector<string>& dest)
{
ifstream in(fname.c_str());
@@ -60,14 +60,14 @@ read_data(string fname, vector<string>& dest)
in.close();
}
-void
-show_snt(ostream& out, TokenIndex const& V, vector<Token> const& snt,
+void
+show_snt(ostream& out, TokenIndex const& V, vector<Token> const& snt,
vector<vector<int> > const& a)
{
for (size_t i = 0; i < snt.size(); ++i)
{
cout << format("%d:%s[") % i % V[snt[i].id()];
- for (size_t k = 0; k < a[i].size(); ++k)
+ for (size_t k = 0; k < a[i].size(); ++k)
cout << (k?",":"") << a[i][k];
cout << "] ";
}
@@ -77,7 +77,7 @@ show_snt(ostream& out, TokenIndex const& V, vector<Token> const& snt,
void show_pair(size_t const sid)
{
- vector<Token> s,t;
+ vector<Token> s,t;
fill_token_seq(*bg.V1,src[sid],s);
fill_token_seq(*bg.V2,trg[sid],t);
vector<vector<int> > a1(s.size()),a2(t.size());
@@ -97,11 +97,11 @@ void show_pair(size_t const sid)
int main(int argc, char* argv[])
{
- if (argc < 5)
+ if (argc < 5)
{
- cerr << "usage: " << argv[0]
- << " <bg base name> <L1> <L2> <fg base name>"
- << endl;
+ cerr << "usage: " << argv[0]
+ << " <bg base name> <L1> <L2> <fg base name>"
+ << endl;
exit(1);
}
bg.open(argv[1],argv[2],argv[3]);
@@ -122,10 +122,10 @@ int main(int argc, char* argv[])
bias[sid] = 0;
// cout << src[sid] << endl << trg[sid] << endl;
// show_pair(sid);
- vector<Token> snt;
+ vector<Token> snt;
fill_token_seq(*bg.V1,src[sid],snt);
vector<vector<sptr<vector<PhrasePair<Token> > > > > FG,BG;
- fg->lookup(snt,*fg->I1,FG,NULL,NULL,&bias,true);
+ fg->lookup(snt,*fg->I1,FG,NULL,NULL,&bias,true);
bg.lookup(snt,*bg.I1,BG,NULL,NULL,NULL,true);
set<sptr<vector<PhrasePair<Token> > > > seen;
for (size_t i = 0; i < snt.size(); ++i)
@@ -136,7 +136,7 @@ int main(int argc, char* argv[])
{
if (!m0.extend(snt[i+k].id())) break;
if (k && m0.approxOccurrenceCount() < 2) break;
- if (m1.size() == k && (!m1.extend(snt[i+k].id()) ||
+ if (m1.size() == k && (!m1.extend(snt[i+k].id()) ||
m1.approxOccurrenceCount() < 25))
{
cout << toString((*fg->V1), m0.getToken(0), m0.size()) << " "
@@ -156,8 +156,8 @@ int main(int argc, char* argv[])
sptr<pstats> bgstats;
jstats const* bgjstats = NULL;
Bitext<Token>::iter m2(bg.I2.get(), pp.start2, pp.len2);
- if (m1.approxOccurrenceCount() > 5000 ||
- m2.approxOccurrenceCount() > 5000)
+ if (m1.approxOccurrenceCount() > 5000 ||
+ m2.approxOccurrenceCount() > 5000)
continue;
if (m1.size() == pp.len1 && m2.size() == pp.len2)
{
@@ -173,9 +173,9 @@ int main(int argc, char* argv[])
cout << toString(*fg->V1, pp.start1, pp.len1) << " ::: "
<< toString(*fg->V2, pp.start2, pp.len2) << " "
<< format("[%u/%u/%u]") % pp.good1 % pp.joint % pp.good2;
- if (bgjstats)
- cout << " " << (format("[%u/%u/%u]")
- % bgstats->good % bgjstats->rcnt()
+ if (bgjstats)
+ cout << " " << (format("[%u/%u/%u]")
+ % bgstats->good % bgjstats->rcnt()
% (bgjstats->cnt2() * bgstats->good
/ bgstats->raw_cnt));
else if (m1.size() == pp.len1)
@@ -189,6 +189,6 @@ int main(int argc, char* argv[])
}
exit(0);
}
-
-
+
+
diff --git a/moses/TranslationModel/UG/try-align.cc b/moses/TranslationModel/UG/try-align.cc
index daafec545..60eabb9e7 100644
--- a/moses/TranslationModel/UG/try-align.cc
+++ b/moses/TranslationModel/UG/try-align.cc
@@ -17,7 +17,7 @@ float lbop_level = .05;
namespace stats
{
using namespace Moses::bitext;
- float
+ float
pmi(size_t j,size_t m1, size_t m2, size_t N)
{
#if smooth
@@ -29,8 +29,8 @@ namespace stats
return log(j) + log(N) - log(m1) - log(m2);
#endif
}
-
- float
+
+ float
npmi(size_t j,size_t m1, size_t m2, size_t N)
{
#if smooth
@@ -39,11 +39,11 @@ namespace stats
float p12 = lbop(N,j,lbop_level);
return (log(p12) - log(p1) - log(p2)) / -log(p12);
#else
- return pmi(j,m1,m2,N) / (log(N) - log(j));
+ return pmi(j,m1,m2,N) / (log(N) - log(j));
#endif
}
- float
+ float
mi(size_t j,size_t m1, size_t m2, size_t N)
{
float ret = 0;
@@ -79,7 +79,7 @@ struct PhrasePair
float mi; // mutual information
float score;
- void
+ void
set(vector<ttrack::Position> const& o1,
vector<ttrack::Position> const& o2,
size_t const N)
@@ -90,7 +90,7 @@ struct PhrasePair
{
if (i1 && o1[i1].sid == o1[i1-1].sid) { ++i1; continue; }
if (i2 && o2[i2].sid == o2[i2-1].sid) { ++i2; continue; }
-
+
if (o1[i1].sid == o2[i2].sid) { ++j; ++i1; ++i2; ++m1; ++m2; }
else if (o1[i1].sid < o2[i2].sid) { ++i1; ++m1; }
else { ++i2; ++m2; }
@@ -114,19 +114,19 @@ struct PhrasePair
this->score = npmi; // npmi; // hmean; // /sqrt(z);
}
} stats;
-
+
PhrasePair(ushort s1_=0, ushort e1_=0, ushort s2_=0, ushort e2_=0)
: s1(s1_), e1(e1_), s2(s2_), e2(e2_), parent(-1) { }
- bool
+ bool
operator<(PhrasePair const& other) const
- {
- return (this->stats.score == other.stats.score
+ {
+ return (this->stats.score == other.stats.score
? (e1-s1 + e2-s2 > other.e1-other.s1 + other.e2-other.s2)
- : (this->stats.score > other.stats.score));
+ : (this->stats.score > other.stats.score));
}
-
+
size_t len1() const { return e1 - s1; }
size_t len2() const { return e2 - s2; }
bool includes(PhrasePair const& o) const
@@ -142,8 +142,8 @@ PhrasePair::stats_t::cache_t ppcache;
struct SortByPositionInCorpus
{
- bool
- operator()(ttrack::Position const& a,
+ bool
+ operator()(ttrack::Position const& a,
ttrack::Position const& b) const
{
return a.sid != b.sid ? a.sid < b.sid : a.offset < b.offset;
@@ -151,8 +151,8 @@ struct SortByPositionInCorpus
};
-void
-getoccs(tsa_t::tree_iterator const& m,
+void
+getoccs(tsa_t::tree_iterator const& m,
vector<ttrack::Position>& occs)
{
occs.clear();
@@ -166,9 +166,9 @@ getoccs(tsa_t::tree_iterator const& m,
sort(occs.begin(),occs.end(),SortByPositionInCorpus());
}
-void
-lookup_phrases(vector<id_type> const& snt,
- TokenIndex& V, ttrack_t const& T,
+void
+lookup_phrases(vector<id_type> const& snt,
+ TokenIndex& V, ttrack_t const& T,
tsa_t const& I, SinglePhrase::cache_t& cache,
vector<vector<sptr<SinglePhrase> > >& dest)
{
@@ -182,7 +182,7 @@ lookup_phrases(vector<id_type> const& snt,
if (m.approxOccurrenceCount() < 3) break;
// if (k - i > 0) break;
sptr<SinglePhrase>& o = cache[m.getPid()];
- if (!o)
+ if (!o)
{
o.reset(new SinglePhrase());
o->pid = m.getPid();
@@ -193,7 +193,7 @@ lookup_phrases(vector<id_type> const& snt,
}
}
-struct
+struct
RowIndexSorter
{
vector<vector<float> > const& M;
@@ -202,14 +202,14 @@ RowIndexSorter
: M(m), my_col(c) { }
template<typename T>
- bool
- operator()(T const& a, T const& b) const
- {
+ bool
+ operator()(T const& a, T const& b) const
+ {
return M.at(a).at(my_col) > M.at(b).at(my_col);
}
};
-struct
+struct
ColIndexSorter
{
vector<vector<float> > const& M;
@@ -218,9 +218,9 @@ ColIndexSorter
: M(m), my_row(r) { }
template<typename T>
- bool
- operator()(T const& a, T const& b) const
- {
+ bool
+ operator()(T const& a, T const& b) const
+ {
return M.at(my_row).at(a) > M[my_row].at(b);
}
@@ -234,7 +234,7 @@ int main(int argc, char* argv[])
T1.reset(new ttrack_t());
T2.reset(new ttrack_t());
-
+
V1.open(base + L1 + ".tdx");
T1->open(base + L1 + ".mct");
I1.open(base + L1 + ".sfa", T1);
@@ -259,7 +259,7 @@ int main(int argc, char* argv[])
vector<PhrasePair> pp_all,pp_good;
vector<int> a1(snt1.size(),-1);
vector<int> a2(snt2.size(),-1);
-
+
vector<vector<int> > z1(snt1.size(),vector<int>(snt1.size(),-1));
vector<vector<int> > z2(snt2.size(),vector<int>(snt2.size(),-1));
vector<vector<vector<PhrasePair> > >ppm1(M1.size()),ppm2(M2.size());
@@ -282,9 +282,9 @@ int main(int argc, char* argv[])
for (size_t k2 = 0; k2 < M2[i2].size(); ++k2)
{
pp.e2 = i2 + k2 + 1;
- sptr<PhrasePair::stats_t> & s
+ sptr<PhrasePair::stats_t> & s
= ppcache[make_pair(M1[i1][k1]->pid,M2[i2][k2]->pid)];
- if (!s)
+ if (!s)
{
s.reset(new PhrasePair::stats_t());
s->set(M1[i1][k1]->occs,M2[i2][k2]->occs,T1->size());
@@ -294,8 +294,8 @@ int main(int argc, char* argv[])
// ppm1[i1][k1].push_back(pp);
// ppm2[i2][k2].push_back(pp);
size_t J = pp.stats.j * 100;
- if (pp.stats.score > 0
- && J >= pp.stats.m1
+ if (pp.stats.score > 0
+ && J >= pp.stats.m1
&& J > pp.stats.m2)
{ pp_all.push_back(pp); }
}
@@ -310,7 +310,7 @@ int main(int argc, char* argv[])
for (size_t r = pp.s1; r < pp.e1; ++r)
for (size_t c = pp.s2; c < pp.e2; ++c)
{
- // M[r][c] += log(1-pp.stats.npmi);
+ // M[r][c] += log(1-pp.stats.npmi);
M[r][c] += log(1-pp.stats.mi);
}
}
@@ -342,11 +342,11 @@ int main(int argc, char* argv[])
}
cout << endl;
}
-#endif
+#endif
#if 0
for (size_t k = 1; k < pp_all.size(); ++k)
for (size_t i = k; i--;)
- if (pp_all[i].s1 >= pp_all[k].s1 &&
+ if (pp_all[i].s1 >= pp_all[k].s1 &&
pp_all[i].e1 <= pp_all[k].e1 &&
pp_all[i].s2 >= pp_all[k].s2 &&
pp_all[i].e2 <= pp_all[k].e2)
@@ -360,35 +360,35 @@ int main(int argc, char* argv[])
{
PhrasePair const& x = pp_all[p];
// if (x.stats.npmi < .7) break;
- // if (z1[x.s1][x.e1-1] >= 0 || z2[x.s2][x.e2-1] >=0)
+ // if (z1[x.s1][x.e1-1] >= 0 || z2[x.s2][x.e2-1] >=0)
// continue;
- for (size_t i = x.s1; i < x.e1; ++i)
+ for (size_t i = x.s1; i < x.e1; ++i)
{
- if (assoc1[i] < 0)
+ if (assoc1[i] < 0)
assoc1[i] = p;
else
{
// PhrasePair& y = pp_all[assoc1[i]];
- // if (y.includes(x))
+ // if (y.includes(x))
// assoc1[i] = p;
}
}
- for (size_t i = x.s2; i < x.e2; ++i)
+ for (size_t i = x.s2; i < x.e2; ++i)
{
- if (assoc2[i] < 0)
+ if (assoc2[i] < 0)
assoc2[i] = p;
else
{
// PhrasePair& y = pp_all[assoc2[i]];
- // if (y.includes(x))
+ // if (y.includes(x))
// assoc2[i] = p;
}
}
z1[x.s1][x.e1-1] = p;
z2[x.s2][x.e2-1] = p;
continue;
- cout << (boost::format("%.4f %.8f %.4f")
- % x.stats.score
+ cout << (boost::format("%.4f %.8f %.4f")
+ % x.stats.score
% x.stats.mi
% x.stats.npmi);
for (size_t z = x.s1; z < x.e1; ++z)
@@ -396,8 +396,8 @@ int main(int argc, char* argv[])
cout << " :::";
for (size_t z = x.s2; z < x.e2; ++z)
cout << " " << V2[snt2[z]];
- cout << " ["
- << x.stats.m1 << "/" << x.stats.j << "/" << x.stats.m2
+ cout << " ["
+ << x.stats.m1 << "/" << x.stats.j << "/" << x.stats.m2
<< "]" << endl;
}
vector<bool> done(pp_all.size(),false);
@@ -415,8 +415,8 @@ int main(int argc, char* argv[])
cout << " ::: ";
for (size_t j = p.s2; j < p.e2; ++j)
cout << j << ":" << V2[snt2[j]] << " ";
- cout << "["
- << p.stats.m1 << "/" << p.stats.j << "/" << p.stats.m2
+ cout << "["
+ << p.stats.m1 << "/" << p.stats.j << "/" << p.stats.m2
<< "] "<< p.stats.score << endl;
// break;
}
@@ -433,20 +433,20 @@ int main(int argc, char* argv[])
cout << " ::: ";
for (size_t j = p.s2; j < p.e2; ++j)
cout << j << ":" << V2[snt2[j]] << " ";
- cout << "["
- << p.stats.m1 << "/" << p.stats.j << "/" << p.stats.m2
+ cout << "["
+ << p.stats.m1 << "/" << p.stats.j << "/" << p.stats.m2
<< "] "<< p.stats.score << endl;
}
-#endif
+#endif
// sort(pp_all.begin(),pp_all.end());
// BOOST_FOREACH(PhrasePair const& pp, pp_all)
// {
- // while (ppm1[pp.s1].size() < pp.e1 - pp.s1)
+ // while (ppm1[pp.s1].size() < pp.e1 - pp.s1)
// ppm1[pp.s1].push_back(vector<PhrasePair>());
// vector<PhrasePair>& v1 = ppm1[pp.s1][pp.e1-pp.s1-1];
// if (v1.size() && v1[0].stats.score > pp.stats.score)
// continue;
- // while (ppm2[pp.s2].size() < pp.e2 - pp.s2)
+ // while (ppm2[pp.s2].size() < pp.e2 - pp.s2)
// ppm2[pp.s2].push_back(vector<PhrasePair>());
// vector<PhrasePair>& v2 = ppm2[pp.s2][pp.e2-pp.s2-1];
// if (v2.size() && v2[0].stats.score > pp.stats.score)
@@ -455,12 +455,12 @@ int main(int argc, char* argv[])
// v2.push_back(pp);
// }
-
+
// BOOST_FOREACH(vector<vector<PhrasePair> >& vv, ppm1)
- // {
- // BOOST_FOREACH(vector<PhrasePair>& v, vv)
- // {
- // sort(v.begin(),v.end());
+ // {
+ // BOOST_FOREACH(vector<PhrasePair>& v, vv)
+ // {
+ // sort(v.begin(),v.end());
// if (v.size() > 1 && v[0].stats.score == v[1].stats.score)
// v.clear();
// }
@@ -468,19 +468,19 @@ int main(int argc, char* argv[])
// for (size_t i2 = 0; i2 < ppm2.size(); ++i2)
// {
// for (size_t k2 = 0; k2 < ppm2[i2].size(); ++k2)
- // {
+ // {
// vector<PhrasePair>& v2 = ppm2[i2][k2];
// sort(v2.begin(),v2.end());
- // if (v2.size() > 1 && v2[0].stats.score == v2[1].stats.score)
+ // if (v2.size() > 1 && v2[0].stats.score == v2[1].stats.score)
// {
// v2.clear();
// continue;
// }
// ushort i1 = v2[0].s1;
// ushort k1 = v2[0].e1 - i1 -1;
-
- // if (ppm1[i1][k1].size() == 0 ||
- // ppm1[i1][k1][0].s2 != i2 ||
+
+ // if (ppm1[i1][k1].size() == 0 ||
+ // ppm1[i1][k1][0].s2 != i2 ||
// ppm1[i1][k1][0].e2 != i2 + k2 + 1)
// { v2.clear(); }
// else pp_good.push_back(ppm2[i2][k2][0]);
@@ -508,7 +508,7 @@ int main(int argc, char* argv[])
// // cout << V2[snt2[z]] << " ";
// // cout << pp.m1 << "/" << pp.j << "/" << pp.m2 << endl;
// // }
-
+
}
}
diff --git a/moses/TranslationModel/UG/try-align2.cc b/moses/TranslationModel/UG/try-align2.cc
index 57cf25035..a18ce8d92 100644
--- a/moses/TranslationModel/UG/try-align2.cc
+++ b/moses/TranslationModel/UG/try-align2.cc
@@ -29,7 +29,7 @@ float lbop_level = .05;
namespace stats
{
using namespace Moses::bitext;
- float
+ float
pmi(size_t j,size_t m1, size_t m2, size_t N)
{
#if smooth
@@ -41,8 +41,8 @@ namespace stats
return log(j) + log(N) - log(m1) - log(m2);
#endif
}
-
- float
+
+ float
npmi(size_t j,size_t m1, size_t m2, size_t N)
{
#if smooth
@@ -52,11 +52,11 @@ namespace stats
float p12 = lbop(N,j,lbop_level);
return (log(p12) - log(p1) - log(p2)) / -log(p12);
#else
- return pmi(j,m1,m2,N) / (log(N) - log(j));
+ return pmi(j,m1,m2,N) / (log(N) - log(j));
#endif
}
- float
+ float
mi(size_t j,size_t m1, size_t m2, size_t N)
{
float ret = 0;
@@ -92,7 +92,7 @@ struct PhrasePair2
float mi; // mutual information
float score;
- void
+ void
set(vector<ttrack::Position> const& o1,
vector<ttrack::Position> const& o2,
size_t const N)
@@ -103,7 +103,7 @@ struct PhrasePair2
{
if (i1 && o1[i1].sid == o1[i1-1].sid) { ++i1; continue; }
if (i2 && o2[i2].sid == o2[i2-1].sid) { ++i2; continue; }
-
+
if (o1[i1].sid == o2[i2].sid) { ++j; ++i1; ++i2; ++m1; ++m2; }
else if (o1[i1].sid < o2[i2].sid) { ++i1; ++m1; }
else { ++i2; ++m2; }
@@ -127,19 +127,19 @@ struct PhrasePair2
this->score = npmi; // npmi; // hmean; // /sqrt(z);
}
} stats;
-
+
PhrasePair2(ushort s1_=0, ushort e1_=0, ushort s2_=0, ushort e2_=0)
: s1(s1_), e1(e1_), s2(s2_), e2(e2_), parent(-1) { }
- bool
+ bool
operator<(PhrasePair2 const& other) const
- {
- return (this->stats.score == other.stats.score
+ {
+ return (this->stats.score == other.stats.score
? (e1-s1 + e2-s2 > other.e1-other.s1 + other.e2-other.s2)
- : (this->stats.score > other.stats.score));
+ : (this->stats.score > other.stats.score));
}
-
+
size_t len1() const { return e1 - s1; }
size_t len2() const { return e2 - s2; }
bool includes(PhrasePair2 const& o) const
@@ -155,8 +155,8 @@ PhrasePair2::stats_t::cache_t ppcache;
struct SortByPositionInCorpus
{
- bool
- operator()(ttrack::Position const& a,
+ bool
+ operator()(ttrack::Position const& a,
ttrack::Position const& b) const
{
return a.sid != b.sid ? a.sid < b.sid : a.offset < b.offset;
@@ -164,8 +164,8 @@ struct SortByPositionInCorpus
};
-void
-getoccs(tsa_t::tree_iterator const& m,
+void
+getoccs(tsa_t::tree_iterator const& m,
vector<ttrack::Position>& occs)
{
occs.clear();
@@ -179,9 +179,9 @@ getoccs(tsa_t::tree_iterator const& m,
sort(occs.begin(),occs.end(),SortByPositionInCorpus());
}
-void
-lookup_phrases(vector<id_type> const& snt,
- TokenIndex& V, ttrack_t const& T,
+void
+lookup_phrases(vector<id_type> const& snt,
+ TokenIndex& V, ttrack_t const& T,
tsa_t const& I, SinglePhrase::cache_t& cache,
vector<vector<sptr<SinglePhrase> > >& dest)
{
@@ -195,7 +195,7 @@ lookup_phrases(vector<id_type> const& snt,
if (m.approxOccurrenceCount() < 3) break;
// if (k - i > 0) break;
sptr<SinglePhrase>& o = cache[m.getPid()];
- if (!o)
+ if (!o)
{
o.reset(new SinglePhrase());
o->pid = m.getPid();
@@ -207,7 +207,7 @@ lookup_phrases(vector<id_type> const& snt,
}
-struct
+struct
RowIndexSorter
{
vector<vector<float> > const& M;
@@ -216,14 +216,14 @@ RowIndexSorter
: M(m), my_col(c) { }
template<typename T>
- bool
- operator()(T const& a, T const& b) const
- {
+ bool
+ operator()(T const& a, T const& b) const
+ {
return M.at(a).at(my_col) > M.at(b).at(my_col);
}
};
-struct
+struct
ColIndexSorter
{
vector<vector<float> > const& M;
@@ -232,9 +232,9 @@ ColIndexSorter
: M(m), my_row(r) { }
template<typename T>
- bool
- operator()(T const& a, T const& b) const
- {
+ bool
+ operator()(T const& a, T const& b) const
+ {
return M.at(my_row).at(a) > M[my_row].at(b);
}
@@ -249,7 +249,7 @@ public:
{
#if 0
cout << pp.raw1 << " " << pp.sample1 << " " << pp.good1 << " "
- << pp.raw2 << " " << pp.sample2 << " " << pp.good2 << " "
+ << pp.raw2 << " " << pp.sample2 << " " << pp.good2 << " "
<< pp.joint << " " << __FILE__ << ":" << __LINE__ << endl;
#endif
pp.good2 = ceil(pp.raw2 * float(pp.good1)/pp.raw1);
@@ -266,7 +266,7 @@ class Alnhyp
};
-size_t
+size_t
lcs(string const a, string const b)
{
using namespace stringdist;
@@ -279,10 +279,10 @@ lcs(string const a, string const b)
{
StringDiff::Segment const& s = diff[i];
if (s.match != StringDiff::same && s.match != StringDiff::cap)
- {
+ {
if (len > ret) ret = len;
- len = 0;
- continue;
+ len = 0;
+ continue;
}
len += s.end_a - s.start_a;
}
@@ -290,9 +290,9 @@ lcs(string const a, string const b)
return ret;
}
-size_t
-mapstring(string const& utf8,
- UnicodeString& U,
+size_t
+mapstring(string const& utf8,
+ UnicodeString& U,
vector<int>& c2w,
vector<int>* wlen=NULL)
{
@@ -338,10 +338,10 @@ align_letters(UnicodeString const& A, vector<int> const& a2p,
// }
}
-void
+void
map_back(vector<vector<int> > const& W,
vector<vector<int> > & X,
- vector<uchar> const & aln)
+ vector<uchar> const & aln)
{
for (size_t i = 0; i < aln.size(); i += 2)
{
@@ -354,7 +354,7 @@ map_back(vector<vector<int> > const& W,
}
-void trymatch3(vector<PhrasePair<Token> > const& tcands,
+void trymatch3(vector<PhrasePair<Token> > const& tcands,
UnicodeString const& T, size_t const tlen,
vector<int> const& t2p,
TokenIndex const& V2, vector<vector<int> >&X)
@@ -374,8 +374,8 @@ void trymatch3(vector<PhrasePair<Token> > const& tcands,
cout << slen << " " << tlen << endl;
cout << "W: " << W.size() << " rows; " << W[0].size() << " cols" << endl;
cout << "X: " << X.size() << " rows; " << X[0].size() << " cols" << endl;
- cout << "aln: ";
- for (size_t a = 0; a < pp.aln.size(); a +=2)
+ cout << "aln: ";
+ for (size_t a = 0; a < pp.aln.size(); a +=2)
cout << int(pp.aln[a]) << "-" << int(pp.aln[a+1]) << " ";
cout << endl;
#endif
@@ -383,7 +383,7 @@ void trymatch3(vector<PhrasePair<Token> > const& tcands,
}
}
-void minmatch_filter(vector<vector<int> > & X,
+void minmatch_filter(vector<vector<int> > & X,
vector<int> const& len1,
vector<int> const& len2)
{
@@ -437,20 +437,20 @@ trymatch2(TokenIndex& V1, // source language vocab
TokenIndex& V2, // target language vocab
string const& source, // source phrase
string const& target, // observed target candidate
- vector<PhrasePair<Token> > const* const tcands,
+ vector<PhrasePair<Token> > const* const tcands,
vector<vector<int> >& X) // destination alignment matrix
// tcands: translations for source
{
- UnicodeString S,T;
+ UnicodeString S,T;
vector<int> t2p, s2p; // maps from character position in string to word pos.
vector<int> wlen_t, wlen_s; // individual word lengths
size_t slen = mapstring(source, S, s2p, &wlen_s);
size_t tlen = mapstring(target, T, t2p, &wlen_t);
-
+
X.assign(slen,vector<int>(tlen,0));
- if (slen == 1 && tlen ==1 && S == T)
+ if (slen == 1 && tlen ==1 && S == T)
X[0][0] = S.length();
- else
+ else
{
align_letters(S,s2p,T,t2p,X);
if (tcands) trymatch3(*tcands, T, tlen, t2p, V2, X);
@@ -475,7 +475,7 @@ trymatch2(TokenIndex& V1, // source language vocab
// float
-// trymatch(string const a, string const b,
+// trymatch(string const a, string const b,
// vector<PhrasePair<Token> > const* atrans,
// vector<PhrasePair<Token> > const* btrans)
// {
@@ -501,11 +501,11 @@ trymatch2(TokenIndex& V1, // source language vocab
// // float bar = float(lcs(foo,b))/min(foo.size(),b.size());
// float bar = float(lcs(foo,b));
-// if (bar > .5)
+// if (bar > .5)
// {
// // score = max(pp.score * bar,score);
// score = max(bar,score);
-// // cout << "[" << bar << "] " << foo << " ::: " << b
+// // cout << "[" << bar << "] " << foo << " ::: " << b
// // << " (" << a << ") " << pp.score << endl;
// }
// }
@@ -525,10 +525,10 @@ trymatch2(TokenIndex& V1, // source language vocab
// string foo = toString(*BT.V1,pp.start2,pp.len2);
// // float bar = float(lcs(a,foo))/min(a.size(),foo.size());
// float bar = float(lcs(a,foo));
-// if (bar > .5)
+// if (bar > .5)
// {
// score = max(bar,score);
-// // cout << "[" << bar<< "] " << a << " ::: " << foo
+// // cout << "[" << bar<< "] " << a << " ::: " << foo
// // << " (" << b << ") " << pp.score << endl;
// }
// }
@@ -547,8 +547,8 @@ struct ahyp
struct AlnPoint
{
enum status { no = 0, yes = 1, maybe = -1, undef = -7 };
- float score;
- status state;
+ float score;
+ status state;
AlnPoint() : score(0), state(undef) {}
};
@@ -562,14 +562,14 @@ class AlnMatrix
vector<bitvector> A1,A2; // final alignment matrix
vector<bitvector> S1,S2; // shadow alignment matrix
public:
- vector<bitvector*> m1,m2; // margins
+ vector<bitvector*> m1,m2; // margins
AlnMatrix(size_t const rows, size_t const cols);
- bitvector const&
+ bitvector const&
operator[](size_t const r) const
{ return A1.at(r); }
bool
- incorporate(span_t const& rspan, span_t const& cspan,
+ incorporate(span_t const& rspan, span_t const& cspan,
vector<uchar> const& aln, bool const flip);
size_t size() const { return A1.size(); }
@@ -588,9 +588,9 @@ AlnMatrix(size_t const rows, size_t const cols)
bool
AlnMatrix::
-incorporate(span_t const& rspan,
- span_t const& cspan,
- vector<uchar> const& aln,
+incorporate(span_t const& rspan,
+ span_t const& cspan,
+ vector<uchar> const& aln,
bool const flip)
{
for (size_t r = rspan.first; r < rspan.second; ++r)
@@ -622,7 +622,7 @@ incorporate(span_t const& rspan,
if (m1[r] && (*m1[r]) != S1[r]) return false;
for (size_t c = cspan.first; c < cspan.second; ++c)
if (m2[c] && (*m2[c]) != S2[c]) return false;
-
+
// all good, add new points
for (size_t r = rspan.first; r < rspan.second; ++r)
if (!m1[r]) { A1[r] = S1[r]; m1[r] = &A1[r]; }
@@ -632,9 +632,9 @@ incorporate(span_t const& rspan,
return true;
}
-struct alink
-{
- size_t r,c,m;
+struct alink
+{
+ size_t r,c,m;
bool operator<(alink const& o) const { return m < o.m; }
bool operator>(alink const& o) const { return m > o.m; }
};
@@ -659,9 +659,9 @@ int main(int argc, char* argv[])
vector<vector<uint64_t> > pm1,pm2;
BT.lookup(snt1,*BT.I1,pt1,&pm1,&scorer);
BT.lookup(snt2,*BT.I2,pt2,&pm2,&scorer);
-
+
// build map from phrases to positions
- typedef boost::unordered_map<uint64_t, vector<span_t> >
+ typedef boost::unordered_map<uint64_t, vector<span_t> >
p2s_map_t;
typedef p2s_map_t::iterator p2s_iter;
p2s_map_t p2s1,p2s2;
@@ -684,7 +684,7 @@ int main(int argc, char* argv[])
BOOST_FOREACH(PhrasePair<Token> const& pp, *pt1[i][k])
{
if (pp.score < 0) break;
- if (p2s2.find(pp.p2) != p2s2.end())
+ if (p2s2.find(pp.p2) != p2s2.end())
pp_all.push_back(pp);
}
}
@@ -704,10 +704,10 @@ int main(int argc, char* argv[])
{
PhrasePair<Token> const& pp = pp_all[p];
#if 0
- cout << (boost::format("%30s ::: %-30s ")
+ cout << (boost::format("%30s ::: %-30s ")
% BT.toString(pp.p1,0).c_str()
% BT.toString(pp.p2,1).c_str());
- cout << (boost::format("%.4f [%d/%d/%d]")
+ cout << (boost::format("%.4f [%d/%d/%d]")
% pp.score % pp.good1 % pp.joint % pp.good2);
for (size_t a = 0; a < pp.aln.size(); a += 2)
cout << " " << int(pp.aln[a]) << "-" << int(pp.aln[a+1]);
@@ -720,7 +720,7 @@ int main(int argc, char* argv[])
for (size_t i = v1[0].first; i < v1[0].second; ++i)
if (a1[i] < 0) a1[i] = p;
if (v2.size() == 1)
- for (size_t i = v2[0].first; i < v2[0].second; ++i)
+ for (size_t i = v2[0].first; i < v2[0].second; ++i)
if (a2[i] < 0) a2[i] = p;
if (v1.size() == 1 && v2.size() == 1)
@@ -740,11 +740,11 @@ int main(int argc, char* argv[])
vector<PhrasePair<Token> > const* atrans, *btrans;
ahyp h;
vector<ahyp> hyps;
- vector<vector<int> > L(snt1.size(),vector<int>(snt2.size(),0));
+ vector<vector<int> > L(snt1.size(),vector<int>(snt2.size(),0));
// L: matches by letter overlap
for (h.s1 = 0; h.s1 < a1.size(); ++h.s1)
- {
+ {
if (a1[h.s1] >= 0) continue;
ostringstream buf1;
for (h.e1 = h.s1; h.e1 < a1.size() && a1[h.e1] < 0; ++h.e1)
@@ -762,23 +762,23 @@ int main(int argc, char* argv[])
if (a2[h.s2] >= 0) continue;
for (h.e2 = h.s2; h.e2 < a2.size() && a2[h.e2] < 0; ++h.e2)
{
- if (h.e2 > h.s2)
+ if (h.e2 > h.s2)
{
if (pt2[h.s2].size() + h.s2 <= h.e2) break;
buf2 << " ";
}
buf2 << (*BT.V2)[snt2[h.e2].id()];
- btrans = (pt2[h.s2].size()
- ? pt2[h.s2].at(h.e2-h.s2).get()
+ btrans = (pt2[h.s2].size()
+ ? pt2[h.s2].at(h.e2-h.s2).get()
: NULL);
vector<vector<int> > aln;
- trymatch2(*BT.V1, *BT.V2, buf1.str(),buf2.str(),
+ trymatch2(*BT.V1, *BT.V2, buf1.str(),buf2.str(),
atrans,aln);
for (size_t i = 0; i < aln.size(); ++i)
for (size_t k = 0; k < aln[i].size(); ++k)
L[h.s1+i][h.s2+k] = max(L[h.s1+i][h.s2+k],aln[i][k]);
- trymatch2(*BT.V2, *BT.V1, buf2.str(),buf1.str(),
+ trymatch2(*BT.V2, *BT.V1, buf2.str(),buf1.str(),
btrans,aln);
for (size_t i = 0; i < aln[0].size(); ++i)
for (size_t k = 0; k < aln.size(); ++k)
@@ -795,7 +795,7 @@ int main(int argc, char* argv[])
alink x;
for (x.r = 0; x.r < L.size(); ++x.r)
{
-
+
for (x.c = 0; x.c < L[x.r].size(); ++x.c)
{
x.m = L[x.r][x.c];
@@ -807,22 +807,22 @@ int main(int argc, char* argv[])
BOOST_FOREACH(alink& x, links)
{
- if (L[x.r][x.c])
+ if (L[x.r][x.c])
{
cout << (*BT.V1)[snt1[x.r].id()] << " ::: "
<< (*BT.V2)[snt2[x.c].id()] << " ::: "
<< L[x.r][x.c] << endl;
}
- }
+ }
// sort(hyps.begin(),hyps.end(),greater<ahyp>());
// BOOST_FOREACH(ahyp const& h, hyps)
// {
// if (h.score < .5) break;
- // for (size_t i = h.s1; i <= h.e1; ++i)
+ // for (size_t i = h.s1; i <= h.e1; ++i)
// cout << i << ":" << (*BT.V1)[snt1[i].id()] << " ";
// cout << " ::: ";
- // for (size_t i = h.s2; i <= h.e2; ++i)
+ // for (size_t i = h.s2; i <= h.e2; ++i)
// cout << i << ":" << (*BT.V2)[snt2[i].id()] << " ";
// cout << h.score << endl;
// }
@@ -854,15 +854,15 @@ int main(int argc, char* argv[])
// #if 0
// if (match)
// {
-// if (first)
+// if (first)
// {
// cout << BT.toString(pm1[i][k],0) << endl;
// first = false;
// }
-// cout << boost::format("%.4f") % pt.score << " "
+// cout << boost::format("%.4f") % pt.score << " "
// << setw(5) << d1 << " " << (match ? "* " : " ")
// << toString(*BT.V2, pt.start2, pt.len2) << " ["
-// << pt.good1 << "/" << pt.joint << "/"
+// << pt.good1 << "/" << pt.joint << "/"
// << pt.good2 << "]";
// for (size_t a = 0; a < pt.aln.size(); a += 2)
// cout << " " << int(pt.aln[a]) << "-" << int(pt.aln[a+1]);
@@ -879,7 +879,7 @@ int main(int argc, char* argv[])
// pp_all.push_back(pt);
// // pp_all.back().m1 -= d1;
// }
-
+
// }
// if (!first) cout << endl;
// }
diff --git a/moses/TranslationModel/UG/util/ibm1-align.cc b/moses/TranslationModel/UG/util/ibm1-align.cc
index 08ac1f89b..3c43743d0 100644
--- a/moses/TranslationModel/UG/util/ibm1-align.cc
+++ b/moses/TranslationModel/UG/util/ibm1-align.cc
@@ -1,7 +1,7 @@
// -*- c++ -*-
// Parallel text alignment via IBM1 / raw counts of word alignments
// aiming at high precision (to seed Yawat alignments)
-// This program is tailored for use with Yawat.
+// This program is tailored for use with Yawat.
// Written by Ulrich Germann.
#include <string>
@@ -29,20 +29,20 @@ public:
table_t COOC;
TokenIndex V1,V2;
- void
+ void
align(string const& s1, string const& s2, vector<int>& aln) const;
- void
- align(vector<id_type> const& x1,
- vector<id_type> const& x2,
+ void
+ align(vector<id_type> const& x1,
+ vector<id_type> const& x2,
vector<int>& aln) const;
-
- void
- fill_amatrix(vector<id_type> const& x1,
- vector<id_type> const& x2,
+
+ void
+ fill_amatrix(vector<id_type> const& x1,
+ vector<id_type> const& x2,
vector<vector<int> >& aln) const;
- void
+ void
open(string const base, string const L1, string const L2);
};
@@ -75,10 +75,10 @@ u(StringPiece str, size_t start, size_t stop)
return ret;
}
-void
+void
IBM1::
-fill_amatrix(vector<id_type> const& x1,
- vector<id_type> const& x2,
+fill_amatrix(vector<id_type> const& x1,
+ vector<id_type> const& x2,
vector<vector<int> >& aln) const
{
aln.assign(x1.size(),vector<int>(x2.size()));
@@ -108,8 +108,8 @@ fill_amatrix(vector<id_type> const& x1,
void
IBM1::
-align(vector<id_type> const& x1,
- vector<id_type> const& x2,
+align(vector<id_type> const& x1,
+ vector<id_type> const& x2,
vector<int>& aln) const
{
vector<vector<int> > M;
@@ -157,7 +157,7 @@ int main(int argc, char* argv[])
// cout << line1 << endl;
// cout << line2 << endl;
// for (size_t i = 0; i < a.size(); i += 2)
- // cout << ibm1.V1[s1[a[i]]] << " - "
+ // cout << ibm1.V1[s1[a[i]]] << " - "
// << ibm1.V2[s2[a[i+1]]] << endl;
}
// cout << endl;
diff --git a/moses/TranslationModel/UG/util/tokenindex.dump.cc b/moses/TranslationModel/UG/util/tokenindex.dump.cc
index 8ab68579d..0e885630f 100644
--- a/moses/TranslationModel/UG/util/tokenindex.dump.cc
+++ b/moses/TranslationModel/UG/util/tokenindex.dump.cc
@@ -13,7 +13,7 @@
using namespace std;
using namespace ugdiss;
-int
+int
main(int argc,char* argv[])
{
if (argc > 1 && !strcmp(argv[1], "-h")) {
diff --git a/moses/TranslationModel/fuzzy-match/Vocabulary.cpp b/moses/TranslationModel/fuzzy-match/Vocabulary.cpp
index ab1439a29..b70eb98ca 100644
--- a/moses/TranslationModel/fuzzy-match/Vocabulary.cpp
+++ b/moses/TranslationModel/fuzzy-match/Vocabulary.cpp
@@ -1,71 +1,71 @@
-// $Id: Vocabulary.cpp 1565 2008-02-22 14:42:01Z bojar $
-#include "Vocabulary.h"
-#ifdef WITH_THREADS
-#include <boost/thread/locks.hpp>
-#endif
-
-using namespace std;
-
-namespace tmmt
-{
-
-// as in beamdecoder/tables.cpp
-vector<WORD_ID> Vocabulary::Tokenize( const char input[] )
-{
- vector< WORD_ID > token;
- bool betweenWords = true;
- int start=0;
- int i=0;
- for(; input[i] != '\0'; i++) {
- bool isSpace = (input[i] == ' ' || input[i] == '\t');
-
- if (!isSpace && betweenWords) {
- start = i;
- betweenWords = false;
- } else if (isSpace && !betweenWords) {
- token.push_back( StoreIfNew ( string( input+start, i-start ) ) );
- betweenWords = true;
- }
- }
- if (!betweenWords)
- token.push_back( StoreIfNew ( string( input+start, i-start ) ) );
- return token;
-}
-
-WORD_ID Vocabulary::StoreIfNew( const WORD& word )
-{
-
- {
- // read=lock scope
-#ifdef WITH_THREADS
- boost::shared_lock<boost::shared_mutex> read_lock(m_accessLock);
-#endif
- map<WORD, WORD_ID>::iterator i = lookup.find( word );
-
- if( i != lookup.end() )
- return i->second;
- }
-
-#ifdef WITH_THREADS
- boost::unique_lock<boost::shared_mutex> lock(m_accessLock);
-#endif
- WORD_ID id = vocab.size();
- vocab.push_back( word );
- lookup[ word ] = id;
- return id;
-}
-
-WORD_ID Vocabulary::GetWordID( const WORD &word )
-{
-#ifdef WITH_THREADS
- boost::shared_lock<boost::shared_mutex> read_lock(m_accessLock);
-#endif
- map<WORD, WORD_ID>::iterator i = lookup.find( word );
- if( i == lookup.end() )
- return 0;
- WORD_ID w= (WORD_ID) i->second;
- return w;
-}
-
-}
-
+// $Id: Vocabulary.cpp 1565 2008-02-22 14:42:01Z bojar $
+#include "Vocabulary.h"
+#ifdef WITH_THREADS
+#include <boost/thread/locks.hpp>
+#endif
+
+using namespace std;
+
+namespace tmmt
+{
+
+// as in beamdecoder/tables.cpp
+vector<WORD_ID> Vocabulary::Tokenize( const char input[] )
+{
+ vector< WORD_ID > token;
+ bool betweenWords = true;
+ int start=0;
+ int i=0;
+ for(; input[i] != '\0'; i++) {
+ bool isSpace = (input[i] == ' ' || input[i] == '\t');
+
+ if (!isSpace && betweenWords) {
+ start = i;
+ betweenWords = false;
+ } else if (isSpace && !betweenWords) {
+ token.push_back( StoreIfNew ( string( input+start, i-start ) ) );
+ betweenWords = true;
+ }
+ }
+ if (!betweenWords)
+ token.push_back( StoreIfNew ( string( input+start, i-start ) ) );
+ return token;
+}
+
+WORD_ID Vocabulary::StoreIfNew( const WORD& word )
+{
+
+ {
+ // read=lock scope
+#ifdef WITH_THREADS
+ boost::shared_lock<boost::shared_mutex> read_lock(m_accessLock);
+#endif
+ map<WORD, WORD_ID>::iterator i = lookup.find( word );
+
+ if( i != lookup.end() )
+ return i->second;
+ }
+
+#ifdef WITH_THREADS
+ boost::unique_lock<boost::shared_mutex> lock(m_accessLock);
+#endif
+ WORD_ID id = vocab.size();
+ vocab.push_back( word );
+ lookup[ word ] = id;
+ return id;
+}
+
+WORD_ID Vocabulary::GetWordID( const WORD &word )
+{
+#ifdef WITH_THREADS
+ boost::shared_lock<boost::shared_mutex> read_lock(m_accessLock);
+#endif
+ map<WORD, WORD_ID>::iterator i = lookup.find( word );
+ if( i == lookup.end() )
+ return 0;
+ WORD_ID w= (WORD_ID) i->second;
+ return w;
+}
+
+}
+