Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--biconcor/Vocabulary.cpp202
-rw-r--r--contrib/c++tokenizer/tokenizer.cpp254
-rw-r--r--contrib/c++tokenizer/tokenizer.h16
-rw-r--r--contrib/c++tokenizer/tokenizer_main.cpp18
-rw-r--r--contrib/eppex/ISS.h472
-rw-r--r--contrib/eppex/LossyCounter.h8
-rw-r--r--contrib/eppex/eppex.cpp6
-rw-r--r--contrib/eppex/phrase-extract.cpp20
-rw-r--r--contrib/other-builds/cmake/boost.example/main.cpp10
-rw-r--r--contrib/python/moses/dictree.cpp544
-rwxr-xr-xcontrib/relent-filter/sigtest-filter/WIN32_functions.cpp462
-rwxr-xr-xcontrib/relent-filter/sigtest-filter/WIN32_functions.h48
-rwxr-xr-xcontrib/relent-filter/sigtest-filter/filter-pt.cpp2
-rwxr-xr-xcontrib/relent-filter/src/IOWrapper.cpp10
-rwxr-xr-xcontrib/relent-filter/src/Main.cpp30
-rwxr-xr-xcontrib/relent-filter/src/RelativeEntropyCalc.cpp2
-rwxr-xr-xcontrib/relent-filter/src/TranslationAnalysis.cpp2
-rw-r--r--contrib/rephraser/paraphrase.cpp4
-rw-r--r--contrib/server/mosesserver.cpp86
-rw-r--r--contrib/sigtest-filter/WIN32_functions.cpp462
-rw-r--r--contrib/sigtest-filter/WIN32_functions.h48
-rw-r--r--contrib/sigtest-filter/filter-pt.cpp84
-rw-r--r--contrib/synlm/hhmm/rvtl/include/nl-archetypeset.h12
-rw-r--r--contrib/synlm/hhmm/rvtl/include/nl-array.h2
-rw-r--r--contrib/synlm/hhmm/rvtl/include/nl-beam.h4
-rw-r--r--contrib/synlm/hhmm/rvtl/include/nl-cpt.h4
-rw-r--r--contrib/synlm/hhmm/rvtl/include/nl-crf.h24
-rw-r--r--contrib/synlm/hhmm/rvtl/include/nl-denot.h6
-rw-r--r--contrib/synlm/hhmm/rvtl/include/nl-dtree-cont.h18
-rw-r--r--contrib/synlm/hhmm/rvtl/include/nl-dtree.h44
-rw-r--r--contrib/synlm/hhmm/rvtl/include/nl-fixedmatrix.h20
-rw-r--r--contrib/synlm/hhmm/rvtl/include/nl-gauss.h20
-rw-r--r--contrib/synlm/hhmm/rvtl/include/nl-hash.h2
-rw-r--r--contrib/synlm/hhmm/rvtl/include/nl-hmm.h32
-rw-r--r--contrib/synlm/hhmm/rvtl/include/nl-hmm2.h28
-rw-r--r--contrib/synlm/hhmm/rvtl/include/nl-hmmloop.h2
-rw-r--r--contrib/synlm/hhmm/rvtl/include/nl-linsep.h4
-rw-r--r--contrib/synlm/hhmm/rvtl/include/nl-mixture.h16
-rw-r--r--contrib/synlm/hhmm/rvtl/include/nl-modelfile.h10
-rw-r--r--contrib/synlm/hhmm/rvtl/include/nl-oblidtree.h6
-rw-r--r--contrib/synlm/hhmm/rvtl/include/nl-prob.h6
-rw-r--r--contrib/synlm/hhmm/rvtl/include/nl-probmodel.h20
-rw-r--r--contrib/synlm/hhmm/rvtl/include/nl-racpt.h32
-rw-r--r--contrib/synlm/hhmm/rvtl/include/nl-randvar.h18
-rw-r--r--contrib/synlm/hhmm/rvtl/include/nl-safeids.h12
-rw-r--r--contrib/synlm/hhmm/rvtl/include/nl-stream.h14
-rw-r--r--contrib/synlm/hhmm/rvtl/include/nl-string.h10
-rw-r--r--contrib/synlm/hhmm/rvtl/include/nl-stringindex.h2
-rw-r--r--contrib/synlm/hhmm/rvtl/include/nl-tetrahex.h2
-rw-r--r--contrib/synlm/hhmm/rvtl/include/nl-timer.h2
-rw-r--r--contrib/synlm/hhmm/wsjparse/include/HHMMLangModel-gf.h2
-rw-r--r--contrib/synlm/hhmm/wsjparse/include/TextObsModel.h12
-rw-r--r--lm/bhiksha.cc6
-rw-r--r--lm/bhiksha.hh6
-rw-r--r--lm/binary_format.cc2
-rw-r--r--lm/binary_format.hh12
-rw-r--r--lm/blank.hh6
-rw-r--r--lm/build_binary_main.cc2
-rw-r--r--lm/builder/adjust_counts.cc56
-rw-r--r--lm/builder/adjust_counts.hh8
-rw-r--r--lm/builder/adjust_counts_test.cc2
-rw-r--r--lm/builder/corpus_count.cc28
-rw-r--r--lm/builder/corpus_count.hh2
-rw-r--r--lm/builder/initial_probabilities.cc42
-rw-r--r--lm/builder/initial_probabilities.hh6
-rw-r--r--lm/builder/interpolate.cc6
-rw-r--r--lm/builder/interpolate.hh4
-rw-r--r--lm/builder/joint_order.hh10
-rw-r--r--lm/builder/lmplz_main.cc10
-rw-r--r--lm/builder/ngram.hh10
-rw-r--r--lm/builder/pipeline.cc18
-rw-r--r--lm/builder/pipeline.hh2
-rw-r--r--lm/builder/print.cc2
-rw-r--r--lm/builder/print.hh6
-rw-r--r--lm/builder/sort.hh40
-rw-r--r--lm/enumerate_vocab.hh2
-rw-r--r--lm/facade.hh8
-rw-r--r--lm/filter/count_io.hh4
-rw-r--r--lm/filter/filter_main.cc2
-rw-r--r--lm/filter/format.hh12
-rw-r--r--lm/filter/phrase.cc26
-rw-r--r--lm/filter/phrase.hh14
-rw-r--r--lm/filter/phrase_table_vocab_main.cc4
-rw-r--r--lm/filter/thread.hh14
-rw-r--r--lm/filter/vocab.cc2
-rw-r--r--lm/filter/vocab.hh2
-rw-r--r--lm/filter/wrapper.hh2
-rw-r--r--lm/left.hh44
-rw-r--r--lm/left_test.cc4
-rw-r--r--lm/lm_exception.hh2
-rw-r--r--lm/max_order.hh2
-rw-r--r--lm/model.hh40
-rw-r--r--lm/model_test.cc10
-rw-r--r--lm/model_type.hh2
-rw-r--r--lm/ngram_query.hh4
-rw-r--r--lm/partial.hh10
-rw-r--r--lm/partial_test.cc4
-rw-r--r--lm/quantize.cc4
-rw-r--r--lm/quantize.hh10
-rw-r--r--lm/return.hh10
-rw-r--r--lm/search_trie.cc2
-rw-r--r--lm/sizes.cc2
-rw-r--r--lm/state.hh12
-rw-r--r--lm/trie.cc8
-rw-r--r--lm/trie.hh18
-rw-r--r--lm/trie_sort.cc24
-rw-r--r--lm/trie_sort.hh4
-rw-r--r--lm/value.hh4
-rw-r--r--lm/value_build.cc8
-rw-r--r--lm/value_build.hh2
-rw-r--r--lm/virtual_interface.hh32
-rw-r--r--lm/vocab.cc24
-rw-r--r--lm/vocab.hh14
-rw-r--r--lm/weights.hh4
-rw-r--r--lm/wrappers/nplm.cc4
-rw-r--r--lm/wrappers/nplm.hh2
-rw-r--r--mert/ForestRescoreTest.cpp2
-rw-r--r--misc/merge-sorted.cc22
-rw-r--r--misc/pmoses/pmoses.cc34
-rw-r--r--moses/BaseManager.cpp6
-rw-r--r--moses/BaseManager.h6
-rw-r--r--moses/ChartCellCollection.h2
-rw-r--r--moses/ChartManager.cpp2
-rw-r--r--moses/ChartParser.cpp4
-rw-r--r--moses/ConfusionNet.h2
-rw-r--r--moses/ContextScope.h16
-rw-r--r--moses/DecodeStepTranslation.cpp34
-rw-r--r--moses/ExportInterface.cpp66
-rw-r--r--moses/FF/Factory.cpp14
-rw-r--r--moses/FF/FeatureFunction.cpp18
-rw-r--r--moses/FF/FeatureFunction.h12
-rw-r--r--moses/FF/LexicalReordering/LexicalReordering.cpp8
-rw-r--r--moses/FF/LexicalReordering/LexicalReorderingState.cpp2
-rw-r--r--moses/FF/LexicalReordering/LexicalReorderingState.h2
-rw-r--r--moses/FF/LexicalReordering/SparseReordering.cpp8
-rw-r--r--moses/FF/LexicalReordering/SparseReordering.h2
-rw-r--r--moses/FF/Model1Feature.cpp32
-rw-r--r--moses/FF/Model1Feature.h4
-rw-r--r--moses/FF/PhraseOrientationFeature.cpp4
-rw-r--r--moses/FF/RulePairUnlexicalizedSource.cpp8
-rw-r--r--moses/FF/RuleScope.cpp4
-rw-r--r--moses/FF/SoftSourceSyntacticConstraintsFeature.cpp8
-rw-r--r--moses/FF/VW/VW.h10
-rw-r--r--moses/Hypothesis.cpp122
-rw-r--r--moses/Hypothesis.h8
-rw-r--r--moses/IOWrapper.cpp4
-rw-r--r--moses/Incremental.cpp4
-rw-r--r--moses/InputType.cpp2
-rw-r--r--moses/InputType.h2
-rw-r--r--moses/LM/RDLM.cpp38
-rw-r--r--moses/Manager.cpp8
-rw-r--r--moses/MockHypothesis.cpp4
-rw-r--r--moses/Parameter.cpp114
-rw-r--r--moses/Parameter.h22
-rw-r--r--moses/ScoreComponentCollection.cpp10
-rw-r--r--moses/ScoreComponentCollection.h6
-rw-r--r--moses/Sentence.cpp72
-rw-r--r--moses/Sentence.h22
-rw-r--r--moses/StaticData.cpp72
-rw-r--r--moses/StaticData.h10
-rw-r--r--moses/Syntax/F2S/Manager.h2
-rw-r--r--moses/TargetPhrase.cpp12
-rw-r--r--moses/TargetPhrase.h12
-rw-r--r--moses/ThreadPool.h2
-rw-r--r--moses/TrainingTask.h20
-rw-r--r--moses/TranslationModel/CompactPT/BlockHashIndex.h4
-rw-r--r--moses/TranslationModel/CompactPT/MurmurHash3.cpp850
-rw-r--r--moses/TranslationModel/CompactPT/MurmurHash3.h74
-rw-r--r--moses/TranslationModel/PhraseDictionary.h10
-rw-r--r--moses/TranslationModel/ProbingPT/hash.hh2
-rw-r--r--moses/TranslationModel/ProbingPT/storing.hh2
-rw-r--r--moses/TranslationModel/UG/TargetPhraseCollectionCache.cc52
-rw-r--r--moses/TranslationModel/UG/TargetPhraseCollectionCache.h18
-rw-r--r--moses/TranslationModel/UG/bitext-find.cc36
-rw-r--r--moses/TranslationModel/UG/count-ptable-features.cc4
-rw-r--r--moses/TranslationModel/UG/generic/file_io/ug_stream.cpp8
-rw-r--r--moses/TranslationModel/UG/generic/file_io/ug_stream.h2
-rw-r--r--moses/TranslationModel/UG/generic/program_options/ug_get_options.cpp8
-rw-r--r--moses/TranslationModel/UG/generic/program_options/ug_get_options.h12
-rw-r--r--moses/TranslationModel/UG/generic/program_options/ug_splice_arglist.cc12
-rw-r--r--moses/TranslationModel/UG/generic/program_options/ug_splice_arglist.h8
-rw-r--r--moses/TranslationModel/UG/generic/sorting/VectorIndexSorter.h22
-rw-r--r--moses/TranslationModel/UG/generic/stringdist/ug_stringdist.cc78
-rw-r--r--moses/TranslationModel/UG/generic/stringdist/ug_stringdist.h16
-rw-r--r--moses/TranslationModel/UG/generic/threading/ug_thread_safe_counter.cc16
-rw-r--r--moses/TranslationModel/UG/mm/calc-coverage.cc2
-rw-r--r--moses/TranslationModel/UG/mm/custom-pt.cc34
-rw-r--r--moses/TranslationModel/UG/mm/mam2symal.cc14
-rw-r--r--moses/TranslationModel/UG/mm/mam_verify.cc12
-rw-r--r--moses/TranslationModel/UG/mm/mmlex-build.cc50
-rw-r--r--moses/TranslationModel/UG/mm/mmlex-lookup.cc30
-rw-r--r--moses/TranslationModel/UG/mm/mtt-build.cc102
-rw-r--r--moses/TranslationModel/UG/mm/mtt-count-words.cc6
-rw-r--r--moses/TranslationModel/UG/mm/mtt-demo1.cc10
-rw-r--r--moses/TranslationModel/UG/mm/mtt-dump.cc30
-rw-r--r--moses/TranslationModel/UG/mm/mtt.count.cc8
-rw-r--r--moses/TranslationModel/UG/mm/num_read_write.cc30
-rw-r--r--moses/TranslationModel/UG/mm/num_read_write.h8
-rw-r--r--moses/TranslationModel/UG/mm/obsolete/ug_bitext_base.h36
-rw-r--r--moses/TranslationModel/UG/mm/symal2mam.cc48
-rw-r--r--moses/TranslationModel/UG/mm/tpt_pickler.cc110
-rw-r--r--moses/TranslationModel/UG/mm/tpt_pickler.h36
-rw-r--r--moses/TranslationModel/UG/mm/tpt_tightindex.cc212
-rw-r--r--moses/TranslationModel/UG/mm/tpt_tightindex.h36
-rw-r--r--moses/TranslationModel/UG/mm/tpt_tokenindex.cc90
-rw-r--r--moses/TranslationModel/UG/mm/tpt_tokenindex.h30
-rw-r--r--moses/TranslationModel/UG/mm/tpt_typedefs.h2
-rw-r--r--moses/TranslationModel/UG/mm/ug_bitext.cc46
-rw-r--r--moses/TranslationModel/UG/mm/ug_bitext.h222
-rw-r--r--moses/TranslationModel/UG/mm/ug_bitext_agenda.h46
-rw-r--r--moses/TranslationModel/UG/mm/ug_bitext_agenda_job.h98
-rw-r--r--moses/TranslationModel/UG/mm/ug_bitext_agenda_worker.h16
-rw-r--r--moses/TranslationModel/UG/mm/ug_bitext_jstats.cc20
-rw-r--r--moses/TranslationModel/UG/mm/ug_bitext_jstats.h14
-rw-r--r--moses/TranslationModel/UG/mm/ug_bitext_pstats.cc22
-rw-r--r--moses/TranslationModel/UG/mm/ug_bitext_pstats.h16
-rw-r--r--moses/TranslationModel/UG/mm/ug_conll_bottom_up_token.h10
-rw-r--r--moses/TranslationModel/UG/mm/ug_conll_record.h16
-rw-r--r--moses/TranslationModel/UG/mm/ug_corpus_token.cc6
-rw-r--r--moses/TranslationModel/UG/mm/ug_corpus_token.h28
-rw-r--r--moses/TranslationModel/UG/mm/ug_deptree.cc68
-rw-r--r--moses/TranslationModel/UG/mm/ug_deptree.h26
-rw-r--r--moses/TranslationModel/UG/mm/ug_im_bitext.cc16
-rw-r--r--moses/TranslationModel/UG/mm/ug_im_bitext.h38
-rw-r--r--moses/TranslationModel/UG/mm/ug_im_tsa.h116
-rw-r--r--moses/TranslationModel/UG/mm/ug_im_ttrack.h56
-rw-r--r--moses/TranslationModel/UG/mm/ug_lexical_phrase_scorer1.h30
-rw-r--r--moses/TranslationModel/UG/mm/ug_lexical_phrase_scorer2.h42
-rw-r--r--moses/TranslationModel/UG/mm/ug_lexical_reordering.cc26
-rw-r--r--moses/TranslationModel/UG/mm/ug_lexical_reordering.h6
-rw-r--r--moses/TranslationModel/UG/mm/ug_load_primer.h4
-rw-r--r--moses/TranslationModel/UG/mm/ug_lru_cache.h22
-rw-r--r--moses/TranslationModel/UG/mm/ug_mm_2d_table.h26
-rw-r--r--moses/TranslationModel/UG/mm/ug_mm_bitext.h20
-rw-r--r--moses/TranslationModel/UG/mm/ug_mm_tsa.h34
-rw-r--r--moses/TranslationModel/UG/mm/ug_mm_ttrack.h44
-rw-r--r--moses/TranslationModel/UG/mm/ug_mmbitext.cc66
-rw-r--r--moses/TranslationModel/UG/mm/ug_mmbitext.h46
-rw-r--r--moses/TranslationModel/UG/mm/ug_phrasepair.cc16
-rw-r--r--moses/TranslationModel/UG/mm/ug_phrasepair.h122
-rw-r--r--moses/TranslationModel/UG/mm/ug_sampling_bias.cc82
-rw-r--r--moses/TranslationModel/UG/mm/ug_sampling_bias.h40
-rw-r--r--moses/TranslationModel/UG/mm/ug_tsa_array_entry.h12
-rw-r--r--moses/TranslationModel/UG/mm/ug_tsa_base.h220
-rw-r--r--moses/TranslationModel/UG/mm/ug_tsa_bitset_cache.h12
-rw-r--r--moses/TranslationModel/UG/mm/ug_tsa_tree_iterator.h192
-rw-r--r--moses/TranslationModel/UG/mm/ug_ttrack_base.cc4
-rw-r--r--moses/TranslationModel/UG/mm/ug_ttrack_base.h116
-rw-r--r--moses/TranslationModel/UG/mm/ug_ttrack_position.h32
-rw-r--r--moses/TranslationModel/UG/mm/ug_typedefs.h2
-rw-r--r--moses/TranslationModel/UG/mmsapt.cpp186
-rw-r--r--moses/TranslationModel/UG/mmsapt.h84
-rw-r--r--moses/TranslationModel/UG/mmsapt_align.cc44
-rw-r--r--moses/TranslationModel/UG/ptable-describe-features.cc6
-rw-r--r--moses/TranslationModel/UG/ptable-lookup.cc24
-rw-r--r--moses/TranslationModel/UG/sapt_phrase_key.h2
-rw-r--r--moses/TranslationModel/UG/sapt_phrase_scorers.h2
-rw-r--r--moses/TranslationModel/UG/sapt_pscore_base.h52
-rw-r--r--moses/TranslationModel/UG/sapt_pscore_coherence.h14
-rw-r--r--moses/TranslationModel/UG/sapt_pscore_lex1.h38
-rw-r--r--moses/TranslationModel/UG/sapt_pscore_logcnt.h26
-rw-r--r--moses/TranslationModel/UG/sapt_pscore_pbwd.h18
-rw-r--r--moses/TranslationModel/UG/sapt_pscore_pfwd.h32
-rw-r--r--moses/TranslationModel/UG/sapt_pscore_phrasecount.h10
-rw-r--r--moses/TranslationModel/UG/sapt_pscore_provenance.h18
-rw-r--r--moses/TranslationModel/UG/sapt_pscore_rareness.h14
-rw-r--r--moses/TranslationModel/UG/sapt_pscore_unaligned.h24
-rw-r--r--moses/TranslationModel/UG/sapt_pscore_wordcount.h10
-rw-r--r--moses/TranslationModel/UG/sim-pe.cc18
-rw-r--r--moses/TranslationModel/UG/spe-check-coverage.cc42
-rw-r--r--moses/TranslationModel/UG/spe-check-coverage2.cc14
-rw-r--r--moses/TranslationModel/UG/spe-check-coverage3.cc46
-rw-r--r--moses/TranslationModel/UG/try-align.cc134
-rw-r--r--moses/TranslationModel/UG/try-align2.cc174
-rw-r--r--moses/TranslationModel/UG/util/ibm1-align.cc32
-rw-r--r--moses/TranslationModel/UG/util/tokenindex.dump.cc2
-rw-r--r--moses/TranslationModel/fuzzy-match/Vocabulary.cpp142
-rw-r--r--moses/TranslationOption.cpp4
-rw-r--r--moses/TranslationOption.h8
-rw-r--r--moses/TranslationOptionCollection.cpp6
-rw-r--r--moses/TranslationOptionCollection.h2
-rw-r--r--moses/TranslationOptionCollectionConfusionNet.cpp8
-rw-r--r--moses/TranslationOptionCollectionLattice.cpp4
-rw-r--r--moses/TranslationTask.cpp50
-rw-r--r--moses/TranslationTask.h32
-rw-r--r--moses/Util.h2
-rw-r--r--moses/WordLattice.h2
-rw-r--r--moses/server/Optimizer.cpp24
-rw-r--r--moses/server/Optimizer.h4
-rw-r--r--moses/server/TranslationRequest.cpp148
-rw-r--r--moses/server/TranslationRequest.h72
-rw-r--r--moses/server/Translator.cpp16
-rw-r--r--moses/server/Translator.h6
-rw-r--r--moses/server/Updater.cpp16
-rw-r--r--moses/server/Updater.h8
-rw-r--r--moses/thread_safe_container.h32
-rw-r--r--phrase-extract/ExtractionPhrasePair.cpp2
-rw-r--r--phrase-extract/XmlTree.h86
-rw-r--r--phrase-extract/consolidate-main.cpp14
-rw-r--r--phrase-extract/extract-ghkm/ExtractGHKM.cpp2
-rw-r--r--phrase-extract/extract-mixed-syntax/pugiconfig.hpp2
-rw-r--r--phrase-extract/extract-mixed-syntax/pugixml.cpp2
-rw-r--r--phrase-extract/extract-mixed-syntax/pugixml.hpp96
-rw-r--r--phrase-extract/pcfg-common/pcfg.cc6
-rw-r--r--phrase-extract/pcfg-common/tool.cc6
-rw-r--r--phrase-extract/pcfg-common/xml_tree_parser.cc6
-rw-r--r--phrase-extract/pcfg-extract/main.cc6
-rw-r--r--phrase-extract/pcfg-extract/pcfg_extract.cc6
-rw-r--r--phrase-extract/pcfg-extract/rule_collection.cc6
-rw-r--r--phrase-extract/pcfg-extract/rule_extractor.cc6
-rw-r--r--phrase-extract/pcfg-score/main.cc6
-rw-r--r--phrase-extract/pcfg-score/pcfg_score.cc6
-rw-r--r--phrase-extract/pcfg-score/tree_scorer.cc6
-rw-r--r--search/applied.hh12
-rw-r--r--search/config.hh2
-rw-r--r--search/edge.hh8
-rw-r--r--search/edge_generator.cc12
-rw-r--r--search/header.hh2
-rw-r--r--search/nbest.cc4
-rw-r--r--search/nbest.hh6
-rw-r--r--search/rule.hh4
-rw-r--r--search/types.hh2
-rw-r--r--search/vertex.cc4
-rw-r--r--search/vertex.hh4
-rw-r--r--search/vertex_generator.hh6
-rw-r--r--symal/symal.cpp6
-rw-r--r--util/bit_packing.cc4
-rw-r--r--util/bit_packing.hh24
-rw-r--r--util/cat_compressed_main.cc2
-rw-r--r--util/ersatz_progress.cc2
-rw-r--r--util/ersatz_progress.hh6
-rw-r--r--util/exception.cc2
-rw-r--r--util/exception.hh16
-rw-r--r--util/file.hh6
-rw-r--r--util/file_piece.cc32
-rw-r--r--util/fixed_array.hh32
-rw-r--r--util/getopt.c2
-rw-r--r--util/getopt.hh2
-rw-r--r--util/mmap.cc2
-rw-r--r--util/mmap.hh22
-rw-r--r--util/multi_intersection.hh4
-rw-r--r--util/multi_intersection_test.cc2
-rw-r--r--util/murmur_hash.cc22
-rw-r--r--util/pcqueue.hh6
-rw-r--r--util/pool.hh4
-rw-r--r--util/probing_hash_table.hh12
-rw-r--r--util/read_compressed.cc8
-rw-r--r--util/read_compressed.hh8
-rw-r--r--util/scoped.hh2
-rw-r--r--util/sorted_uniform.hh6
-rw-r--r--util/sorted_uniform_test.cc2
-rw-r--r--util/stream/block.hh28
-rw-r--r--util/stream/chain.cc12
-rw-r--r--util/stream/chain.hh70
-rw-r--r--util/stream/config.hh16
-rw-r--r--util/stream/io.cc2
-rw-r--r--util/stream/io.hh6
-rw-r--r--util/stream/line_input.cc4
-rw-r--r--util/stream/multi_progress.cc4
-rw-r--r--util/stream/multi_progress.hh12
-rw-r--r--util/stream/multi_stream.hh2
-rw-r--r--util/stream/sort.hh60
-rw-r--r--util/stream/sort_test.cc2
-rw-r--r--util/stream/stream.hh2
-rw-r--r--util/stream/timer.hh2
-rw-r--r--util/thread_pool.hh2
-rw-r--r--util/tokenize_piece.hh2
-rw-r--r--util/usage.cc6
368 files changed, 5740 insertions, 5740 deletions
diff --git a/biconcor/Vocabulary.cpp b/biconcor/Vocabulary.cpp
index 9d52ee44e..f0f07c97d 100644
--- a/biconcor/Vocabulary.cpp
+++ b/biconcor/Vocabulary.cpp
@@ -1,101 +1,101 @@
-// $Id: Vocabulary.cpp 1565 2008-02-22 14:42:01Z bojar $
-#include "Vocabulary.h"
-#include <fstream>
-
-namespace
-{
-
-const int MAX_LENGTH = 10000;
-
-} // namespace
-
-using namespace std;
-
-// as in beamdecoder/tables.cpp
-vector<WORD_ID> Vocabulary::Tokenize( const char input[] )
-{
- vector< WORD_ID > token;
- bool betweenWords = true;
- int start=0;
- int i=0;
- for(; input[i] != '\0'; i++) {
- bool isSpace = (input[i] == ' ' || input[i] == '\t');
-
- if (!isSpace && betweenWords) {
- start = i;
- betweenWords = false;
- } else if (isSpace && !betweenWords) {
- token.push_back( StoreIfNew ( string( input+start, i-start ) ) );
- betweenWords = true;
- }
- }
- if (!betweenWords)
- token.push_back( StoreIfNew ( string( input+start, i-start ) ) );
- return token;
-}
-
-WORD_ID Vocabulary::StoreIfNew( const WORD& word )
-{
- map<WORD, WORD_ID>::iterator i = lookup.find( word );
-
- if( i != lookup.end() )
- return i->second;
-
- WORD_ID id = vocab.size();
- vocab.push_back( word );
- lookup[ word ] = id;
- return id;
-}
-
-WORD_ID Vocabulary::GetWordID( const WORD &word ) const
-{
- map<WORD, WORD_ID>::const_iterator i = lookup.find( word );
- if( i == lookup.end() )
- return 0;
- WORD_ID w= (WORD_ID) i->second;
- return w;
-}
-
-void Vocabulary::Save(const string& fileName ) const
-{
- ofstream vcbFile;
- vcbFile.open( fileName.c_str(), ios::out | ios::ate | ios::trunc);
-
- if (!vcbFile) {
- cerr << "Failed to open " << vcbFile << endl;
- exit(1);
- }
-
- vector< WORD >::const_iterator i;
- for(i = vocab.begin(); i != vocab.end(); i++) {
- const string &word = *i;
- vcbFile << word << endl;
- }
- vcbFile.close();
-}
-
-void Vocabulary::Load(const string& fileName )
-{
- ifstream vcbFile;
- char line[MAX_LENGTH];
- vcbFile.open(fileName.c_str());
-
- if (!vcbFile) {
- cerr << "no such file or directory: " << vcbFile << endl;
- exit(1);
- }
-
- cerr << "loading from " << fileName << endl;
- istream *fileP = &vcbFile;
- int count = 0;
- while(!fileP->eof()) {
- SAFE_GETLINE((*fileP), line, MAX_LENGTH, '\n');
- if (fileP->eof()) break;
- int length = 0;
- for(; line[length] != '\0'; length++);
- StoreIfNew( string( line, length ) );
- count++;
- }
- vcbFile.close();
- cerr << count << " word read, vocabulary size " << vocab.size() << endl;
-}
+// $Id: Vocabulary.cpp 1565 2008-02-22 14:42:01Z bojar $
+#include "Vocabulary.h"
+#include <fstream>
+
+namespace
+{
+
+const int MAX_LENGTH = 10000;
+
+} // namespace
+
+using namespace std;
+
+// as in beamdecoder/tables.cpp
+vector<WORD_ID> Vocabulary::Tokenize( const char input[] )
+{
+ vector< WORD_ID > token;
+ bool betweenWords = true;
+ int start=0;
+ int i=0;
+ for(; input[i] != '\0'; i++) {
+ bool isSpace = (input[i] == ' ' || input[i] == '\t');
+
+ if (!isSpace && betweenWords) {
+ start = i;
+ betweenWords = false;
+ } else if (isSpace && !betweenWords) {
+ token.push_back( StoreIfNew ( string( input+start, i-start ) ) );
+ betweenWords = true;
+ }
+ }
+ if (!betweenWords)
+ token.push_back( StoreIfNew ( string( input+start, i-start ) ) );
+ return token;
+}
+
+WORD_ID Vocabulary::StoreIfNew( const WORD& word )
+{
+ map<WORD, WORD_ID>::iterator i = lookup.find( word );
+
+ if( i != lookup.end() )
+ return i->second;
+
+ WORD_ID id = vocab.size();
+ vocab.push_back( word );
+ lookup[ word ] = id;
+ return id;
+}
+
+WORD_ID Vocabulary::GetWordID( const WORD &word ) const
+{
+ map<WORD, WORD_ID>::const_iterator i = lookup.find( word );
+ if( i == lookup.end() )
+ return 0;
+ WORD_ID w= (WORD_ID) i->second;
+ return w;
+}
+
+void Vocabulary::Save(const string& fileName ) const
+{
+ ofstream vcbFile;
+ vcbFile.open( fileName.c_str(), ios::out | ios::ate | ios::trunc);
+
+ if (!vcbFile) {
+ cerr << "Failed to open " << vcbFile << endl;
+ exit(1);
+ }
+
+ vector< WORD >::const_iterator i;
+ for(i = vocab.begin(); i != vocab.end(); i++) {
+ const string &word = *i;
+ vcbFile << word << endl;
+ }
+ vcbFile.close();
+}
+
+void Vocabulary::Load(const string& fileName )
+{
+ ifstream vcbFile;
+ char line[MAX_LENGTH];
+ vcbFile.open(fileName.c_str());
+
+ if (!vcbFile) {
+ cerr << "no such file or directory: " << vcbFile << endl;
+ exit(1);
+ }
+
+ cerr << "loading from " << fileName << endl;
+ istream *fileP = &vcbFile;
+ int count = 0;
+ while(!fileP->eof()) {
+ SAFE_GETLINE((*fileP), line, MAX_LENGTH, '\n');
+ if (fileP->eof()) break;
+ int length = 0;
+ for(; line[length] != '\0'; length++);
+ StoreIfNew( string( line, length ) );
+ count++;
+ }
+ vcbFile.close();
+ cerr << count << " word read, vocabulary size " << vocab.size() << endl;
+}
diff --git a/contrib/c++tokenizer/tokenizer.cpp b/contrib/c++tokenizer/tokenizer.cpp
index 035ba2e97..6d3dd7046 100644
--- a/contrib/c++tokenizer/tokenizer.cpp
+++ b/contrib/c++tokenizer/tokenizer.cpp
@@ -46,7 +46,7 @@ RE2 curr_en_x("^[Nn]?[\'][\\p{L}]"); // english contraction suffixes conjoin to
RE2 pre_en_x(".*[\\p{L}\\p{N}]+$"); // valid english contraction prefixes
RE2 curr_fr_x(".*[\\p{L}\\p{N}]+[\']"); // french/italian contraction prefixes conjoin to the right
RE2 post_fr_x("^[\\p{L}\\p{N}]*"); // valid french/italian contraction suffixes
-// anything rarely used will just be given as a string and compiled on demand by RE2
+// anything rarely used will just be given as a string and compiled on demand by RE2
const char *
SPC_BYTE = " ";
@@ -85,8 +85,8 @@ const char *ESCAPE_MOSES[] = {
"&apos;", // ' 6 (27)
"&quot;", // " 7 (22)
};
-
-const std::set<std::string>
+
+const std::set<std::string>
ESCAPE_SET = {
std::string(ESCAPE_MOSES[0]),
std::string(ESCAPE_MOSES[1]),
@@ -98,7 +98,7 @@ ESCAPE_SET = {
std::string(ESCAPE_MOSES[7]),
};
-const std::map<std::wstring,gunichar>
+const std::map<std::wstring,gunichar>
ENTITY_MAP = {
{ std::wstring(L"&quot;"), L'"' },
{ std::wstring(L"&amp;"), L'&' },
@@ -355,7 +355,7 @@ ENTITY_MAP = {
{ std::wstring(L"&diams;"), L'\u2666' }
};
-inline gunichar
+inline gunichar
get_entity(gunichar *ptr, size_t len) {
// try hex, decimal entity first
gunichar ech(0);
@@ -380,16 +380,16 @@ get_entity(gunichar *ptr, size_t len) {
ech = 0;
}
}
- if (ech)
+ if (ech)
return ech;
- std::map<std::wstring,gunichar>::const_iterator it =
+ std::map<std::wstring,gunichar>::const_iterator it =
ENTITY_MAP.find(std::wstring((wchar_t *)(ptr),len));
return it != ENTITY_MAP.end() ? it->second : gunichar(0);
}
-inline gunichar
+inline gunichar
get_entity(char *ptr, size_t len) {
glong ulen = 0;
gunichar *gtmp = g_utf8_to_ucs4_fast((const gchar *)ptr, len, &ulen);
@@ -399,7 +399,7 @@ get_entity(char *ptr, size_t len) {
}
-inline std::string
+inline std::string
trim(const std::string& in)
{
std::size_t start = 0;
@@ -413,7 +413,7 @@ trim(const std::string& in)
}
-inline std::vector<std::string>
+inline std::vector<std::string>
split(const std::string& in)
{
std::vector<std::string> outv;
@@ -476,7 +476,7 @@ Tokenizer::Tokenizer(const Parameters& _)
//
// dtor deletes dynamically allocated per-language RE2 compiled expressions
//
-Tokenizer::~Tokenizer()
+Tokenizer::~Tokenizer()
{
for (auto& ptr : prot_pat_vec) {
if (ptr == &numprefixed_x || ptr == &quasinumeric_x)
@@ -491,7 +491,7 @@ Tokenizer::~Tokenizer()
// others into nbpre_gen_set
//
std::pair<int,int>
-Tokenizer::load_prefixes(std::ifstream& ifs)
+Tokenizer::load_prefixes(std::ifstream& ifs)
{
RE2 numonly("(.*)[\\s]+(\\#NUMERIC_ONLY\\#)");
std::string line;
@@ -547,7 +547,7 @@ Tokenizer::init(const char *cfg_dir_optional) {
try {
std::pair<int,int> counts = load_prefixes(cfg);
if (verbose_p) {
- std::cerr << "loaded " << counts.first << " non-numeric, "
+ std::cerr << "loaded " << counts.first << " non-numeric, "
<< counts.second << " numeric prefixes from "
<< nbpre_path << std::endl;
}
@@ -570,7 +570,7 @@ Tokenizer::init(const char *cfg_dir_optional) {
std::string protpat_path(cfg_dir);
protpat_path.append("/protected_pattern.").append(lang_iso);
// default to generic version
- if (::access(protpat_path.c_str(),R_OK))
+ if (::access(protpat_path.c_str(),R_OK))
protpat_path = protpat_path.substr(0,protpat_path.size()-lang_iso.size()-1);
prot_pat_vec.push_back(&numprefixed_x);
@@ -596,7 +596,7 @@ Tokenizer::init(const char *cfg_dir_optional) {
throw std::runtime_error(ess.str());
}
if (verbose_p) {
- std::cerr << "loaded " << npat << " protected patterns from "
+ std::cerr << "loaded " << npat << " protected patterns from "
<< protpat_path << std::endl;
}
} else if (verbose_p) {
@@ -612,7 +612,7 @@ Tokenizer::reset() {
//
// apply ctor-selected tokenization to a string, in-place, no newlines allowed,
-// assumes protections are applied already, some invariants are in place,
+// assumes protections are applied already, some invariants are in place,
// e.g. that successive chars <= ' ' have been normalized to a single ' '
//
void
@@ -633,7 +633,7 @@ Tokenizer::protected_tokenize(std::string& text) {
}
if (pos < textpc.size() && textpc[pos] != ' ')
words.push_back(textpc.substr(pos,textpc.size()-pos));
-
+
// regurgitate words with look-ahead handling for tokens with final mumble
std::string outs;
std::size_t nwords(words.size());
@@ -659,7 +659,7 @@ Tokenizer::protected_tokenize(std::string& text) {
// lower-case look-ahead does not break
sentence_break_p = false;
}
- }
+ }
outs.append(words[ii].data(),len);
if (sentence_break_p)
@@ -671,15 +671,15 @@ Tokenizer::protected_tokenize(std::string& text) {
}
-bool
+bool
Tokenizer::unescape(std::string& word) {
std::ostringstream oss;
std::size_t was = 0; // last processed
std::size_t pos = 0; // last unprocessed
std::size_t len = 0; // processed length
bool hit = false;
- for (std::size_t endp=0;
- (pos = word.find('&',was)) != std::string::npos && (endp = word.find(';',pos)) != std::string::npos;
+ for (std::size_t endp=0;
+ (pos = word.find('&',was)) != std::string::npos && (endp = word.find(';',pos)) != std::string::npos;
was = endp == std::string::npos ? pos : 1+endp) {
len = endp - pos + 1;
glong ulen(0);
@@ -703,7 +703,7 @@ Tokenizer::unescape(std::string& word) {
}
g_free(gtmp);
}
- if (was < word.size())
+ if (was < word.size())
oss << word.substr(was);
if (hit)
word = oss.str();
@@ -727,7 +727,7 @@ Tokenizer::escape(std::string& text) {
if (mod_p)
outs.append(pp,pt-pp+1);
} else {
- if (mod_p)
+ if (mod_p)
outs.append(pp,mk-pp);
pt = --mk;
}
@@ -751,7 +751,7 @@ Tokenizer::escape(std::string& text) {
} else if (*pt > ']') {
if (*pt =='|') { // 7c
sequence_p = ESCAPE_MOSES[0];
- }
+ }
} else if (*pt > 'Z') {
if (*pt == '<') { // 3e
sequence_p = ESCAPE_MOSES[4];
@@ -761,11 +761,11 @@ Tokenizer::escape(std::string& text) {
sequence_p = ESCAPE_MOSES[1];
} else if (*pt == ']') { // 5d
sequence_p = ESCAPE_MOSES[2];
- }
+ }
}
if (sequence_p) {
- if (pt > pp)
+ if (pt > pp)
outs.append(pp,pt-pp);
outs.append(sequence_p);
mod_p = true;
@@ -774,7 +774,7 @@ Tokenizer::escape(std::string& text) {
++pt;
}
}
-
+
if (mod_p) {
if (pp < pt) {
outs.append(pp,pt-pp);
@@ -795,13 +795,13 @@ Tokenizer::penn_tokenize(const std::string& buf)
std::string text(buf);
std::string outs;
- if (skip_alltags_p)
+ if (skip_alltags_p)
RE2::GlobalReplace(&text,genl_tags_x,SPC_BYTE);
// directed quote patches
size_t len = text.size();
- if (len > 2 && text.substr(0,2) == "``")
- text.replace(0,2,"`` ",3);
+ if (len > 2 && text.substr(0,2) == "``")
+ text.replace(0,2,"`` ",3);
else if (text[0] == '"')
text.replace(0,1,"`` ",3);
else if (text[0] == '`' || text[0] == '\'')
@@ -811,9 +811,9 @@ Tokenizer::penn_tokenize(const std::string& buf)
RE2::GlobalReplace(&text,x1_v_gg,one_gg);
RE2::GlobalReplace(&text,x1_v_g,"\\1 ` \\2");
RE2::GlobalReplace(&text,x1_v_q,"\\1 ` ");
-
+
// protect ellipsis
- for (size_t pos = text.find("..."); pos != std::string::npos; pos = text.find("...",pos+11))
+ for (size_t pos = text.find("..."); pos != std::string::npos; pos = text.find("...",pos+11))
text.replace(pos,3,"MANYELIPSIS",11);
// numeric commas
@@ -826,13 +826,13 @@ Tokenizer::penn_tokenize(const std::string& buf)
// isolable slash
RE2::GlobalReplace(&text,slash_x,special_refs);
-
+
// isolate final period
RE2::GlobalReplace(&text,final_x,"\\1 \\2\\3");
-
+
// isolate q.m., e.m.
RE2::GlobalReplace(&text,qx_x,isolate_ref);
-
+
// isolate braces
RE2::GlobalReplace(&text,braces_x,isolate_ref);
@@ -866,7 +866,7 @@ Tokenizer::penn_tokenize(const std::string& buf)
}
std::string ntext(SPC_BYTE);
ntext.append(text);
-
+
// convert double quote to paired single-quotes
RE2::GlobalReplace(&ntext,"\""," '' ");
@@ -894,7 +894,7 @@ Tokenizer::penn_tokenize(const std::string& buf)
RE2::GlobalReplace(&ntext," ([Ww])anna "," \\1an na ");
protected_tokenize(ntext);
-
+
// restore ellipsis
RE2::GlobalReplace(&ntext,"MANYELIPSIS","...");
@@ -919,7 +919,7 @@ Tokenizer::quik_tokenize(const std::string& buf)
int num = 0;
// this is the main moses-compatible tokenizer
-
+
// push all the prefixes matching protected patterns
std::vector<std::string> prot_stack;
std::string match;
@@ -942,7 +942,7 @@ Tokenizer::quik_tokenize(const std::string& buf)
}
}
}
-
+
const char *pt(text.c_str());
const char *ep(pt + text.size());
while (pt < ep && *pt >= 0 && *pt <= ' ')
@@ -990,8 +990,8 @@ Tokenizer::quik_tokenize(const std::string& buf)
if (!since_start) {
if (std::isalpha(char(*ucs4)))
alpha_prefix++;
- } else if (alpha_prefix == since_start
- && char(*ucs4) == ':'
+ } else if (alpha_prefix == since_start
+ && char(*ucs4) == ':'
&& next_type != G_UNICODE_SPACE_SEPARATOR) {
in_url_p = true;
}
@@ -1018,7 +1018,7 @@ Tokenizer::quik_tokenize(const std::string& buf)
// fallthough
case G_UNICODE_UPPERCASE_LETTER:
case G_UNICODE_LOWERCASE_LETTER:
- if (downcase_p && curr_type == G_UNICODE_UPPERCASE_LETTER)
+ if (downcase_p && curr_type == G_UNICODE_UPPERCASE_LETTER)
curr_uch = g_unichar_tolower(*ucs4);
break;
case G_UNICODE_SPACING_MARK:
@@ -1082,8 +1082,8 @@ Tokenizer::quik_tokenize(const std::string& buf)
substitute_p = L"@-@";
post_break_p = pre_break_p = true;
} else if ( ( curr_uch > gunichar(L'\u002D') && curr_uch < gunichar(L'\u2010') ) ||
- ( curr_uch > gunichar(L'\u2011')
- && curr_uch != gunichar(L'\u30A0')
+ ( curr_uch > gunichar(L'\u2011')
+ && curr_uch != gunichar(L'\u30A0')
&& curr_uch < gunichar(L'\uFE63') ) ) {
// dash, not a hyphen
post_break_p = pre_break_p = true;
@@ -1151,7 +1151,7 @@ Tokenizer::quik_tokenize(const std::string& buf)
default:
post_break_p = pre_break_p = prev_uch != curr_uch;
break;
- }
+ }
}
}
break;
@@ -1159,8 +1159,8 @@ Tokenizer::quik_tokenize(const std::string& buf)
switch (curr_uch) {
case gunichar(L':'):
case gunichar(L'/'):
- if (refined_p && !in_url_p
- && prev_type == G_UNICODE_DECIMAL_NUMBER
+ if (refined_p && !in_url_p
+ && prev_type == G_UNICODE_DECIMAL_NUMBER
&& next_type == G_UNICODE_DECIMAL_NUMBER) {
break;
}
@@ -1178,7 +1178,7 @@ Tokenizer::quik_tokenize(const std::string& buf)
break;
case gunichar(L'&'):
if (unescape_p) {
- if (next_type == G_UNICODE_LOWERCASE_LETTER || next_type == G_UNICODE_UPPERCASE_LETTER
+ if (next_type == G_UNICODE_LOWERCASE_LETTER || next_type == G_UNICODE_UPPERCASE_LETTER
|| next_type == G_UNICODE_DECIMAL_NUMBER || next_uch == gunichar(L'#')) {
gunichar *eptr = nxt4;
GUnicodeType eptr_type(G_UNICODE_UNASSIGNED);
@@ -1223,16 +1223,16 @@ Tokenizer::quik_tokenize(const std::string& buf)
next_type = nxt4 < lim4 ? g_unichar_type(next_uch) : G_UNICODE_UNASSIGNED;
goto retry;
}
-
+
}
post_break_p = pre_break_p = !in_url_p || next_type != G_UNICODE_SPACE_SEPARATOR;
- if (escape_p)
+ if (escape_p)
substitute_p = L"&amp;";
break;
case gunichar(L'\''):
if (english_p) {
if (!in_url_p) {
- bool next_letter_p = next_type == G_UNICODE_LOWERCASE_LETTER
+ bool next_letter_p = next_type == G_UNICODE_LOWERCASE_LETTER
|| next_type == G_UNICODE_UPPERCASE_LETTER;
pre_break_p = true;
if (next_letter_p && refined_p) {
@@ -1241,9 +1241,9 @@ Tokenizer::quik_tokenize(const std::string& buf)
*(uptr - 1) = gunichar(L' ');
*(uptr++) = prev_uch;
pre_break_p = false;
- }
+ }
}
- post_break_p = since_start == 0
+ post_break_p = since_start == 0
|| (!next_letter_p && next_type != G_UNICODE_DECIMAL_NUMBER);
}
} else if (latin_p) {
@@ -1252,12 +1252,12 @@ Tokenizer::quik_tokenize(const std::string& buf)
} else {
post_break_p = pre_break_p = !in_url_p;
}
- if (escape_p)
+ if (escape_p)
substitute_p = L"&apos;";
break;
case gunichar(L'"'):
post_break_p = pre_break_p = true;
- if (escape_p)
+ if (escape_p)
substitute_p = L"&quot;";
break;
case gunichar(L','):
@@ -1303,7 +1303,7 @@ Tokenizer::quik_tokenize(const std::string& buf)
}
}
// terminal isolated letter does not break
- } else if (class_follows_p(nxt4,lim4,G_UNICODE_LOWERCASE_LETTER) ||
+ } else if (class_follows_p(nxt4,lim4,G_UNICODE_LOWERCASE_LETTER) ||
g_unichar_type(*nxt4) == G_UNICODE_DASH_PUNCTUATION) {
// lower-case look-ahead does not break
} else {
@@ -1315,7 +1315,7 @@ Tokenizer::quik_tokenize(const std::string& buf)
pre_break_p = true;
break;
}
- }
+ }
break;
}
} else {
@@ -1346,11 +1346,11 @@ Tokenizer::quik_tokenize(const std::string& buf)
case gunichar(L')'):
break;
case gunichar(L'['):
- if (escape_p)
+ if (escape_p)
substitute_p = L"&#91;";
break;
case gunichar(L']'):
- if (escape_p)
+ if (escape_p)
substitute_p = L"&#93;";
break;
default:
@@ -1377,7 +1377,7 @@ Tokenizer::quik_tokenize(const std::string& buf)
if (english_p) {
if (!in_url_p) {
pre_break_p = true;
- post_break_p = since_start == 0 ||
+ post_break_p = since_start == 0 ||
(next_type != G_UNICODE_LOWERCASE_LETTER && next_type != G_UNICODE_UPPERCASE_LETTER && next_type != G_UNICODE_DECIMAL_NUMBER);
}
} else if (latin_p) {
@@ -1386,23 +1386,23 @@ Tokenizer::quik_tokenize(const std::string& buf)
} else {
post_break_p = pre_break_p = !in_url_p;
}
- if (escape_p)
+ if (escape_p)
substitute_p = L"&apos;";
- else
+ else
curr_uch = gunichar(L'\'');
break;
case gunichar(L'|'):
- if (escape_p)
+ if (escape_p)
substitute_p = L"&#124;";
post_break_p = pre_break_p = true;
break;
case gunichar(L'<'):
- if (escape_p)
+ if (escape_p)
substitute_p = L"&lt;";
post_break_p = pre_break_p = true;
break;
case gunichar(L'>'):
- if (escape_p)
+ if (escape_p)
substitute_p = L"&gt;";
post_break_p = pre_break_p = true;
break;
@@ -1414,7 +1414,7 @@ Tokenizer::quik_tokenize(const std::string& buf)
case gunichar(L'='):
case gunichar(L'~'):
in_num_p = false;
- post_break_p = pre_break_p = !in_url_p;
+ post_break_p = pre_break_p = !in_url_p;
break;
case gunichar(L'+'):
post_break_p = pre_break_p = !in_url_p;
@@ -1444,12 +1444,12 @@ Tokenizer::quik_tokenize(const std::string& buf)
curr_uch = gunichar(L' ');
} else if (curr_uch < gunichar(L' ')) {
curr_uch = gunichar(L' ');
- } else if (curr_uch == gunichar(L'\u0092') &&
+ } else if (curr_uch == gunichar(L'\u0092') &&
(next_type == G_UNICODE_LOWERCASE_LETTER || next_type == G_UNICODE_UPPERCASE_LETTER)) {
// observed corpus corruption case
if (english_p) {
pre_break_p = true;
- post_break_p = since_start == 0 ||
+ post_break_p = since_start == 0 ||
(next_type != G_UNICODE_LOWERCASE_LETTER && next_type != G_UNICODE_UPPERCASE_LETTER && next_type != G_UNICODE_DECIMAL_NUMBER);
} else if (latin_p) {
post_break_p = true;
@@ -1457,9 +1457,9 @@ Tokenizer::quik_tokenize(const std::string& buf)
} else {
post_break_p = pre_break_p = true;
}
- if (escape_p)
+ if (escape_p)
substitute_p = L"&apos;";
- else
+ else
curr_uch = gunichar(L'\'');
} else {
post_break_p = pre_break_p = true;
@@ -1491,7 +1491,7 @@ Tokenizer::quik_tokenize(const std::string& buf)
in_url_p = in_num_p = false;
break;
}
-
+
if (pre_break_p || curr_uch == gunichar(L' ') || (bad_length && curr_type != G_UNICODE_UNASSIGNED)) {
if (since_start) {
// non-empty token emitted previously, so pre-break must emit token separator
@@ -1501,8 +1501,8 @@ Tokenizer::quik_tokenize(const std::string& buf)
if (curr_uch == gunichar(L' '))
// suppress emission below, fall-through to substitute logic
curr_uch = 0;
- }
-
+ }
+
if (substitute_p) {
for (gunichar *sptr = (gunichar *)substitute_p; *sptr; ++sptr) {
*uptr++ = *sptr;
@@ -1521,7 +1521,7 @@ Tokenizer::quik_tokenize(const std::string& buf)
glong nbytes = 0;
gchar *utf8 = g_ucs4_to_utf8(ubuf,uptr-ubuf,0,&nbytes,0); // g_free
- if (utf8[nbytes-1] == ' ')
+ if (utf8[nbytes-1] == ' ')
--nbytes;
text.assign((const char *)utf8,(const char *)(utf8 + nbytes));
g_free(utf8);
@@ -1552,7 +1552,7 @@ Tokenizer::quik_tokenize(const std::string& buf)
}
-std::size_t
+std::size_t
Tokenizer::tokenize(std::istream& is, std::ostream& os)
{
std::size_t line_no = 0;
@@ -1561,10 +1561,10 @@ Tokenizer::tokenize(std::istream& is, std::ostream& os)
std::vector< std::vector< std::string > > results(nthreads);
std::vector< boost::thread > workers(nthreads);
bool done_p = !(is.good() && os.good());
-
+
for (std::size_t tranche = 0; !done_p; ++tranche) {
-
+
// for loop starting threads for chunks of input
for (std::size_t ithread = 0; ithread < nthreads; ++ithread) {
@@ -1589,19 +1589,19 @@ Tokenizer::tokenize(std::istream& is, std::ostream& os)
results[ithread].resize(line_pos);
break;
}
- lines[ithread][line_pos].clear();
- } else if (skip_xml_p &&
- (RE2::FullMatch(istr,tag_line_x) || RE2::FullMatch(istr,white_line_x))) {
- lines[ithread][line_pos].clear();
+ lines[ithread][line_pos].clear();
+ } else if (skip_xml_p &&
+ (RE2::FullMatch(istr,tag_line_x) || RE2::FullMatch(istr,white_line_x))) {
+ lines[ithread][line_pos].clear();
} else {
- lines[ithread][line_pos] =
- std::string(SPC_BYTE).append(istr).append(SPC_BYTE);
+ lines[ithread][line_pos] =
+ std::string(SPC_BYTE).append(istr).append(SPC_BYTE);
}
- }
+ }
if (line_pos) {
- workers[ithread] =
- boost::thread(VectorTokenizerCallable(this,lines[ithread],results[ithread]));
+ workers[ithread] =
+ boost::thread(VectorTokenizerCallable(this,lines[ithread],results[ithread]));
}
} // end for loop starting threads
@@ -1616,22 +1616,22 @@ Tokenizer::tokenize(std::istream& is, std::ostream& os)
if (nlin != nres) {
std::ostringstream emsg;
- emsg << "Tranche " << tranche
- << " worker " << ithread << "/" << nthreads
+ emsg << "Tranche " << tranche
+ << " worker " << ithread << "/" << nthreads
<< " |lines|==" << nlin << " != |results|==" << nres;
throw std::runtime_error(emsg.str());
}
- for (std::size_t ires = 0; ires < nres; ++ires)
+ for (std::size_t ires = 0; ires < nres; ++ires)
os << results[ithread][ires] << std::endl;
} // end loop over joined results
-
+
if (verbose_p) {
std::cerr << line_no << ' ';
std::cerr.flush();
}
-
+
} // end loop over chunks
return line_no;
@@ -1642,18 +1642,18 @@ std::string
Tokenizer::detokenize(const std::string& buf)
{
std::vector<std::string> words = split(trim(buf));
-
+
std::size_t squotes = 0;
std::size_t dquotes = 0;
std::string prepends("");
std::ostringstream oss;
-
+
std::size_t nwords = words.size();
std::size_t iword = 0;
- if (unescape_p)
- for (auto &word: words)
+ if (unescape_p)
+ for (auto &word: words)
unescape(word);
for (auto &word: words) {
@@ -1665,13 +1665,13 @@ Tokenizer::detokenize(const std::string& buf)
} else if (RE2::FullMatch(word,left_x)) {
oss << word;
prepends = SPC_BYTE;
- } else if (english_p && iword
- && RE2::FullMatch(word,curr_en_x)
+ } else if (english_p && iword
+ && RE2::FullMatch(word,curr_en_x)
&& RE2::FullMatch(words[iword-1],pre_en_x)) {
oss << word;
prepends = SPC_BYTE;
- } else if (latin_p && iword < nwords - 2
- && RE2::FullMatch(word,curr_fr_x)
+ } else if (latin_p && iword < nwords - 2
+ && RE2::FullMatch(word,curr_fr_x)
&& RE2::FullMatch(words[iword+1],post_fr_x)) {
oss << prepends << word;
prepends.clear();
@@ -1679,7 +1679,7 @@ Tokenizer::detokenize(const std::string& buf)
if ((word.at(0) == '\'' && ((squotes % 2) == 0 )) ||
(word.at(0) == '"' && ((dquotes % 2) == 0))) {
if (english_p && iword
- && word.at(0) == '\''
+ && word.at(0) == '\''
&& std::tolower(words[iword-1].at(words[iword-1].size()-1)) == 's') {
oss << word;
prepends = SPC_BYTE;
@@ -1698,7 +1698,7 @@ Tokenizer::detokenize(const std::string& buf)
prepends = SPC_BYTE;
if (word.at(0) == '\'')
squotes++;
- else if (word.at(0) == '"')
+ else if (word.at(0) == '"')
dquotes++;
}
} else {
@@ -1707,8 +1707,8 @@ Tokenizer::detokenize(const std::string& buf)
}
iword++;
}
-
-
+
+
std::string text(oss.str());
RE2::GlobalReplace(&text," +",SPC_BYTE);
RE2::GlobalReplace(&text,"\n ","\n");
@@ -1718,14 +1718,14 @@ Tokenizer::detokenize(const std::string& buf)
std::size_t
-Tokenizer::detokenize(std::istream& is, std::ostream& os)
+Tokenizer::detokenize(std::istream& is, std::ostream& os)
{
size_t line_no = 0;
while (is.good() && os.good()) {
std::string istr;
std::getline(is,istr);
line_no ++;
- if (istr.empty())
+ if (istr.empty())
continue;
if (skip_xml_p && (RE2::FullMatch(istr,tag_line_x) || RE2::FullMatch(istr,white_line_x))) {
os << istr << std::endl;
@@ -1749,7 +1749,7 @@ Tokenizer::splitter(const std::string &istr, bool *continuation_ptr) {
return parts;
}
gunichar *uout = (gunichar *)g_malloc0(2*ncp*sizeof(gunichar));
-
+
const wchar_t GENL_HYPH = L'\u2010';
const wchar_t IDEO_STOP = L'\u3002';
const wchar_t KANA_MDOT = L'\u30FB';
@@ -1786,7 +1786,7 @@ Tokenizer::splitter(const std::string &istr, bool *continuation_ptr) {
std::vector<std::size_t> breaks;
std::set<std::size_t> suppress;
-
+
for (; icp <= ncp; ++icp) {
currwc = wchar_t(ucs4[icp]);
curr_type = g_unichar_type(currwc);
@@ -1798,7 +1798,7 @@ Tokenizer::splitter(const std::string &istr, bool *continuation_ptr) {
case G_UNICODE_OTHER_NUMBER:
curr_class = numba;
curr_word_p = true;
- break;
+ break;
case G_UNICODE_LOWERCASE_LETTER:
case G_UNICODE_MODIFIER_LETTER:
case G_UNICODE_OTHER_LETTER:
@@ -1822,7 +1822,7 @@ Tokenizer::splitter(const std::string &istr, bool *continuation_ptr) {
} else if (currwc >= SMAL_HYPH) {
curr_word_p = true;
} else {
- curr_word_p = (currwc >= WAVE_DASH) && (currwc <= KANA_DHYP);
+ curr_word_p = (currwc >= WAVE_DASH) && (currwc <= KANA_DHYP);
}
break;
case G_UNICODE_CLOSE_PUNCTUATION:
@@ -1860,7 +1860,7 @@ Tokenizer::splitter(const std::string &istr, bool *continuation_ptr) {
curr_word_p = false;
break;
}
-
+
// # condition for prefix test
// $words[$i] =~ /([\p{IsAlnum}\.\-]*)([\'\"\)\]\%\p{IsPf}]*)(\.+)$/
// $words[$i+1] =~ /^([ ]*[\'\"\(\[\¿\¡\p{IsPi}]*[ ]*[\p{IsUpper}0-9])/
@@ -1875,7 +1875,7 @@ Tokenizer::splitter(const std::string &istr, bool *continuation_ptr) {
} else if (curr_word_p) {
if (!fini_word) {
init_word = ocp;
- }
+ }
fini_word = ocp+1;
dotslen = finilen = 0;
} else if (curr_class >= quote && curr_class <= pfpct && curr_class != pinit) {
@@ -1893,7 +1893,7 @@ Tokenizer::splitter(const std::string &istr, bool *continuation_ptr) {
} else {
init_word = fini_word = 0;
}
-
+
if (check_abbr_p) {
// not a valid word character or post-word punctuation character: check word
std::wstring k((wchar_t *)uout+init_word,fini_word-init_word);
@@ -1986,7 +1986,7 @@ Tokenizer::splitter(const std::string &istr, bool *continuation_ptr) {
}
init_word = fini_word = 0;
}
-
+
if (seqpos >= SEQ_LIM) {
seqpos = 0;
}
@@ -2015,7 +2015,7 @@ Tokenizer::splitter(const std::string &istr, bool *continuation_ptr) {
continue;
}
}
-
+
if (!seqpos) {
if (curr_class != blank) {
uout[ocp++] = gunichar(currwc);
@@ -2024,7 +2024,7 @@ Tokenizer::splitter(const std::string &istr, bool *continuation_ptr) {
}
continue;
}
-
+
if (curr_class == blank) {
if (prev_class != blank) {
seq[seqpos] = blank;
@@ -2034,7 +2034,7 @@ Tokenizer::splitter(const std::string &istr, bool *continuation_ptr) {
}
if (icp < ncp)
continue;
- }
+ }
if (curr_class >= quote && curr_class <= pfini) {
if (prev_class < quote || prev_class > pfini) {
@@ -2158,8 +2158,8 @@ Tokenizer::splitter(const std::string &istr, bool *continuation_ptr) {
if (uout[chkpos] == L'\n' || uout[chkpos] == L' ') {
endpos = chkpos;
continue;
- }
- if (g_unichar_isgraph(uout[chkpos]))
+ }
+ if (g_unichar_isgraph(uout[chkpos]))
break;
endpos = chkpos;
}
@@ -2171,17 +2171,17 @@ Tokenizer::splitter(const std::string &istr, bool *continuation_ptr) {
if (continuation_ptr)
*continuation_ptr = endpos > iop;
iop = nextpos;
- }
-
+ }
+
g_free(uout);
g_free(ucs4);
-
+
return parts;
}
std::pair<std::size_t,std::size_t>
-Tokenizer::splitter(std::istream& is, std::ostream& os)
+Tokenizer::splitter(std::istream& is, std::ostream& os)
{
std::pair<std::size_t,std::size_t> counts = { 0, 0 };
bool continuation_p = false;
@@ -2197,7 +2197,7 @@ Tokenizer::splitter(std::istream& is, std::ostream& os)
if (istr.empty() && (is.eof() ||!para_marks_p))
continue;
- if (skip_xml_p && (RE2::FullMatch(istr,tag_line_x) || RE2::FullMatch(istr,white_line_x)))
+ if (skip_xml_p && (RE2::FullMatch(istr,tag_line_x) || RE2::FullMatch(istr,white_line_x)))
continue;
std::vector<std::string> sentences(splitter(istr,&continuation_p));
@@ -2221,13 +2221,13 @@ Tokenizer::splitter(std::istream& is, std::ostream& os)
os << " ";
pending_gap = false;
}
-
- for (std::size_t ii = 0; ii < nsents-1; ++ii)
+
+ for (std::size_t ii = 0; ii < nsents-1; ++ii)
os << sentences[ii] << std::endl;
-
+
os << sentences[nsents-1];
- if (continuation_p)
+ if (continuation_p)
pending_gap = !split_breaks_p;
if (!pending_gap)
os << std::endl;
diff --git a/contrib/c++tokenizer/tokenizer.h b/contrib/c++tokenizer/tokenizer.h
index cc1de2770..978f20197 100644
--- a/contrib/c++tokenizer/tokenizer.h
+++ b/contrib/c++tokenizer/tokenizer.h
@@ -26,7 +26,7 @@ class Tokenizer {
private:
- typedef enum {
+ typedef enum {
empty = 0,
blank,
upper, // upper case
@@ -56,7 +56,7 @@ private:
// non-breaking prefixes (other) ucs4
std::set<std::wstring> nbpre_gen_ucs4;
- // compiled protected patterns
+ // compiled protected patterns
std::vector<re2::RE2 *> prot_pat_vec;
protected:
@@ -96,10 +96,10 @@ protected:
Tokenizer *tokenizer;
std::vector<std::string>& in;
std::vector<std::string>& out;
-
- VectorTokenizerCallable(Tokenizer *_tokenizer,
- std::vector<std::string>& _in,
- std::vector<std::string>& _out)
+
+ VectorTokenizerCallable(Tokenizer *_tokenizer,
+ std::vector<std::string>& _in,
+ std::vector<std::string>& _out)
: tokenizer(_tokenizer)
, in(_in)
, out(_out) {
@@ -107,10 +107,10 @@ protected:
void operator()() {
out.resize(in.size());
- for (std::size_t ii = 0; ii < in.size(); ++ii)
+ for (std::size_t ii = 0; ii < in.size(); ++ii)
if (in[ii].empty())
out[ii] = in[ii];
- else if (tokenizer->penn_p)
+ else if (tokenizer->penn_p)
out[ii] = tokenizer->penn_tokenize(in[ii]);
else
out[ii] = tokenizer->quik_tokenize(in[ii]);
diff --git a/contrib/c++tokenizer/tokenizer_main.cpp b/contrib/c++tokenizer/tokenizer_main.cpp
index 7adb599e7..358a68cc3 100644
--- a/contrib/c++tokenizer/tokenizer_main.cpp
+++ b/contrib/c++tokenizer/tokenizer_main.cpp
@@ -10,8 +10,8 @@ using namespace TOKENIZER_NAMESPACE ;
#endif
-void
-usage(const char *path)
+void
+usage(const char *path)
{
std::cerr << "Usage: " << path << "[-{v|x|p|a|e|s|u|n|N]* [LL] [-{c|o} PATH]* INFILE*" << std::endl;
std::cerr << " -a -- aggressive hyphenization" << std::endl;
@@ -89,7 +89,7 @@ copy_words(Tokenizer& tize, std::istream& ifs, std::ostream& ofs) {
int nlines = 0;
std::string line;
while (ifs.good() && std::getline(ifs,line)) {
- if (line.empty())
+ if (line.empty())
continue;
std::vector<std::string> tokens(tize.tokens(line));
int count = 0;
@@ -127,7 +127,7 @@ copy_words(Tokenizer& tize, std::istream& ifs, std::ostream& ofs) {
}
-int main(int ac, char **av)
+int main(int ac, char **av)
{
int rc = 0;
Parameters params;
@@ -140,7 +140,7 @@ int main(int ac, char **av)
if (!detokenize_p)
params.split_p = std::strstr(av[0],"splitter") != 0;
- while (++av,--ac) {
+ while (++av,--ac) {
if (**av == '-') {
switch (av[0][1]) {
case 'a':
@@ -244,7 +244,7 @@ int main(int ac, char **av)
if (comma) {
*comma++ = 0;
params.chunksize = std::strtoul(comma,0,0);
- }
+ }
params.nthreads = std::strtoul(*av,0,0);
} else {
params.args.push_back(std::string(*av));
@@ -275,7 +275,7 @@ int main(int ac, char **av)
cfg_mos_str.append("/moses");
if (!::access(cfg_mos_str.c_str(),X_OK)) {
params.cfg_path = strdup(cfg_mos_str.c_str());
- } else if (!::access(cfg_shr_str.c_str(),X_OK)) {
+ } else if (!::access(cfg_shr_str.c_str(),X_OK)) {
params.cfg_path = strdup(cfg_shr_str.c_str());
} else if (!::access(cfg_dir_str.c_str(),X_OK)) {
params.cfg_path = strdup(cfg_dir_str.c_str());
@@ -287,7 +287,7 @@ int main(int ac, char **av)
if (params.verbose_p) {
std::cerr << "config path: " << params.cfg_path << std::endl;
}
- }
+ }
std::unique_ptr<std::ofstream> pofs = 0;
if (!params.out_path.empty()) {
@@ -345,7 +345,7 @@ int main(int ac, char **av)
if (plines.second) {
std::cerr << "%%% " << plines.second << " sentences." << std::endl;
}
- }
+ }
return rc;
}
diff --git a/contrib/eppex/ISS.h b/contrib/eppex/ISS.h
index 7921fcbf8..9c4e1fc22 100644
--- a/contrib/eppex/ISS.h
+++ b/contrib/eppex/ISS.h
@@ -1,236 +1,236 @@
-/**
- * ISS (Indexed Strings Storage) - memory efficient storage for permanent strings.
- *
- * Implementation note: use #define USE_HASHSET to switch between implementation
- * using __gnu_cxx::hash_set and implementation using std::set.
- *
- * (C) Ceslav Przywara, UFAL MFF UK, 2011
- *
- * $Id$
- */
-
-#ifndef _ISS_H
-#define _ISS_H
-
-#include <limits>
-#include <vector>
-#include <string.h>
-
-// Use hashset instead of std::set for string-to-number indexing?
-#ifdef USE_HASHSET
-#include <ext/hash_set>
-#else
-#include <set>
-#endif
-
-#include <boost/pool/pool.hpp>
-
-#ifdef USE_HASHSET
-// Forward declaration of comparator functor.
-template<class IndType>
-class StringsEqualComparator;
-
-template<class IndType>
-class Hasher;
-#else
-// Forward declaration of comparator functor.
-template<class IndType>
-class StringsLessComparator;
-#endif
-
-/**
- */
-template<class IndType>
-class IndexedStringsStorage {
-
-public:
-
- typedef IndType index_type;
-
-#ifdef USE_HASHSET
- typedef StringsEqualComparator<IndType> equality_comparator_t;
-
- typedef Hasher<IndType> hasher_t;
-
- /** @typedef Hash set used as lookup table (string -> numeric index). */
- typedef __gnu_cxx::hash_set<IndType, hasher_t, equality_comparator_t> index_t;
-#else
- typedef StringsLessComparator<IndType> less_comparator_t;
-
- /** @typedef Set used as lookup table (string -> numeric index). */
- typedef std::set<IndType, less_comparator_t> index_t;
-#endif
- /** @typedef Container of pointers to stored C-strings. Acts as
- * conversion table: numeric index -> string.
- */
- typedef std::vector<const char*> table_t;
-
-private:
-
- /** @var memory pool used to store C-strings */
- boost::pool<> _storage;
-
- /** @var index-to-string conversion table */
- table_t _table;
-
- /** @var index lookup table */
- index_t _index;
-
-public:
- /** Default constructor.
- */
- IndexedStringsStorage(void);
-
- /** @return True, if the indices are exhausted (new strings cannot be stored).
- */
- inline bool is_full(void) const { return _table.size() == std::numeric_limits<IndType>::max(); }
-
- /** Retrieves pointer to C-string instance represented by given index.
- * Note: No range checks are performed!
- * @param index Index of C-string to retrieve.
- * @return Pointer to stored C-string instance.
- */
- inline const char* get(IndType index) const { return _table[index]; }
-
- /** Stores the string and returns its numeric index.
- * @param str Pointer to C-string to store.
- * @return Index of stored copy of str.
- * @throw std::bad_alloc When insertion of new string would cause
- * overflow of indices datatype.
- */
- IndType put(const char* str);
-
- /** @return Number of unique strings stored so far.
- */
- inline table_t::size_type size(void) const { return _table.size(); }
-};
-
-
-/** Functor designed for less than comparison of C-strings stored within StringStore.
- * @param IndType Type of numerical indices of strings within given StringStore.
- */
-#ifdef USE_HASHSET
-template<class IndType>
-class StringsEqualComparator: public std::binary_function<IndType, IndType, bool> {
-#else
-template<class IndType>
-class StringsLessComparator: public std::binary_function<IndType, IndType, bool> {
-#endif
- /** @var conversion table: index -> string (necessary for indices comparison) */
- const typename IndexedStringsStorage<IndType>::table_t& _table;
-public:
-#ifdef USE_HASHSET
- StringsEqualComparator<IndType>(const typename IndexedStringsStorage<IndType>::table_t& table): _table(table) {}
-#else
- StringsLessComparator<IndType>(const typename IndexedStringsStorage<IndType>::table_t& table): _table(table) {}
-#endif
-
- /** Comparison of two pointers to C-strings.
- * @param lhs Pointer to 1st C-string.
- * @param rhs Pointer to 2nd C-string.
- * @return True, if 1st argument is equal/less than 2nd argument.
- */
- inline bool operator()(IndType lhs, IndType rhs) const {
-#ifdef USE_HASHSET
- return strcmp(_table[lhs], _table[rhs]) == 0;
-#else
- return strcmp(_table[lhs], _table[rhs]) < 0;
-#endif
- }
-};
-
-#ifdef USE_HASHSET
-/** Functor... TODO.
- */
-template<class IndType>
-class Hasher: public std::unary_function<IndType, size_t> {
-
- __gnu_cxx::hash<const char*> _hash;
-
- /** @var conversion table: index -> string (necessary for indices comparison) */
- const typename IndexedStringsStorage<IndType>::table_t& _table;
-
-public:
- /** */
- Hasher<IndType>(const typename IndexedStringsStorage<IndType>::table_t& table): _hash(), _table(table) {}
-
- /** Hashing function.
- * @param index
- * @return Counted hash.
- */
- inline size_t operator()(const IndType index) const {
- return _hash(_table[index]);
- }
-};
-#endif
-
-template <class IndType>
-#ifdef USE_HASHSET
-IndexedStringsStorage<IndType>::IndexedStringsStorage(void): _storage(sizeof(char)), _table(), _index(100, hasher_t(_table), equality_comparator_t(_table)) {}
-#else
-IndexedStringsStorage<IndType>::IndexedStringsStorage(void): _storage(sizeof(char)), _table(), _index(less_comparator_t(_table)) {}
-#endif
-
-template <class IndType>
-IndType IndexedStringsStorage<IndType>::put(const char* str) {
-
- if ( this->is_full() ) {
- // What a pity, not a single index left to spend.
- throw std::bad_alloc();
- }
-
- // To use the index for lookup we first have to store passed string
- // in conversion table (cause during lookup we compare the strings indirectly
- // by using their indices).
- // Note: thread unsafe! TODO: Redesing.
- IndType index = static_cast<IndType>(_table.size());
- _table.push_back(str);
-
-#ifdef USE_HASHSET
- //
- typename index_t::iterator iIndex = _index.find(index);
-#else
- // A lower_bound() search enables us to use "found" iterator as a hint for
- // eventual insertion.
- typename index_t::iterator iIndex = _index.lower_bound(index);
-#endif
-
- if ( (iIndex != _index.end())
-#ifndef USE_HASHSET
- // In case of lower_bound() search we have to also compare found item
- // with passed string.
- && (strcmp(_table[*iIndex], str) == 0)
-#endif
- ) {
- // String is already present in storage!
- // Pop back temporary stored pointer...
- _table.pop_back();
- // ...and return numeric index to already stored copy of `str`.
- return static_cast<IndType>(*iIndex);
- }
-
- // String not found within storage.
-
- // Allocate memory required for string storage...
- char* mem = static_cast<char*>(_storage.ordered_malloc(strlen(str) + 1));
- // ...and fill it with copy of passed string.
- strcpy(mem, str);
-
- // Overwrite temporary stored pointer to `str` with pointer to freshly
- // saved copy.
- _table[index] = mem;
-
-#ifdef USE_HASHSET
- // Insert the index into lookup table.
- _index.insert(index);
-#else
- // Insert the index into lookup table (use previously retrieved iterator
- // as a hint).
- _index.insert(iIndex, index);
-#endif
-
- // Finally.
- return index;
-}
-
-#endif
+/**
+ * ISS (Indexed Strings Storage) - memory efficient storage for permanent strings.
+ *
+ * Implementation note: use #define USE_HASHSET to switch between implementation
+ * using __gnu_cxx::hash_set and implementation using std::set.
+ *
+ * (C) Ceslav Przywara, UFAL MFF UK, 2011
+ *
+ * $Id$
+ */
+
+#ifndef _ISS_H
+#define _ISS_H
+
+#include <limits>
+#include <vector>
+#include <string.h>
+
+// Use hashset instead of std::set for string-to-number indexing?
+#ifdef USE_HASHSET
+#include <ext/hash_set>
+#else
+#include <set>
+#endif
+
+#include <boost/pool/pool.hpp>
+
+#ifdef USE_HASHSET
+// Forward declaration of comparator functor.
+template<class IndType>
+class StringsEqualComparator;
+
+template<class IndType>
+class Hasher;
+#else
+// Forward declaration of comparator functor.
+template<class IndType>
+class StringsLessComparator;
+#endif
+
+/**
+ */
+template<class IndType>
+class IndexedStringsStorage {
+
+public:
+
+ typedef IndType index_type;
+
+#ifdef USE_HASHSET
+ typedef StringsEqualComparator<IndType> equality_comparator_t;
+
+ typedef Hasher<IndType> hasher_t;
+
+ /** @typedef Hash set used as lookup table (string -> numeric index). */
+ typedef __gnu_cxx::hash_set<IndType, hasher_t, equality_comparator_t> index_t;
+#else
+ typedef StringsLessComparator<IndType> less_comparator_t;
+
+ /** @typedef Set used as lookup table (string -> numeric index). */
+ typedef std::set<IndType, less_comparator_t> index_t;
+#endif
+ /** @typedef Container of pointers to stored C-strings. Acts as
+ * conversion table: numeric index -> string.
+ */
+ typedef std::vector<const char*> table_t;
+
+private:
+
+ /** @var memory pool used to store C-strings */
+ boost::pool<> _storage;
+
+ /** @var index-to-string conversion table */
+ table_t _table;
+
+ /** @var index lookup table */
+ index_t _index;
+
+public:
+ /** Default constructor.
+ */
+ IndexedStringsStorage(void);
+
+ /** @return True, if the indices are exhausted (new strings cannot be stored).
+ */
+ inline bool is_full(void) const { return _table.size() == std::numeric_limits<IndType>::max(); }
+
+ /** Retrieves pointer to C-string instance represented by given index.
+ * Note: No range checks are performed!
+ * @param index Index of C-string to retrieve.
+ * @return Pointer to stored C-string instance.
+ */
+ inline const char* get(IndType index) const { return _table[index]; }
+
+ /** Stores the string and returns its numeric index.
+ * @param str Pointer to C-string to store.
+ * @return Index of stored copy of str.
+ * @throw std::bad_alloc When insertion of new string would cause
+ * overflow of indices datatype.
+ */
+ IndType put(const char* str);
+
+ /** @return Number of unique strings stored so far.
+ */
+ inline table_t::size_type size(void) const { return _table.size(); }
+};
+
+
+/** Functor designed for less than comparison of C-strings stored within StringStore.
+ * @param IndType Type of numerical indices of strings within given StringStore.
+ */
+#ifdef USE_HASHSET
+template<class IndType>
+class StringsEqualComparator: public std::binary_function<IndType, IndType, bool> {
+#else
+template<class IndType>
+class StringsLessComparator: public std::binary_function<IndType, IndType, bool> {
+#endif
+ /** @var conversion table: index -> string (necessary for indices comparison) */
+ const typename IndexedStringsStorage<IndType>::table_t& _table;
+public:
+#ifdef USE_HASHSET
+ StringsEqualComparator<IndType>(const typename IndexedStringsStorage<IndType>::table_t& table): _table(table) {}
+#else
+ StringsLessComparator<IndType>(const typename IndexedStringsStorage<IndType>::table_t& table): _table(table) {}
+#endif
+
+ /** Comparison of two pointers to C-strings.
+ * @param lhs Pointer to 1st C-string.
+ * @param rhs Pointer to 2nd C-string.
+ * @return True, if 1st argument is equal/less than 2nd argument.
+ */
+ inline bool operator()(IndType lhs, IndType rhs) const {
+#ifdef USE_HASHSET
+ return strcmp(_table[lhs], _table[rhs]) == 0;
+#else
+ return strcmp(_table[lhs], _table[rhs]) < 0;
+#endif
+ }
+};
+
+#ifdef USE_HASHSET
+/** Functor... TODO.
+ */
+template<class IndType>
+class Hasher: public std::unary_function<IndType, size_t> {
+
+ __gnu_cxx::hash<const char*> _hash;
+
+ /** @var conversion table: index -> string (necessary for indices comparison) */
+ const typename IndexedStringsStorage<IndType>::table_t& _table;
+
+public:
+ /** */
+ Hasher<IndType>(const typename IndexedStringsStorage<IndType>::table_t& table): _hash(), _table(table) {}
+
+ /** Hashing function.
+ * @param index
+ * @return Counted hash.
+ */
+ inline size_t operator()(const IndType index) const {
+ return _hash(_table[index]);
+ }
+};
+#endif
+
+template <class IndType>
+#ifdef USE_HASHSET
+IndexedStringsStorage<IndType>::IndexedStringsStorage(void): _storage(sizeof(char)), _table(), _index(100, hasher_t(_table), equality_comparator_t(_table)) {}
+#else
+IndexedStringsStorage<IndType>::IndexedStringsStorage(void): _storage(sizeof(char)), _table(), _index(less_comparator_t(_table)) {}
+#endif
+
+template <class IndType>
+IndType IndexedStringsStorage<IndType>::put(const char* str) {
+
+ if ( this->is_full() ) {
+ // What a pity, not a single index left to spend.
+ throw std::bad_alloc();
+ }
+
+ // To use the index for lookup we first have to store passed string
+ // in conversion table (cause during lookup we compare the strings indirectly
+ // by using their indices).
+ // Note: thread unsafe! TODO: Redesing.
+ IndType index = static_cast<IndType>(_table.size());
+ _table.push_back(str);
+
+#ifdef USE_HASHSET
+ //
+ typename index_t::iterator iIndex = _index.find(index);
+#else
+ // A lower_bound() search enables us to use "found" iterator as a hint for
+ // eventual insertion.
+ typename index_t::iterator iIndex = _index.lower_bound(index);
+#endif
+
+ if ( (iIndex != _index.end())
+#ifndef USE_HASHSET
+ // In case of lower_bound() search we have to also compare found item
+ // with passed string.
+ && (strcmp(_table[*iIndex], str) == 0)
+#endif
+ ) {
+ // String is already present in storage!
+ // Pop back temporary stored pointer...
+ _table.pop_back();
+ // ...and return numeric index to already stored copy of `str`.
+ return static_cast<IndType>(*iIndex);
+ }
+
+ // String not found within storage.
+
+ // Allocate memory required for string storage...
+ char* mem = static_cast<char*>(_storage.ordered_malloc(strlen(str) + 1));
+ // ...and fill it with copy of passed string.
+ strcpy(mem, str);
+
+ // Overwrite temporary stored pointer to `str` with pointer to freshly
+ // saved copy.
+ _table[index] = mem;
+
+#ifdef USE_HASHSET
+ // Insert the index into lookup table.
+ _index.insert(index);
+#else
+ // Insert the index into lookup table (use previously retrieved iterator
+ // as a hint).
+ _index.insert(iIndex, index);
+#endif
+
+ // Finally.
+ return index;
+}
+
+#endif
diff --git a/contrib/eppex/LossyCounter.h b/contrib/eppex/LossyCounter.h
index 2796c8090..a3cf3339f 100644
--- a/contrib/eppex/LossyCounter.h
+++ b/contrib/eppex/LossyCounter.h
@@ -83,7 +83,7 @@ public:
const counter_t bucketWidth; // ceil(1/error)
private:
-
+
/** @var Current epoch bucket ID (b-current) */
counter_t _bucketId;
@@ -182,7 +182,7 @@ class LossyCounterIterator: public std::iterator<std::forward_iterator_tag, type
public:
typedef LossyCounterIterator<T> self_type;
-
+
typedef typename LossyCounter<T>::storage_t::const_iterator const_iterator;
protected:
@@ -288,7 +288,7 @@ protected:
template<class T>
void LossyCounter<T>::add(const T& item) {
-
+
typename storage_t::iterator iter = _storage.find(item);
if ( iter == _storage.end() ) {
@@ -330,7 +330,7 @@ void LossyCounter<T>::prune(void) {
////////////////////////////////////////////////////////////////////////////////
template<class T>
-LossyCounterIterator<T> LossyCounterIterator<T>::operator++(void) {
+LossyCounterIterator<T> LossyCounterIterator<T>::operator++(void) {
this->forward();
return *this;
}
diff --git a/contrib/eppex/eppex.cpp b/contrib/eppex/eppex.cpp
index d382890d2..76490d9d2 100644
--- a/contrib/eppex/eppex.cpp
+++ b/contrib/eppex/eppex.cpp
@@ -92,7 +92,7 @@ int main(int argc, char* argv[]) {
// Init lossy counters.
std::string lossyCountersParams;
int paramIdx = 5;
-
+
while ( (argc > paramIdx) && (*argv[paramIdx] != '-') ) {
std::string param = std::string(argv[paramIdx]);
if ( !parse_lossy_counting_params(param) ) {
@@ -113,7 +113,7 @@ int main(int argc, char* argv[]) {
usage(argv[0]);
}
}
-
+
if ( (argc > paramIdx) && (strcmp(argv[paramIdx], "--compact") == 0) ) {
compactOutputFlag = true;
++paramIdx;
@@ -154,7 +154,7 @@ int main(int argc, char* argv[]) {
readInput(eFile, fFile, aFile);
std::cerr << std::endl; // Leave the progress bar end on previous line.
-
+
// close input files
eFile.close();
fFile.close();
diff --git a/contrib/eppex/phrase-extract.cpp b/contrib/eppex/phrase-extract.cpp
index 5dff43b78..46337a8b7 100644
--- a/contrib/eppex/phrase-extract.cpp
+++ b/contrib/eppex/phrase-extract.cpp
@@ -32,14 +32,14 @@ typedef std::vector<output_pair_t> output_vector_t;
class PhraseComp {
/** @var If true, sort by target phrase first. */
bool _inverted;
-
+
bool compareAlignments(const indexed_phrases_pair_t& a, const indexed_phrases_pair_t& b);
int comparePhrases(const indexed_phrases_pair_t::phrase_t& a, const indexed_phrases_pair_t::phrase_t& b);
-
+
public:
PhraseComp(bool inverted): _inverted(inverted) {}
-
+
bool operator()(const output_pair_t& a, const output_pair_t& b);
};
@@ -448,9 +448,9 @@ void extract(SentenceAlignment &sentence) {
((phraseModel)? getOrientString(phrasePrevOrient, phraseType) + " " + getOrientString(phraseNextOrient, phraseType) : "") + " | " +
((hierModel)? getOrientString(hierPrevOrient, hierType) + " " + getOrientString(hierNextOrient, hierType) : "");
}
-
+
addPhrase(sentence, startE, endE, startF, endF, orientationInfo);
-
+
} // end of for loop through inbound phrases
} // end if buildExtraStructure
@@ -567,7 +567,7 @@ bool PhraseComp::operator()(const output_pair_t& a, const output_pair_t& b) {
else {
return cmp < 0;
}
-
+
}
@@ -607,7 +607,7 @@ bool PhraseComp::compareAlignments(const indexed_phrases_pair_t& a, const indexe
return cmp < 0;
}
}
-
+
// Note: LC_ALL=C GNU sort treats shorter item as lesser than longer one.
return (cmp == 0) ? (aSize < bSize) : (cmp < 0);
@@ -685,7 +685,7 @@ void processSortedOutput(OutputProcessor& processor) {
void processUnsortedOutput(OutputProcessor& processor) {
-
+
LossyCountersVector::value_type current = NULL, prev = NULL;
for ( size_t i = 1; i < lossyCounters.size(); ++i ) { // Intentionally skip 0.
@@ -759,7 +759,7 @@ void printStats(void) {
if ( (current == NULL) || ((current != prev) && (prev != NULL)) ) {
// Time to print.
to = i-1;
-
+
// Increment overall stats.
outputMass += prev->outputMass;
outputSize += prev->outputSize;
@@ -787,7 +787,7 @@ void printStats(void) {
from = i;
}
-
+
prev = current;
}
diff --git a/contrib/other-builds/cmake/boost.example/main.cpp b/contrib/other-builds/cmake/boost.example/main.cpp
index 7b95fb2a9..b77388e46 100644
--- a/contrib/other-builds/cmake/boost.example/main.cpp
+++ b/contrib/other-builds/cmake/boost.example/main.cpp
@@ -10,15 +10,15 @@ int main(int argc, char* argv[])
using namespace boost::locale;
using namespace std;
-
+
generator gen;
locale loc=gen("");
-
+
cout.imbue(loc);
-
+
cout << "Hello, World" << endl;
-
+
cout << "This is how we show currency in this locale " << as::currency << 103.34 << endl;
-
+
return 0;
}
diff --git a/contrib/python/moses/dictree.cpp b/contrib/python/moses/dictree.cpp
index 207d7c3f7..d9008f6e3 100644
--- a/contrib/python/moses/dictree.cpp
+++ b/contrib/python/moses/dictree.cpp
@@ -557,7 +557,7 @@ static const char *__pyx_f[] = {
* ctypedef vector[const_str_pointer] Tokens
* ctypedef float FValue # <<<<<<<<<<<<<<
* ctypedef vector[FValue] Scores
- *
+ *
*/
typedef float __pyx_t_5moses_8cdictree_FValue;
@@ -582,7 +582,7 @@ struct __pyx_obj_5moses_7dictree___pyx_scope_struct_8_genexpr;
/* "cdictree.pxd":4
* from libcpp.vector cimport vector
- *
+ *
* ctypedef string* str_pointer # <<<<<<<<<<<<<<
* ctypedef string* const_str_pointer "const str_pointer"
* ctypedef vector[const_str_pointer] Tokens
@@ -590,7 +590,7 @@ struct __pyx_obj_5moses_7dictree___pyx_scope_struct_8_genexpr;
typedef std::string *__pyx_t_5moses_8cdictree_str_pointer;
/* "cdictree.pxd":5
- *
+ *
* ctypedef string* str_pointer
* ctypedef string* const_str_pointer "const str_pointer" # <<<<<<<<<<<<<<
* ctypedef vector[const_str_pointer] Tokens
@@ -611,7 +611,7 @@ typedef std::vector<const str_pointer> __pyx_t_5moses_8cdictree_Tokens;
* ctypedef vector[const_str_pointer] Tokens
* ctypedef float FValue
* ctypedef vector[FValue] Scores # <<<<<<<<<<<<<<
- *
+ *
* cdef extern from 'PhraseDictionaryTree.h' namespace 'Moses':
*/
typedef std::vector<__pyx_t_5moses_8cdictree_FValue> __pyx_t_5moses_8cdictree_Scores;
@@ -619,7 +619,7 @@ struct __pyx_opt_args_5moses_7dictree_20PhraseDictionaryTree_getTargetProduction
/* "moses/dictree.pyx":252
* and os.path.isfile(stem + ".binphr.tgtvoc")
- *
+ *
* cdef TargetProduction getTargetProduction(self, cdictree.StringTgtCand& cand, wa = None, converter = None): # <<<<<<<<<<<<<<
* """Converts a StringTgtCandidate (c++ object) and possibly a word-alignment info (string) to a TargetProduction (python object)."""
* cdef list words = [cand.tokens[i].c_str() for i in xrange(cand.tokens.size())]
@@ -632,7 +632,7 @@ struct __pyx_opt_args_5moses_7dictree_20PhraseDictionaryTree_getTargetProduction
/* "moses/dictree.pyx":23
* raise TypeError('Cannot convert %s to string' % type(data))
- *
+ *
* cdef class Production(object): # <<<<<<<<<<<<<<
* """
* General class that represents a context-free production or a flat contiguous phrase.
@@ -646,7 +646,7 @@ struct __pyx_obj_5moses_7dictree_Production {
/* "moses/dictree.pyx":104
* return x >= y
- *
+ *
* cdef class Alignment(list): # <<<<<<<<<<<<<<
* """
* This represents a list of alignment points (pairs of integers).
@@ -658,7 +658,7 @@ struct __pyx_obj_5moses_7dictree_Alignment {
/* "moses/dictree.pyx":125
* return ' '.join('%d-%d' % (s, t) for s, t in self)
- *
+ *
* cdef class FValues(list): # <<<<<<<<<<<<<<
* """
* This represents a list of feature values (floats).
@@ -670,7 +670,7 @@ struct __pyx_obj_5moses_7dictree_FValues {
/* "moses/dictree.pyx":137
* return ' '.join(str(x) for x in self)
- *
+ *
* cdef class TargetProduction(Production): # <<<<<<<<<<<<<<
* """This class specializes production making it the target side of a translation rule.
* On top of lhs and rhs it comes with alignment information a tuple of real-valued features.
@@ -684,9 +684,9 @@ struct __pyx_obj_5moses_7dictree_TargetProduction {
/* "moses/dictree.pyx":175
* return repr((repr(self.rhs), repr(self.lhs), repr(self.scores), repr(self.alignment)))
- *
+ *
* cdef class QueryResult(list): # <<<<<<<<<<<<<<
- *
+ *
* cdef readonly Production source
*/
struct __pyx_obj_5moses_7dictree_QueryResult {
@@ -696,10 +696,10 @@ struct __pyx_obj_5moses_7dictree_QueryResult {
/* "moses/dictree.pyx":184
- *
- *
+ *
+ *
* cdef class DictionaryTree(object): # <<<<<<<<<<<<<<
- *
+ *
* @classmethod
*/
struct __pyx_obj_5moses_7dictree_DictionaryTree {
@@ -709,7 +709,7 @@ struct __pyx_obj_5moses_7dictree_DictionaryTree {
/* "moses/dictree.pyx":202
* raise NotImplementedError
- *
+ *
* cdef class PhraseDictionaryTree(DictionaryTree): # <<<<<<<<<<<<<<
* """This class encapsulates a Moses::PhraseDictionaryTree for operations over
* binary phrase tables."""
@@ -728,9 +728,9 @@ struct __pyx_obj_5moses_7dictree_PhraseDictionaryTree {
/* "moses/dictree.pyx":290
* return results
- *
+ *
* cdef class OnDiskWrapper(DictionaryTree): # <<<<<<<<<<<<<<
- *
+ *
* cdef condiskpt.OnDiskWrapper *wrapper
*/
struct __pyx_obj_5moses_7dictree_OnDiskWrapper {
@@ -745,7 +745,7 @@ struct __pyx_obj_5moses_7dictree_OnDiskWrapper {
/* "moses/dictree.pyx":50
* return IndexError, 'Index %s out of range' % str(key)
- *
+ *
* def __iter__(self): # <<<<<<<<<<<<<<
* for x in self.rhs:
* yield x
@@ -761,10 +761,10 @@ struct __pyx_obj_5moses_7dictree___pyx_scope_struct____iter__ {
/* "moses/dictree.pyx":122
* ValueError, 'Cannot figure out pairs from: %s' % type(alignment)
- *
+ *
* def __str__(self): # <<<<<<<<<<<<<<
* return ' '.join('%d-%d' % (s, t) for s, t in self)
- *
+ *
*/
struct __pyx_obj_5moses_7dictree___pyx_scope_struct_1___str__ {
PyObject_HEAD
@@ -773,10 +773,10 @@ struct __pyx_obj_5moses_7dictree___pyx_scope_struct_1___str__ {
/* "moses/dictree.pyx":123
- *
+ *
* def __str__(self):
* return ' '.join('%d-%d' % (s, t) for s, t in self) # <<<<<<<<<<<<<<
- *
+ *
* cdef class FValues(list):
*/
struct __pyx_obj_5moses_7dictree___pyx_scope_struct_2_genexpr {
@@ -792,10 +792,10 @@ struct __pyx_obj_5moses_7dictree___pyx_scope_struct_2_genexpr {
/* "moses/dictree.pyx":134
* super(FValues, self).__init__(values)
- *
+ *
* def __str__(self): # <<<<<<<<<<<<<<
* return ' '.join(str(x) for x in self)
- *
+ *
*/
struct __pyx_obj_5moses_7dictree___pyx_scope_struct_3___str__ {
PyObject_HEAD
@@ -804,10 +804,10 @@ struct __pyx_obj_5moses_7dictree___pyx_scope_struct_3___str__ {
/* "moses/dictree.pyx":135
- *
+ *
* def __str__(self):
* return ' '.join(str(x) for x in self) # <<<<<<<<<<<<<<
- *
+ *
* cdef class TargetProduction(Production):
*/
struct __pyx_obj_5moses_7dictree___pyx_scope_struct_4_genexpr {
@@ -822,7 +822,7 @@ struct __pyx_obj_5moses_7dictree___pyx_scope_struct_4_genexpr {
/* "moses/dictree.pyx":258
* return TargetProduction(words, scores, wa)
- *
+ *
* def query(self, line, converter = lambda x: log(x), cmp = lambda x, y: fsign(y.scores[2] - x.scores[2]), key = None): # <<<<<<<<<<<<<<
* """
* Returns a list of target productions that translate a given source production
@@ -838,7 +838,7 @@ struct __pyx_obj_5moses_7dictree___pyx_scope_struct_5_query {
* cdef vector[string]* wa = NULL
* cdef Production source = Production(f.c_str() for f in fphrase) # <<<<<<<<<<<<<<
* cdef QueryResult results = QueryResult(source)
- *
+ *
*/
struct __pyx_obj_5moses_7dictree___pyx_scope_struct_6_genexpr {
PyObject_HEAD
@@ -850,7 +850,7 @@ struct __pyx_obj_5moses_7dictree___pyx_scope_struct_6_genexpr {
/* "moses/dictree.pyx":316
* return Production(tokens[:-1], tokens[-1])
- *
+ *
* def query(self, line, converter = None, cmp = None, key = None): # <<<<<<<<<<<<<<
* """
* Returns a list of target productions that translate a given source production
@@ -881,7 +881,7 @@ struct __pyx_obj_5moses_7dictree___pyx_scope_struct_8_genexpr {
/* "moses/dictree.pyx":202
* raise NotImplementedError
- *
+ *
* cdef class PhraseDictionaryTree(DictionaryTree): # <<<<<<<<<<<<<<
* """This class encapsulates a Moses::PhraseDictionaryTree for operations over
* binary phrase tables."""
@@ -895,9 +895,9 @@ static struct __pyx_vtabstruct_5moses_7dictree_PhraseDictionaryTree *__pyx_vtabp
/* "moses/dictree.pyx":290
* return results
- *
+ *
* cdef class OnDiskWrapper(DictionaryTree): # <<<<<<<<<<<<<<
- *
+ *
* cdef condiskpt.OnDiskWrapper *wrapper
*/
@@ -1608,7 +1608,7 @@ static PyObject *__pyx_codeobj__13;
static PyObject *__pyx_codeobj__15;
/* "moses/dictree.pyx":156
- *
+ *
* @staticmethod
* def desc(x, y, key = lambda r: r.scores[0]): # <<<<<<<<<<<<<<
* """Returns the sign of key(y) - key(x).
@@ -1662,7 +1662,7 @@ static PyObject *__pyx_lambda_funcdef_5moses_7dictree_lambda1(CYTHON_UNUSED PyOb
/* "moses/dictree.pyx":258
* return TargetProduction(words, scores, wa)
- *
+ *
* def query(self, line, converter = lambda x: log(x), cmp = lambda x, y: fsign(y.scores[2] - x.scores[2]), key = None): # <<<<<<<<<<<<<<
* """
* Returns a list of target productions that translate a given source production
@@ -1832,7 +1832,7 @@ static PyObject *__pyx_lambda_funcdef_5moses_7dictree_lambda3(CYTHON_UNUSED PyOb
/* "moses/dictree.pyx":12
* from math import log
- *
+ *
* cpdef int fsign(float x): # <<<<<<<<<<<<<<
* """Simply returns the sign of float x (zero is assumed +), it's defined here just so one gains a little bit with static typing"""
* return 1 if x >= 0 else -1
@@ -1849,7 +1849,7 @@ static int __pyx_f_5moses_7dictree_fsign(float __pyx_v_x, CYTHON_UNUSED int __py
* cpdef int fsign(float x):
* """Simply returns the sign of float x (zero is assumed +), it's defined here just so one gains a little bit with static typing"""
* return 1 if x >= 0 else -1 # <<<<<<<<<<<<<<
- *
+ *
* cdef bytes as_str(data):
*/
if (((__pyx_v_x >= 0.0) != 0)) {
@@ -1862,7 +1862,7 @@ static int __pyx_f_5moses_7dictree_fsign(float __pyx_v_x, CYTHON_UNUSED int __py
/* "moses/dictree.pyx":12
* from math import log
- *
+ *
* cpdef int fsign(float x): # <<<<<<<<<<<<<<
* """Simply returns the sign of float x (zero is assumed +), it's defined here just so one gains a little bit with static typing"""
* return 1 if x >= 0 else -1
@@ -1929,7 +1929,7 @@ static PyObject *__pyx_pf_5moses_7dictree_fsign(CYTHON_UNUSED PyObject *__pyx_se
/* "moses/dictree.pyx":16
* return 1 if x >= 0 else -1
- *
+ *
* cdef bytes as_str(data): # <<<<<<<<<<<<<<
* if isinstance(data, bytes):
* return data
@@ -1948,13 +1948,13 @@ static PyObject *__pyx_f_5moses_7dictree_as_str(PyObject *__pyx_v_data) {
__Pyx_RefNannySetupContext("as_str", 0);
/* "moses/dictree.pyx":17
- *
+ *
* cdef bytes as_str(data):
* if isinstance(data, bytes): # <<<<<<<<<<<<<<
* return data
* elif isinstance(data, unicode):
*/
- __pyx_t_1 = PyBytes_Check(__pyx_v_data);
+ __pyx_t_1 = PyBytes_Check(__pyx_v_data);
__pyx_t_2 = (__pyx_t_1 != 0);
if (__pyx_t_2) {
@@ -1979,7 +1979,7 @@ static PyObject *__pyx_f_5moses_7dictree_as_str(PyObject *__pyx_v_data) {
* return data.encode('UTF-8')
* raise TypeError('Cannot convert %s to string' % type(data))
*/
- __pyx_t_2 = PyUnicode_Check(__pyx_v_data);
+ __pyx_t_2 = PyUnicode_Check(__pyx_v_data);
__pyx_t_1 = (__pyx_t_2 != 0);
if (__pyx_t_1) {
@@ -1988,7 +1988,7 @@ static PyObject *__pyx_f_5moses_7dictree_as_str(PyObject *__pyx_v_data) {
* elif isinstance(data, unicode):
* return data.encode('UTF-8') # <<<<<<<<<<<<<<
* raise TypeError('Cannot convert %s to string' % type(data))
- *
+ *
*/
__Pyx_XDECREF(__pyx_r);
__pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_data, __pyx_n_s_encode); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 20; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
@@ -2006,7 +2006,7 @@ static PyObject *__pyx_f_5moses_7dictree_as_str(PyObject *__pyx_v_data) {
* elif isinstance(data, unicode):
* return data.encode('UTF-8')
* raise TypeError('Cannot convert %s to string' % type(data)) # <<<<<<<<<<<<<<
- *
+ *
* cdef class Production(object):
*/
__pyx_t_4 = __Pyx_PyString_Format(__pyx_kp_s_Cannot_convert_s_to_string, ((PyObject *)Py_TYPE(__pyx_v_data))); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 21; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
@@ -2025,7 +2025,7 @@ static PyObject *__pyx_f_5moses_7dictree_as_str(PyObject *__pyx_v_data) {
/* "moses/dictree.pyx":16
* return 1 if x >= 0 else -1
- *
+ *
* cdef bytes as_str(data): # <<<<<<<<<<<<<<
* if isinstance(data, bytes):
* return data
@@ -2045,7 +2045,7 @@ static PyObject *__pyx_f_5moses_7dictree_as_str(PyObject *__pyx_v_data) {
/* "moses/dictree.pyx":33
* cdef readonly tuple rhs
- *
+ *
* def __init__(self, rhs, lhs = None): # <<<<<<<<<<<<<<
* """
* :rhs right-hand side of the production (or the flat contiguous phrase) - sequence of strings
@@ -2134,7 +2134,7 @@ static int __pyx_pf_5moses_7dictree_10Production___init__(struct __pyx_obj_5mose
* """
* self.rhs = tuple(rhs) # <<<<<<<<<<<<<<
* self.lhs = lhs
- *
+ *
*/
__pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 38; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__Pyx_GOTREF(__pyx_t_1);
@@ -2154,7 +2154,7 @@ static int __pyx_pf_5moses_7dictree_10Production___init__(struct __pyx_obj_5mose
* """
* self.rhs = tuple(rhs)
* self.lhs = lhs # <<<<<<<<<<<<<<
- *
+ *
* def __len__(self):
*/
if (!(likely(PyBytes_CheckExact(__pyx_v_lhs))||((__pyx_v_lhs) == Py_None)||(PyErr_Format(PyExc_TypeError, "Expected %.16s, got %.200s", "bytes", Py_TYPE(__pyx_v_lhs)->tp_name), 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 39; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
@@ -2168,7 +2168,7 @@ static int __pyx_pf_5moses_7dictree_10Production___init__(struct __pyx_obj_5mose
/* "moses/dictree.pyx":33
* cdef readonly tuple rhs
- *
+ *
* def __init__(self, rhs, lhs = None): # <<<<<<<<<<<<<<
* """
* :rhs right-hand side of the production (or the flat contiguous phrase) - sequence of strings
@@ -2189,10 +2189,10 @@ static int __pyx_pf_5moses_7dictree_10Production___init__(struct __pyx_obj_5mose
/* "moses/dictree.pyx":41
* self.lhs = lhs
- *
+ *
* def __len__(self): # <<<<<<<<<<<<<<
* return len(self.rhs)
- *
+ *
*/
/* Python wrapper */
@@ -2219,10 +2219,10 @@ static Py_ssize_t __pyx_pf_5moses_7dictree_10Production_2__len__(struct __pyx_ob
__Pyx_RefNannySetupContext("__len__", 0);
/* "moses/dictree.pyx":42
- *
+ *
* def __len__(self):
* return len(self.rhs) # <<<<<<<<<<<<<<
- *
+ *
* def __getitem__(self, key):
*/
__pyx_t_1 = __pyx_v_self->rhs;
@@ -2238,10 +2238,10 @@ static Py_ssize_t __pyx_pf_5moses_7dictree_10Production_2__len__(struct __pyx_ob
/* "moses/dictree.pyx":41
* self.lhs = lhs
- *
+ *
* def __len__(self): # <<<<<<<<<<<<<<
* return len(self.rhs)
- *
+ *
*/
/* function exit code */
@@ -2256,7 +2256,7 @@ static Py_ssize_t __pyx_pf_5moses_7dictree_10Production_2__len__(struct __pyx_ob
/* "moses/dictree.pyx":44
* return len(self.rhs)
- *
+ *
* def __getitem__(self, key): # <<<<<<<<<<<<<<
* if 0 <= key < len(self.rhs):
* return self.rhs[key]
@@ -2288,7 +2288,7 @@ static PyObject *__pyx_pf_5moses_7dictree_10Production_4__getitem__(struct __pyx
__Pyx_RefNannySetupContext("__getitem__", 0);
/* "moses/dictree.pyx":45
- *
+ *
* def __getitem__(self, key):
* if 0 <= key < len(self.rhs): # <<<<<<<<<<<<<<
* return self.rhs[key]
@@ -2338,7 +2338,7 @@ static PyObject *__pyx_pf_5moses_7dictree_10Production_4__getitem__(struct __pyx
* return self.rhs[key]
* else:
* return IndexError, 'Index %s out of range' % str(key) # <<<<<<<<<<<<<<
- *
+ *
* def __iter__(self):
*/
__Pyx_XDECREF(__pyx_r);
@@ -2368,7 +2368,7 @@ static PyObject *__pyx_pf_5moses_7dictree_10Production_4__getitem__(struct __pyx
/* "moses/dictree.pyx":44
* return len(self.rhs)
- *
+ *
* def __getitem__(self, key): # <<<<<<<<<<<<<<
* if 0 <= key < len(self.rhs):
* return self.rhs[key]
@@ -2389,7 +2389,7 @@ static PyObject *__pyx_gb_5moses_7dictree_10Production_8generator(__pyx_Generato
/* "moses/dictree.pyx":50
* return IndexError, 'Index %s out of range' % str(key)
- *
+ *
* def __iter__(self): # <<<<<<<<<<<<<<
* for x in self.rhs:
* yield x
@@ -2468,11 +2468,11 @@ static PyObject *__pyx_gb_5moses_7dictree_10Production_8generator(__pyx_Generato
if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 50; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
/* "moses/dictree.pyx":51
- *
+ *
* def __iter__(self):
* for x in self.rhs: # <<<<<<<<<<<<<<
* yield x
- *
+ *
*/
if (unlikely(__pyx_cur_scope->__pyx_v_self->rhs == Py_None)) {
PyErr_SetString(PyExc_TypeError, "'NoneType' object is not iterable");
@@ -2495,7 +2495,7 @@ static PyObject *__pyx_gb_5moses_7dictree_10Production_8generator(__pyx_Generato
* def __iter__(self):
* for x in self.rhs:
* yield x # <<<<<<<<<<<<<<
- *
+ *
* def __contains__(self, item):
*/
__Pyx_INCREF(__pyx_cur_scope->__pyx_v_x);
@@ -2519,7 +2519,7 @@ static PyObject *__pyx_gb_5moses_7dictree_10Production_8generator(__pyx_Generato
/* "moses/dictree.pyx":50
* return IndexError, 'Index %s out of range' % str(key)
- *
+ *
* def __iter__(self): # <<<<<<<<<<<<<<
* for x in self.rhs:
* yield x
@@ -2542,10 +2542,10 @@ static PyObject *__pyx_gb_5moses_7dictree_10Production_8generator(__pyx_Generato
/* "moses/dictree.pyx":54
* yield x
- *
+ *
* def __contains__(self, item): # <<<<<<<<<<<<<<
* return item in self.rhs
- *
+ *
*/
/* Python wrapper */
@@ -2571,10 +2571,10 @@ static int __pyx_pf_5moses_7dictree_10Production_9__contains__(struct __pyx_obj_
__Pyx_RefNannySetupContext("__contains__", 0);
/* "moses/dictree.pyx":55
- *
+ *
* def __contains__(self, item):
* return item in self.rhs # <<<<<<<<<<<<<<
- *
+ *
* def __reversed__(self):
*/
__pyx_t_1 = (__Pyx_PySequence_Contains(__pyx_v_item, __pyx_v_self->rhs, Py_EQ)); if (unlikely(__pyx_t_1 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 55; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
@@ -2583,10 +2583,10 @@ static int __pyx_pf_5moses_7dictree_10Production_9__contains__(struct __pyx_obj_
/* "moses/dictree.pyx":54
* yield x
- *
+ *
* def __contains__(self, item): # <<<<<<<<<<<<<<
* return item in self.rhs
- *
+ *
*/
/* function exit code */
@@ -2600,10 +2600,10 @@ static int __pyx_pf_5moses_7dictree_10Production_9__contains__(struct __pyx_obj_
/* "moses/dictree.pyx":57
* return item in self.rhs
- *
+ *
* def __reversed__(self): # <<<<<<<<<<<<<<
* return reversed(self.rhs)
- *
+ *
*/
/* Python wrapper */
@@ -2630,10 +2630,10 @@ static PyObject *__pyx_pf_5moses_7dictree_10Production_11__reversed__(struct __p
__Pyx_RefNannySetupContext("__reversed__", 0);
/* "moses/dictree.pyx":58
- *
+ *
* def __reversed__(self):
* return reversed(self.rhs) # <<<<<<<<<<<<<<
- *
+ *
* def __hash__(self):
*/
__Pyx_XDECREF(__pyx_r);
@@ -2651,10 +2651,10 @@ static PyObject *__pyx_pf_5moses_7dictree_10Production_11__reversed__(struct __p
/* "moses/dictree.pyx":57
* return item in self.rhs
- *
+ *
* def __reversed__(self): # <<<<<<<<<<<<<<
* return reversed(self.rhs)
- *
+ *
*/
/* function exit code */
@@ -2671,10 +2671,10 @@ static PyObject *__pyx_pf_5moses_7dictree_10Production_11__reversed__(struct __p
/* "moses/dictree.pyx":60
* return reversed(self.rhs)
- *
+ *
* def __hash__(self): # <<<<<<<<<<<<<<
* return hash(self.rhs)
- *
+ *
*/
/* Python wrapper */
@@ -2701,10 +2701,10 @@ static Py_hash_t __pyx_pf_5moses_7dictree_10Production_13__hash__(struct __pyx_o
__Pyx_RefNannySetupContext("__hash__", 0);
/* "moses/dictree.pyx":61
- *
+ *
* def __hash__(self):
* return hash(self.rhs) # <<<<<<<<<<<<<<
- *
+ *
* def __str__(self):
*/
__pyx_t_1 = __pyx_v_self->rhs;
@@ -2716,10 +2716,10 @@ static Py_hash_t __pyx_pf_5moses_7dictree_10Production_13__hash__(struct __pyx_o
/* "moses/dictree.pyx":60
* return reversed(self.rhs)
- *
+ *
* def __hash__(self): # <<<<<<<<<<<<<<
* return hash(self.rhs)
- *
+ *
*/
/* function exit code */
@@ -2735,7 +2735,7 @@ static Py_hash_t __pyx_pf_5moses_7dictree_10Production_13__hash__(struct __pyx_o
/* "moses/dictree.pyx":63
* return hash(self.rhs)
- *
+ *
* def __str__(self): # <<<<<<<<<<<<<<
* if self.lhs:
* return '%s -> %s' % (self.lhs, ' '.join(self.rhs))
@@ -2766,7 +2766,7 @@ static PyObject *__pyx_pf_5moses_7dictree_10Production_15__str__(struct __pyx_ob
__Pyx_RefNannySetupContext("__str__", 0);
/* "moses/dictree.pyx":64
- *
+ *
* def __str__(self):
* if self.lhs: # <<<<<<<<<<<<<<
* return '%s -> %s' % (self.lhs, ' '.join(self.rhs))
@@ -2809,7 +2809,7 @@ static PyObject *__pyx_pf_5moses_7dictree_10Production_15__str__(struct __pyx_ob
* return '%s -> %s' % (self.lhs, ' '.join(self.rhs))
* else:
* return ' '.join(self.rhs) # <<<<<<<<<<<<<<
- *
+ *
* def __repr__(self):
*/
__Pyx_XDECREF(__pyx_r);
@@ -2825,7 +2825,7 @@ static PyObject *__pyx_pf_5moses_7dictree_10Production_15__str__(struct __pyx_ob
/* "moses/dictree.pyx":63
* return hash(self.rhs)
- *
+ *
* def __str__(self): # <<<<<<<<<<<<<<
* if self.lhs:
* return '%s -> %s' % (self.lhs, ' '.join(self.rhs))
@@ -2845,10 +2845,10 @@ static PyObject *__pyx_pf_5moses_7dictree_10Production_15__str__(struct __pyx_ob
/* "moses/dictree.pyx":69
* return ' '.join(self.rhs)
- *
+ *
* def __repr__(self): # <<<<<<<<<<<<<<
* return repr(self.as_tuple())
- *
+ *
*/
/* Python wrapper */
@@ -2875,10 +2875,10 @@ static PyObject *__pyx_pf_5moses_7dictree_10Production_17__repr__(struct __pyx_o
__Pyx_RefNannySetupContext("__repr__", 0);
/* "moses/dictree.pyx":70
- *
+ *
* def __repr__(self):
* return repr(self.as_tuple()) # <<<<<<<<<<<<<<
- *
+ *
* def as_tuple(self, lhs_first = False):
*/
__Pyx_XDECREF(__pyx_r);
@@ -2896,10 +2896,10 @@ static PyObject *__pyx_pf_5moses_7dictree_10Production_17__repr__(struct __pyx_o
/* "moses/dictree.pyx":69
* return ' '.join(self.rhs)
- *
+ *
* def __repr__(self): # <<<<<<<<<<<<<<
* return repr(self.as_tuple())
- *
+ *
*/
/* function exit code */
@@ -2916,7 +2916,7 @@ static PyObject *__pyx_pf_5moses_7dictree_10Production_17__repr__(struct __pyx_o
/* "moses/dictree.pyx":72
* return repr(self.as_tuple())
- *
+ *
* def as_tuple(self, lhs_first = False): # <<<<<<<<<<<<<<
* """
* Returns a tuple (lhs) + rhs or rhs + (lhs) depending on the flag 'lhs_first'.
@@ -3066,7 +3066,7 @@ static PyObject *__pyx_pf_5moses_7dictree_10Production_19as_tuple(struct __pyx_o
* return self.rhs + tuple([self.lhs])
* else:
* return self.rhs # <<<<<<<<<<<<<<
- *
+ *
* def __richcmp__(self, other, op):
*/
__Pyx_XDECREF(__pyx_r);
@@ -3077,7 +3077,7 @@ static PyObject *__pyx_pf_5moses_7dictree_10Production_19as_tuple(struct __pyx_o
/* "moses/dictree.pyx":72
* return repr(self.as_tuple())
- *
+ *
* def as_tuple(self, lhs_first = False): # <<<<<<<<<<<<<<
* """
* Returns a tuple (lhs) + rhs or rhs + (lhs) depending on the flag 'lhs_first'.
@@ -3097,7 +3097,7 @@ static PyObject *__pyx_pf_5moses_7dictree_10Production_19as_tuple(struct __pyx_o
/* "moses/dictree.pyx":84
* return self.rhs
- *
+ *
* def __richcmp__(self, other, op): # <<<<<<<<<<<<<<
* """
* The comparison uses 'as_tuple()', therefore in the CFG case, the lhs will be part of the production and it will be placed in the end
@@ -3307,7 +3307,7 @@ static PyObject *__pyx_pf_5moses_7dictree_10Production_21__richcmp__(PyObject *_
* return x > y
* elif op == 5: # <<<<<<<<<<<<<<
* return x >= y
- *
+ *
*/
__pyx_t_1 = PyObject_RichCompare(__pyx_v_op, __pyx_int_5, Py_EQ); __Pyx_XGOTREF(__pyx_t_1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 101; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__pyx_t_3 = __Pyx_PyObject_IsTrue(__pyx_t_1); if (unlikely(__pyx_t_3 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 101; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
@@ -3318,7 +3318,7 @@ static PyObject *__pyx_pf_5moses_7dictree_10Production_21__richcmp__(PyObject *_
* return x > y
* elif op == 5:
* return x >= y # <<<<<<<<<<<<<<
- *
+ *
* cdef class Alignment(list):
*/
__Pyx_XDECREF(__pyx_r);
@@ -3330,7 +3330,7 @@ static PyObject *__pyx_pf_5moses_7dictree_10Production_21__richcmp__(PyObject *_
/* "moses/dictree.pyx":84
* return self.rhs
- *
+ *
* def __richcmp__(self, other, op): # <<<<<<<<<<<<<<
* """
* The comparison uses 'as_tuple()', therefore in the CFG case, the lhs will be part of the production and it will be placed in the end
@@ -3354,10 +3354,10 @@ static PyObject *__pyx_pf_5moses_7dictree_10Production_21__richcmp__(PyObject *_
/* "moses/dictree.pyx":30
* """
- *
+ *
* cdef readonly bytes lhs # <<<<<<<<<<<<<<
* cdef readonly tuple rhs
- *
+ *
*/
/* Python wrapper */
@@ -3390,10 +3390,10 @@ static PyObject *__pyx_pf_5moses_7dictree_10Production_3lhs___get__(struct __pyx
}
/* "moses/dictree.pyx":31
- *
+ *
* cdef readonly bytes lhs
* cdef readonly tuple rhs # <<<<<<<<<<<<<<
- *
+ *
* def __init__(self, rhs, lhs = None):
*/
@@ -3428,7 +3428,7 @@ static PyObject *__pyx_pf_5moses_7dictree_10Production_3rhs___get__(struct __pyx
/* "moses/dictree.pyx":110
* """
- *
+ *
* def __init__(self, alignment): # <<<<<<<<<<<<<<
* if type(alignment) is str:
* pairs = []
@@ -3511,7 +3511,7 @@ static int __pyx_pf_5moses_7dictree_9Alignment___init__(struct __pyx_obj_5moses_
__Pyx_RefNannySetupContext("__init__", 0);
/* "moses/dictree.pyx":111
- *
+ *
* def __init__(self, alignment):
* if type(alignment) is str: # <<<<<<<<<<<<<<
* pairs = []
@@ -3610,11 +3610,11 @@ static int __pyx_pf_5moses_7dictree_9Alignment___init__(struct __pyx_obj_5moses_
}
#if CYTHON_COMPILING_IN_CPYTHON
if (likely(PyTuple_CheckExact(sequence))) {
- __pyx_t_4 = PyTuple_GET_ITEM(sequence, 0);
- __pyx_t_8 = PyTuple_GET_ITEM(sequence, 1);
+ __pyx_t_4 = PyTuple_GET_ITEM(sequence, 0);
+ __pyx_t_8 = PyTuple_GET_ITEM(sequence, 1);
} else {
- __pyx_t_4 = PyList_GET_ITEM(sequence, 0);
- __pyx_t_8 = PyList_GET_ITEM(sequence, 1);
+ __pyx_t_4 = PyList_GET_ITEM(sequence, 0);
+ __pyx_t_8 = PyList_GET_ITEM(sequence, 1);
}
__Pyx_INCREF(__pyx_t_4);
__Pyx_INCREF(__pyx_t_8);
@@ -3772,7 +3772,7 @@ static int __pyx_pf_5moses_7dictree_9Alignment___init__(struct __pyx_obj_5moses_
* super(Alignment, self).__init__(alignment)
* else:
* ValueError, 'Cannot figure out pairs from: %s' % type(alignment) # <<<<<<<<<<<<<<
- *
+ *
* def __str__(self):
*/
__pyx_t_3 = __Pyx_PyString_Format(__pyx_kp_s_Cannot_figure_out_pairs_from_s, ((PyObject *)Py_TYPE(__pyx_v_alignment))); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 120; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
@@ -3791,7 +3791,7 @@ static int __pyx_pf_5moses_7dictree_9Alignment___init__(struct __pyx_obj_5moses_
/* "moses/dictree.pyx":110
* """
- *
+ *
* def __init__(self, alignment): # <<<<<<<<<<<<<<
* if type(alignment) is str:
* pairs = []
@@ -3819,10 +3819,10 @@ static int __pyx_pf_5moses_7dictree_9Alignment___init__(struct __pyx_obj_5moses_
/* "moses/dictree.pyx":122
* ValueError, 'Cannot figure out pairs from: %s' % type(alignment)
- *
+ *
* def __str__(self): # <<<<<<<<<<<<<<
* return ' '.join('%d-%d' % (s, t) for s, t in self)
- *
+ *
*/
/* Python wrapper */
@@ -3840,10 +3840,10 @@ static PyObject *__pyx_pw_5moses_7dictree_9Alignment_3__str__(PyObject *__pyx_v_
static PyObject *__pyx_gb_5moses_7dictree_9Alignment_7__str___2generator1(__pyx_GeneratorObject *__pyx_generator, PyObject *__pyx_sent_value); /* proto */
/* "moses/dictree.pyx":123
- *
+ *
* def __str__(self):
* return ' '.join('%d-%d' % (s, t) for s, t in self) # <<<<<<<<<<<<<<
- *
+ *
* cdef class FValues(list):
*/
@@ -3960,11 +3960,11 @@ static PyObject *__pyx_gb_5moses_7dictree_9Alignment_7__str___2generator1(__pyx_
}
#if CYTHON_COMPILING_IN_CPYTHON
if (likely(PyTuple_CheckExact(sequence))) {
- __pyx_t_5 = PyTuple_GET_ITEM(sequence, 0);
- __pyx_t_6 = PyTuple_GET_ITEM(sequence, 1);
+ __pyx_t_5 = PyTuple_GET_ITEM(sequence, 0);
+ __pyx_t_6 = PyTuple_GET_ITEM(sequence, 1);
} else {
- __pyx_t_5 = PyList_GET_ITEM(sequence, 0);
- __pyx_t_6 = PyList_GET_ITEM(sequence, 1);
+ __pyx_t_5 = PyList_GET_ITEM(sequence, 0);
+ __pyx_t_6 = PyList_GET_ITEM(sequence, 1);
}
__Pyx_INCREF(__pyx_t_5);
__Pyx_INCREF(__pyx_t_6);
@@ -4056,10 +4056,10 @@ static PyObject *__pyx_gb_5moses_7dictree_9Alignment_7__str___2generator1(__pyx_
/* "moses/dictree.pyx":122
* ValueError, 'Cannot figure out pairs from: %s' % type(alignment)
- *
+ *
* def __str__(self): # <<<<<<<<<<<<<<
* return ' '.join('%d-%d' % (s, t) for s, t in self)
- *
+ *
*/
static PyObject *__pyx_pf_5moses_7dictree_9Alignment_2__str__(struct __pyx_obj_5moses_7dictree_Alignment *__pyx_v_self) {
@@ -4083,10 +4083,10 @@ static PyObject *__pyx_pf_5moses_7dictree_9Alignment_2__str__(struct __pyx_obj_5
__Pyx_GIVEREF((PyObject *)__pyx_cur_scope->__pyx_v_self);
/* "moses/dictree.pyx":123
- *
+ *
* def __str__(self):
* return ' '.join('%d-%d' % (s, t) for s, t in self) # <<<<<<<<<<<<<<
- *
+ *
* cdef class FValues(list):
*/
__Pyx_XDECREF(__pyx_r);
@@ -4101,10 +4101,10 @@ static PyObject *__pyx_pf_5moses_7dictree_9Alignment_2__str__(struct __pyx_obj_5
/* "moses/dictree.pyx":122
* ValueError, 'Cannot figure out pairs from: %s' % type(alignment)
- *
+ *
* def __str__(self): # <<<<<<<<<<<<<<
* return ' '.join('%d-%d' % (s, t) for s, t in self)
- *
+ *
*/
/* function exit code */
@@ -4122,10 +4122,10 @@ static PyObject *__pyx_pf_5moses_7dictree_9Alignment_2__str__(struct __pyx_obj_5
/* "moses/dictree.pyx":131
* """
- *
+ *
* def __init__(self, values): # <<<<<<<<<<<<<<
* super(FValues, self).__init__(values)
- *
+ *
*/
/* Python wrapper */
@@ -4192,10 +4192,10 @@ static int __pyx_pf_5moses_7dictree_7FValues___init__(struct __pyx_obj_5moses_7d
__Pyx_RefNannySetupContext("__init__", 0);
/* "moses/dictree.pyx":132
- *
+ *
* def __init__(self, values):
* super(FValues, self).__init__(values) # <<<<<<<<<<<<<<
- *
+ *
* def __str__(self):
*/
__pyx_t_1 = PyTuple_New(2); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 132; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
@@ -4225,10 +4225,10 @@ static int __pyx_pf_5moses_7dictree_7FValues___init__(struct __pyx_obj_5moses_7d
/* "moses/dictree.pyx":131
* """
- *
+ *
* def __init__(self, values): # <<<<<<<<<<<<<<
* super(FValues, self).__init__(values)
- *
+ *
*/
/* function exit code */
@@ -4247,10 +4247,10 @@ static int __pyx_pf_5moses_7dictree_7FValues___init__(struct __pyx_obj_5moses_7d
/* "moses/dictree.pyx":134
* super(FValues, self).__init__(values)
- *
+ *
* def __str__(self): # <<<<<<<<<<<<<<
* return ' '.join(str(x) for x in self)
- *
+ *
*/
/* Python wrapper */
@@ -4268,10 +4268,10 @@ static PyObject *__pyx_pw_5moses_7dictree_7FValues_3__str__(PyObject *__pyx_v_se
static PyObject *__pyx_gb_5moses_7dictree_7FValues_7__str___2generator2(__pyx_GeneratorObject *__pyx_generator, PyObject *__pyx_sent_value); /* proto */
/* "moses/dictree.pyx":135
- *
+ *
* def __str__(self):
* return ' '.join(str(x) for x in self) # <<<<<<<<<<<<<<
- *
+ *
* cdef class TargetProduction(Production):
*/
@@ -4422,10 +4422,10 @@ static PyObject *__pyx_gb_5moses_7dictree_7FValues_7__str___2generator2(__pyx_Ge
/* "moses/dictree.pyx":134
* super(FValues, self).__init__(values)
- *
+ *
* def __str__(self): # <<<<<<<<<<<<<<
* return ' '.join(str(x) for x in self)
- *
+ *
*/
static PyObject *__pyx_pf_5moses_7dictree_7FValues_2__str__(struct __pyx_obj_5moses_7dictree_FValues *__pyx_v_self) {
@@ -4449,10 +4449,10 @@ static PyObject *__pyx_pf_5moses_7dictree_7FValues_2__str__(struct __pyx_obj_5mo
__Pyx_GIVEREF((PyObject *)__pyx_cur_scope->__pyx_v_self);
/* "moses/dictree.pyx":135
- *
+ *
* def __str__(self):
* return ' '.join(str(x) for x in self) # <<<<<<<<<<<<<<
- *
+ *
* cdef class TargetProduction(Production):
*/
__Pyx_XDECREF(__pyx_r);
@@ -4467,10 +4467,10 @@ static PyObject *__pyx_pf_5moses_7dictree_7FValues_2__str__(struct __pyx_obj_5mo
/* "moses/dictree.pyx":134
* super(FValues, self).__init__(values)
- *
+ *
* def __str__(self): # <<<<<<<<<<<<<<
* return ' '.join(str(x) for x in self)
- *
+ *
*/
/* function exit code */
@@ -4488,7 +4488,7 @@ static PyObject *__pyx_pf_5moses_7dictree_7FValues_2__str__(struct __pyx_obj_5mo
/* "moses/dictree.pyx":144
* cdef readonly FValues scores
- *
+ *
* def __init__(self, rhs, scores, alignment = [], lhs = None): # <<<<<<<<<<<<<<
* """
* :rhs right-hand side tokens (sequence of terminals and nonterminals)
@@ -4632,7 +4632,7 @@ static int __pyx_pf_5moses_7dictree_16TargetProduction___init__(struct __pyx_obj
* super(TargetProduction, self).__init__(rhs, lhs)
* self.scores = FValues(scores) # <<<<<<<<<<<<<<
* self.alignment = Alignment(alignment)
- *
+ *
*/
__pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 152; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__Pyx_GOTREF(__pyx_t_3);
@@ -4652,7 +4652,7 @@ static int __pyx_pf_5moses_7dictree_16TargetProduction___init__(struct __pyx_obj
* super(TargetProduction, self).__init__(rhs, lhs)
* self.scores = FValues(scores)
* self.alignment = Alignment(alignment) # <<<<<<<<<<<<<<
- *
+ *
* @staticmethod
*/
__pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 153; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
@@ -4671,7 +4671,7 @@ static int __pyx_pf_5moses_7dictree_16TargetProduction___init__(struct __pyx_obj
/* "moses/dictree.pyx":144
* cdef readonly FValues scores
- *
+ *
* def __init__(self, rhs, scores, alignment = [], lhs = None): # <<<<<<<<<<<<<<
* """
* :rhs right-hand side tokens (sequence of terminals and nonterminals)
@@ -4692,7 +4692,7 @@ static int __pyx_pf_5moses_7dictree_16TargetProduction___init__(struct __pyx_obj
}
/* "moses/dictree.pyx":156
- *
+ *
* @staticmethod
* def desc(x, y, key = lambda r: r.scores[0]): # <<<<<<<<<<<<<<
* """Returns the sign of key(y) - key(x).
@@ -4790,7 +4790,7 @@ static PyObject *__pyx_pf_5moses_7dictree_16TargetProduction_2desc(PyObject *__p
* Can only be used if scores is not an empty vector as
* keys defaults to scores[0]"""
* return fsign(key(y) - key(x)) # <<<<<<<<<<<<<<
- *
+ *
* def __str__(self):
*/
__Pyx_XDECREF(__pyx_r);
@@ -4823,7 +4823,7 @@ static PyObject *__pyx_pf_5moses_7dictree_16TargetProduction_2desc(PyObject *__p
goto __pyx_L0;
/* "moses/dictree.pyx":156
- *
+ *
* @staticmethod
* def desc(x, y, key = lambda r: r.scores[0]): # <<<<<<<<<<<<<<
* """Returns the sign of key(y) - key(x).
@@ -4845,7 +4845,7 @@ static PyObject *__pyx_pf_5moses_7dictree_16TargetProduction_2desc(PyObject *__p
/* "moses/dictree.pyx":162
* return fsign(key(y) - key(x))
- *
+ *
* def __str__(self): # <<<<<<<<<<<<<<
* """Returns a string such as: <words> ||| <scores> [||| word-alignment info]"""
* if self.lhs:
@@ -4955,7 +4955,7 @@ static PyObject *__pyx_pf_5moses_7dictree_16TargetProduction_4__str__(struct __p
* return ' ||| '.join((' '.join(chain(self.rhs, lhs)),
* str(self.scores), # <<<<<<<<<<<<<<
* str(self.alignment)))
- *
+ *
*/
__pyx_t_4 = PyTuple_New(1); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 169; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__Pyx_GOTREF(__pyx_t_4);
@@ -4970,7 +4970,7 @@ static PyObject *__pyx_pf_5moses_7dictree_16TargetProduction_4__str__(struct __p
* return ' ||| '.join((' '.join(chain(self.rhs, lhs)),
* str(self.scores),
* str(self.alignment))) # <<<<<<<<<<<<<<
- *
+ *
* def __repr__(self):
*/
__pyx_t_4 = PyTuple_New(1); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 170; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
@@ -5009,7 +5009,7 @@ static PyObject *__pyx_pf_5moses_7dictree_16TargetProduction_4__str__(struct __p
/* "moses/dictree.pyx":162
* return fsign(key(y) - key(x))
- *
+ *
* def __str__(self): # <<<<<<<<<<<<<<
* """Returns a string such as: <words> ||| <scores> [||| word-alignment info]"""
* if self.lhs:
@@ -5032,10 +5032,10 @@ static PyObject *__pyx_pf_5moses_7dictree_16TargetProduction_4__str__(struct __p
/* "moses/dictree.pyx":172
* str(self.alignment)))
- *
+ *
* def __repr__(self): # <<<<<<<<<<<<<<
* return repr((repr(self.rhs), repr(self.lhs), repr(self.scores), repr(self.alignment)))
- *
+ *
*/
/* Python wrapper */
@@ -5065,10 +5065,10 @@ static PyObject *__pyx_pf_5moses_7dictree_16TargetProduction_6__repr__(struct __
__Pyx_RefNannySetupContext("__repr__", 0);
/* "moses/dictree.pyx":173
- *
+ *
* def __repr__(self):
* return repr((repr(self.rhs), repr(self.lhs), repr(self.scores), repr(self.alignment))) # <<<<<<<<<<<<<<
- *
+ *
* cdef class QueryResult(list):
*/
__Pyx_XDECREF(__pyx_r);
@@ -5115,10 +5115,10 @@ static PyObject *__pyx_pf_5moses_7dictree_16TargetProduction_6__repr__(struct __
/* "moses/dictree.pyx":172
* str(self.alignment)))
- *
+ *
* def __repr__(self): # <<<<<<<<<<<<<<
* return repr((repr(self.rhs), repr(self.lhs), repr(self.scores), repr(self.alignment)))
- *
+ *
*/
/* function exit code */
@@ -5141,7 +5141,7 @@ static PyObject *__pyx_pf_5moses_7dictree_16TargetProduction_6__repr__(struct __
* """
* cdef readonly Alignment alignment # <<<<<<<<<<<<<<
* cdef readonly FValues scores
- *
+ *
*/
/* Python wrapper */
@@ -5177,7 +5177,7 @@ static PyObject *__pyx_pf_5moses_7dictree_16TargetProduction_9alignment___get__(
* """
* cdef readonly Alignment alignment
* cdef readonly FValues scores # <<<<<<<<<<<<<<
- *
+ *
* def __init__(self, rhs, scores, alignment = [], lhs = None):
*/
@@ -5212,7 +5212,7 @@ static PyObject *__pyx_pf_5moses_7dictree_16TargetProduction_6scores___get__(str
/* "moses/dictree.pyx":179
* cdef readonly Production source
- *
+ *
* def __init__(self, source, targets = []): # <<<<<<<<<<<<<<
* super(QueryResult, self).__init__(targets)
* self.source = source
@@ -5294,11 +5294,11 @@ static int __pyx_pf_5moses_7dictree_11QueryResult___init__(struct __pyx_obj_5mos
__Pyx_RefNannySetupContext("__init__", 0);
/* "moses/dictree.pyx":180
- *
+ *
* def __init__(self, source, targets = []):
* super(QueryResult, self).__init__(targets) # <<<<<<<<<<<<<<
* self.source = source
- *
+ *
*/
__pyx_t_1 = PyTuple_New(2); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 180; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__Pyx_GOTREF(__pyx_t_1);
@@ -5329,8 +5329,8 @@ static int __pyx_pf_5moses_7dictree_11QueryResult___init__(struct __pyx_obj_5mos
* def __init__(self, source, targets = []):
* super(QueryResult, self).__init__(targets)
* self.source = source # <<<<<<<<<<<<<<
- *
- *
+ *
+ *
*/
if (!(likely(((__pyx_v_source) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_source, __pyx_ptype_5moses_7dictree_Production))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 181; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__pyx_t_3 = __pyx_v_source;
@@ -5343,7 +5343,7 @@ static int __pyx_pf_5moses_7dictree_11QueryResult___init__(struct __pyx_obj_5mos
/* "moses/dictree.pyx":179
* cdef readonly Production source
- *
+ *
* def __init__(self, source, targets = []): # <<<<<<<<<<<<<<
* super(QueryResult, self).__init__(targets)
* self.source = source
@@ -5365,9 +5365,9 @@ static int __pyx_pf_5moses_7dictree_11QueryResult___init__(struct __pyx_obj_5mos
/* "moses/dictree.pyx":177
* cdef class QueryResult(list):
- *
+ *
* cdef readonly Production source # <<<<<<<<<<<<<<
- *
+ *
* def __init__(self, source, targets = []):
*/
@@ -5401,7 +5401,7 @@ static PyObject *__pyx_pf_5moses_7dictree_11QueryResult_6source___get__(struct _
}
/* "moses/dictree.pyx":187
- *
+ *
* @classmethod
* def canLoad(cls, path, bint wa = False): # <<<<<<<<<<<<<<
* """Whether or not the path represents a valid table for that class."""
@@ -5488,14 +5488,14 @@ static PyObject *__pyx_pf_5moses_7dictree_14DictionaryTree_canLoad(CYTHON_UNUSED
* def canLoad(cls, path, bint wa = False):
* """Whether or not the path represents a valid table for that class."""
* raise NotImplementedError # <<<<<<<<<<<<<<
- *
+ *
* def query(self, line, converter = None, cmp = None, key = None):
*/
__Pyx_Raise(__pyx_builtin_NotImplementedError, 0, 0, 0);
{__pyx_filename = __pyx_f[0]; __pyx_lineno = 189; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
/* "moses/dictree.pyx":187
- *
+ *
* @classmethod
* def canLoad(cls, path, bint wa = False): # <<<<<<<<<<<<<<
* """Whether or not the path represents a valid table for that class."""
@@ -5513,7 +5513,7 @@ static PyObject *__pyx_pf_5moses_7dictree_14DictionaryTree_canLoad(CYTHON_UNUSED
/* "moses/dictree.pyx":191
* raise NotImplementedError
- *
+ *
* def query(self, line, converter = None, cmp = None, key = None): # <<<<<<<<<<<<<<
* """
* Returns a list of target productions that translate a given source production
@@ -5616,7 +5616,7 @@ static PyObject *__pyx_pf_5moses_7dictree_14DictionaryTree_2query(CYTHON_UNUSED
* :return QueryResult
* """
* raise NotImplementedError # <<<<<<<<<<<<<<
- *
+ *
* cdef class PhraseDictionaryTree(DictionaryTree):
*/
__Pyx_Raise(__pyx_builtin_NotImplementedError, 0, 0, 0);
@@ -5624,7 +5624,7 @@ static PyObject *__pyx_pf_5moses_7dictree_14DictionaryTree_2query(CYTHON_UNUSED
/* "moses/dictree.pyx":191
* raise NotImplementedError
- *
+ *
* def query(self, line, converter = None, cmp = None, key = None): # <<<<<<<<<<<<<<
* """
* Returns a list of target productions that translate a given source production
@@ -5641,7 +5641,7 @@ static PyObject *__pyx_pf_5moses_7dictree_14DictionaryTree_2query(CYTHON_UNUSED
/* "moses/dictree.pyx":213
* cdef readonly unsigned tableLimit
- *
+ *
* def __cinit__(self, bytes path, unsigned tableLimit = 20, unsigned nscores = 5, bint wa = False, delimiters = ' \t'): # <<<<<<<<<<<<<<
* """
* :path stem of the table, e.g europarl.fr-en is the stem for europar.fr-en.binphr.*
@@ -5771,7 +5771,7 @@ static int __pyx_pf_5moses_7dictree_20PhraseDictionaryTree___cinit__(struct __py
/* "moses/dictree.pyx":221
* """
- *
+ *
* if not PhraseDictionaryTree.canLoad(path, wa): # <<<<<<<<<<<<<<
* raise ValueError, "'%s' doesn't seem a valid binary table." % path
* self.path = path
@@ -5798,7 +5798,7 @@ static int __pyx_pf_5moses_7dictree_20PhraseDictionaryTree___cinit__(struct __py
if (__pyx_t_5) {
/* "moses/dictree.pyx":222
- *
+ *
* if not PhraseDictionaryTree.canLoad(path, wa):
* raise ValueError, "'%s' doesn't seem a valid binary table." % path # <<<<<<<<<<<<<<
* self.path = path
@@ -5881,7 +5881,7 @@ static int __pyx_pf_5moses_7dictree_20PhraseDictionaryTree___cinit__(struct __py
* self.tree = new cdictree.PhraseDictionaryTree()
* self.tree.NeedAlignmentInfo(wa) # <<<<<<<<<<<<<<
* self.tree.Read(path)
- *
+ *
*/
__pyx_v_self->tree->NeedAlignmentInfo(__pyx_v_wa);
@@ -5889,7 +5889,7 @@ static int __pyx_pf_5moses_7dictree_20PhraseDictionaryTree___cinit__(struct __py
* self.tree = new cdictree.PhraseDictionaryTree()
* self.tree.NeedAlignmentInfo(wa)
* self.tree.Read(path) # <<<<<<<<<<<<<<
- *
+ *
* def __dealloc__(self):
*/
__pyx_t_6 = __pyx_convert_string_from_py_(__pyx_v_path); if (unlikely(PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 230; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
@@ -5897,7 +5897,7 @@ static int __pyx_pf_5moses_7dictree_20PhraseDictionaryTree___cinit__(struct __py
/* "moses/dictree.pyx":213
* cdef readonly unsigned tableLimit
- *
+ *
* def __cinit__(self, bytes path, unsigned tableLimit = 20, unsigned nscores = 5, bint wa = False, delimiters = ' \t'): # <<<<<<<<<<<<<<
* """
* :path stem of the table, e.g europarl.fr-en is the stem for europar.fr-en.binphr.*
@@ -5919,10 +5919,10 @@ static int __pyx_pf_5moses_7dictree_20PhraseDictionaryTree___cinit__(struct __py
/* "moses/dictree.pyx":232
* self.tree.Read(path)
- *
+ *
* def __dealloc__(self): # <<<<<<<<<<<<<<
* del self.tree
- *
+ *
*/
/* Python wrapper */
@@ -5941,20 +5941,20 @@ static void __pyx_pf_5moses_7dictree_20PhraseDictionaryTree_2__dealloc__(struct
__Pyx_RefNannySetupContext("__dealloc__", 0);
/* "moses/dictree.pyx":233
- *
+ *
* def __dealloc__(self):
* del self.tree # <<<<<<<<<<<<<<
- *
+ *
* @classmethod
*/
delete __pyx_v_self->tree;
/* "moses/dictree.pyx":232
* self.tree.Read(path)
- *
+ *
* def __dealloc__(self): # <<<<<<<<<<<<<<
* del self.tree
- *
+ *
*/
/* function exit code */
@@ -5962,7 +5962,7 @@ static void __pyx_pf_5moses_7dictree_20PhraseDictionaryTree_2__dealloc__(struct
}
/* "moses/dictree.pyx":236
- *
+ *
* @classmethod
* def canLoad(cls, stem, bint wa = False): # <<<<<<<<<<<<<<
* """This sanity check was added to the constructor, but you can access it from outside this class
@@ -6368,7 +6368,7 @@ static PyObject *__pyx_pf_5moses_7dictree_20PhraseDictionaryTree_4canLoad(CYTHON
* and os.path.isfile(stem + ".binphr.srcvoc") \
* and os.path.isfile(stem + ".binphr.tgtdata") \ # <<<<<<<<<<<<<<
* and os.path.isfile(stem + ".binphr.tgtvoc")
- *
+ *
*/
__pyx_t_8 = __Pyx_GetModuleGlobalName(__pyx_n_s_os); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 249; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__Pyx_GOTREF(__pyx_t_8);
@@ -6397,7 +6397,7 @@ static PyObject *__pyx_pf_5moses_7dictree_20PhraseDictionaryTree_4canLoad(CYTHON
* and os.path.isfile(stem + ".binphr.srcvoc") \
* and os.path.isfile(stem + ".binphr.tgtdata") \
* and os.path.isfile(stem + ".binphr.tgtvoc") # <<<<<<<<<<<<<<
- *
+ *
* cdef TargetProduction getTargetProduction(self, cdictree.StringTgtCand& cand, wa = None, converter = None):
*/
__pyx_t_6 = __Pyx_GetModuleGlobalName(__pyx_n_s_os); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 250; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
@@ -6449,7 +6449,7 @@ static PyObject *__pyx_pf_5moses_7dictree_20PhraseDictionaryTree_4canLoad(CYTHON
}
/* "moses/dictree.pyx":236
- *
+ *
* @classmethod
* def canLoad(cls, stem, bint wa = False): # <<<<<<<<<<<<<<
* """This sanity check was added to the constructor, but you can access it from outside this class
@@ -6475,7 +6475,7 @@ static PyObject *__pyx_pf_5moses_7dictree_20PhraseDictionaryTree_4canLoad(CYTHON
/* "moses/dictree.pyx":252
* and os.path.isfile(stem + ".binphr.tgtvoc")
- *
+ *
* cdef TargetProduction getTargetProduction(self, cdictree.StringTgtCand& cand, wa = None, converter = None): # <<<<<<<<<<<<<<
* """Converts a StringTgtCandidate (c++ object) and possibly a word-alignment info (string) to a TargetProduction (python object)."""
* cdef list words = [cand.tokens[i].c_str() for i in xrange(cand.tokens.size())]
@@ -6538,7 +6538,7 @@ static struct __pyx_obj_5moses_7dictree_TargetProduction *__pyx_f_5moses_7dictre
* cdef list words = [cand.tokens[i].c_str() for i in xrange(cand.tokens.size())]
* cdef list scores = [score for score in cand.scores] if converter is None else [converter(score) for score in cand.scores] # <<<<<<<<<<<<<<
* return TargetProduction(words, scores, wa)
- *
+ *
*/
__pyx_t_5 = (__pyx_v_converter == Py_None);
if ((__pyx_t_5 != 0)) {
@@ -6592,7 +6592,7 @@ static struct __pyx_obj_5moses_7dictree_TargetProduction *__pyx_f_5moses_7dictre
* cdef list words = [cand.tokens[i].c_str() for i in xrange(cand.tokens.size())]
* cdef list scores = [score for score in cand.scores] if converter is None else [converter(score) for score in cand.scores]
* return TargetProduction(words, scores, wa) # <<<<<<<<<<<<<<
- *
+ *
* def query(self, line, converter = lambda x: log(x), cmp = lambda x, y: fsign(y.scores[2] - x.scores[2]), key = None):
*/
__Pyx_XDECREF(((PyObject *)__pyx_r));
@@ -6616,7 +6616,7 @@ static struct __pyx_obj_5moses_7dictree_TargetProduction *__pyx_f_5moses_7dictre
/* "moses/dictree.pyx":252
* and os.path.isfile(stem + ".binphr.tgtvoc")
- *
+ *
* cdef TargetProduction getTargetProduction(self, cdictree.StringTgtCand& cand, wa = None, converter = None): # <<<<<<<<<<<<<<
* """Converts a StringTgtCandidate (c++ object) and possibly a word-alignment info (string) to a TargetProduction (python object)."""
* cdef list words = [cand.tokens[i].c_str() for i in xrange(cand.tokens.size())]
@@ -6641,7 +6641,7 @@ static struct __pyx_obj_5moses_7dictree_TargetProduction *__pyx_f_5moses_7dictre
/* "moses/dictree.pyx":258
* return TargetProduction(words, scores, wa)
- *
+ *
* def query(self, line, converter = lambda x: log(x), cmp = lambda x, y: fsign(y.scores[2] - x.scores[2]), key = None): # <<<<<<<<<<<<<<
* """
* Returns a list of target productions that translate a given source production
@@ -6738,7 +6738,7 @@ static PyObject *__pyx_gb_5moses_7dictree_20PhraseDictionaryTree_5query_2generat
* cdef vector[string]* wa = NULL
* cdef Production source = Production(f.c_str() for f in fphrase) # <<<<<<<<<<<<<<
* cdef QueryResult results = QueryResult(source)
- *
+ *
*/
static PyObject *__pyx_pf_5moses_7dictree_20PhraseDictionaryTree_5query_genexpr(PyObject *__pyx_self) {
@@ -6836,7 +6836,7 @@ static PyObject *__pyx_gb_5moses_7dictree_20PhraseDictionaryTree_5query_2generat
/* "moses/dictree.pyx":258
* return TargetProduction(words, scores, wa)
- *
+ *
* def query(self, line, converter = lambda x: log(x), cmp = lambda x, y: fsign(y.scores[2] - x.scores[2]), key = None): # <<<<<<<<<<<<<<
* """
* Returns a list of target productions that translate a given source production
@@ -6930,7 +6930,7 @@ static PyObject *__pyx_pf_5moses_7dictree_20PhraseDictionaryTree_6query(struct _
* cdef vector[string]* wa = NULL
* cdef Production source = Production(f.c_str() for f in fphrase) # <<<<<<<<<<<<<<
* cdef QueryResult results = QueryResult(source)
- *
+ *
*/
__pyx_t_1 = __pyx_pf_5moses_7dictree_20PhraseDictionaryTree_5query_genexpr(((PyObject*)__pyx_cur_scope)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 271; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__Pyx_GOTREF(__pyx_t_1);
@@ -6949,7 +6949,7 @@ static PyObject *__pyx_pf_5moses_7dictree_20PhraseDictionaryTree_6query(struct _
* cdef vector[string]* wa = NULL
* cdef Production source = Production(f.c_str() for f in fphrase)
* cdef QueryResult results = QueryResult(source) # <<<<<<<<<<<<<<
- *
+ *
* if not self.wa:
*/
__pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 272; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
@@ -6965,7 +6965,7 @@ static PyObject *__pyx_pf_5moses_7dictree_20PhraseDictionaryTree_6query(struct _
/* "moses/dictree.pyx":274
* cdef QueryResult results = QueryResult(source)
- *
+ *
* if not self.wa: # <<<<<<<<<<<<<<
* self.tree.GetTargetCandidates(fphrase, rv[0])
* results.extend([self.getTargetProduction(candidate, None, converter) for candidate in rv[0]])
@@ -6974,7 +6974,7 @@ static PyObject *__pyx_pf_5moses_7dictree_20PhraseDictionaryTree_6query(struct _
if (__pyx_t_6) {
/* "moses/dictree.pyx":275
- *
+ *
* if not self.wa:
* self.tree.GetTargetCandidates(fphrase, rv[0]) # <<<<<<<<<<<<<<
* results.extend([self.getTargetProduction(candidate, None, converter) for candidate in rv[0]])
@@ -7175,7 +7175,7 @@ static PyObject *__pyx_pf_5moses_7dictree_20PhraseDictionaryTree_6query(struct _
* return QueryResult(source, results[0:self.tableLimit])
* else:
* return results # <<<<<<<<<<<<<<
- *
+ *
* cdef class OnDiskWrapper(DictionaryTree):
*/
__Pyx_XDECREF(__pyx_r);
@@ -7186,7 +7186,7 @@ static PyObject *__pyx_pf_5moses_7dictree_20PhraseDictionaryTree_6query(struct _
/* "moses/dictree.pyx":258
* return TargetProduction(words, scores, wa)
- *
+ *
* def query(self, line, converter = lambda x: log(x), cmp = lambda x, y: fsign(y.scores[2] - x.scores[2]), key = None): # <<<<<<<<<<<<<<
* """
* Returns a list of target productions that translate a given source production
@@ -7211,7 +7211,7 @@ static PyObject *__pyx_pf_5moses_7dictree_20PhraseDictionaryTree_6query(struct _
}
/* "moses/dictree.pyx":207
- *
+ *
* cdef cdictree.PhraseDictionaryTree* tree
* cdef readonly bytes path # <<<<<<<<<<<<<<
* cdef readonly unsigned nscores
@@ -7346,7 +7346,7 @@ static PyObject *__pyx_pf_5moses_7dictree_20PhraseDictionaryTree_2wa___get__(str
* cdef readonly bint wa
* cdef readonly bytes delimiters # <<<<<<<<<<<<<<
* cdef readonly unsigned tableLimit
- *
+ *
*/
/* Python wrapper */
@@ -7382,7 +7382,7 @@ static PyObject *__pyx_pf_5moses_7dictree_20PhraseDictionaryTree_10delimiters___
* cdef readonly bint wa
* cdef readonly bytes delimiters
* cdef readonly unsigned tableLimit # <<<<<<<<<<<<<<
- *
+ *
* def __cinit__(self, bytes path, unsigned tableLimit = 20, unsigned nscores = 5, bint wa = False, delimiters = ' \t'):
*/
@@ -7427,7 +7427,7 @@ static PyObject *__pyx_pf_5moses_7dictree_20PhraseDictionaryTree_10tableLimit___
/* "moses/dictree.pyx":297
* cdef readonly unsigned tableLimit
- *
+ *
* def __cinit__(self, bytes path, unsigned tableLimit = 20, delimiters = ' \t'): # <<<<<<<<<<<<<<
* self.delimiters = delimiters
* self.tableLimit = tableLimit
@@ -7527,7 +7527,7 @@ static int __pyx_pf_5moses_7dictree_13OnDiskWrapper___cinit__(struct __pyx_obj_5
__Pyx_RefNannySetupContext("__cinit__", 0);
/* "moses/dictree.pyx":298
- *
+ *
* def __cinit__(self, bytes path, unsigned tableLimit = 20, delimiters = ' \t'):
* self.delimiters = delimiters # <<<<<<<<<<<<<<
* self.tableLimit = tableLimit
@@ -7565,7 +7565,7 @@ static int __pyx_pf_5moses_7dictree_13OnDiskWrapper___cinit__(struct __pyx_obj_5
* self.wrapper = new condiskpt.OnDiskWrapper()
* self.wrapper.BeginLoad(string(path)) # <<<<<<<<<<<<<<
* self.finder = new condiskpt.OnDiskQuery(self.wrapper[0])
- *
+ *
*/
__pyx_t_2 = __Pyx_PyObject_AsString(__pyx_v_path); if (unlikely((!__pyx_t_2) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 301; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
try {
@@ -7580,14 +7580,14 @@ static int __pyx_pf_5moses_7dictree_13OnDiskWrapper___cinit__(struct __pyx_obj_5
* self.wrapper = new condiskpt.OnDiskWrapper()
* self.wrapper.BeginLoad(string(path))
* self.finder = new condiskpt.OnDiskQuery(self.wrapper[0]) # <<<<<<<<<<<<<<
- *
+ *
* @classmethod
*/
__pyx_v_self->finder = new OnDiskPt::OnDiskQuery((__pyx_v_self->wrapper[0]));
/* "moses/dictree.pyx":297
* cdef readonly unsigned tableLimit
- *
+ *
* def __cinit__(self, bytes path, unsigned tableLimit = 20, delimiters = ' \t'): # <<<<<<<<<<<<<<
* self.delimiters = delimiters
* self.tableLimit = tableLimit
@@ -7606,7 +7606,7 @@ static int __pyx_pf_5moses_7dictree_13OnDiskWrapper___cinit__(struct __pyx_obj_5
}
/* "moses/dictree.pyx":305
- *
+ *
* @classmethod
* def canLoad(cls, stem, bint wa = False): # <<<<<<<<<<<<<<
* return os.path.isfile(stem + "/Misc.dat") \
@@ -7808,7 +7808,7 @@ static PyObject *__pyx_pf_5moses_7dictree_13OnDiskWrapper_2canLoad(CYTHON_UNUSED
* and os.path.isfile(stem + "/TargetColl.dat") \
* and os.path.isfile(stem + "/TargetInd.dat") \ # <<<<<<<<<<<<<<
* and os.path.isfile(stem + "/Vocab.dat")
- *
+ *
*/
__pyx_t_6 = __Pyx_GetModuleGlobalName(__pyx_n_s_os); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 309; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__Pyx_GOTREF(__pyx_t_6);
@@ -7837,7 +7837,7 @@ static PyObject *__pyx_pf_5moses_7dictree_13OnDiskWrapper_2canLoad(CYTHON_UNUSED
* and os.path.isfile(stem + "/TargetColl.dat") \
* and os.path.isfile(stem + "/TargetInd.dat") \
* and os.path.isfile(stem + "/Vocab.dat") # <<<<<<<<<<<<<<
- *
+ *
* cdef Production getSourceProduction(self, vector[string] ftokens):
*/
__pyx_t_7 = __Pyx_GetModuleGlobalName(__pyx_n_s_os); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 310; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
@@ -7888,7 +7888,7 @@ static PyObject *__pyx_pf_5moses_7dictree_13OnDiskWrapper_2canLoad(CYTHON_UNUSED
goto __pyx_L0;
/* "moses/dictree.pyx":305
- *
+ *
* @classmethod
* def canLoad(cls, stem, bint wa = False): # <<<<<<<<<<<<<<
* return os.path.isfile(stem + "/Misc.dat") \
@@ -7914,7 +7914,7 @@ static PyObject *__pyx_pf_5moses_7dictree_13OnDiskWrapper_2canLoad(CYTHON_UNUSED
/* "moses/dictree.pyx":312
* and os.path.isfile(stem + "/Vocab.dat")
- *
+ *
* cdef Production getSourceProduction(self, vector[string] ftokens): # <<<<<<<<<<<<<<
* cdef list tokens = [f.c_str() for f in ftokens]
* return Production(tokens[:-1], tokens[-1])
@@ -7936,11 +7936,11 @@ static struct __pyx_obj_5moses_7dictree_Production *__pyx_f_5moses_7dictree_13On
__Pyx_RefNannySetupContext("getSourceProduction", 0);
/* "moses/dictree.pyx":313
- *
+ *
* cdef Production getSourceProduction(self, vector[string] ftokens):
* cdef list tokens = [f.c_str() for f in ftokens] # <<<<<<<<<<<<<<
* return Production(tokens[:-1], tokens[-1])
- *
+ *
*/
__pyx_t_1 = PyList_New(0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 313; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__Pyx_GOTREF(__pyx_t_1);
@@ -7962,7 +7962,7 @@ static struct __pyx_obj_5moses_7dictree_Production *__pyx_f_5moses_7dictree_13On
* cdef Production getSourceProduction(self, vector[string] ftokens):
* cdef list tokens = [f.c_str() for f in ftokens]
* return Production(tokens[:-1], tokens[-1]) # <<<<<<<<<<<<<<
- *
+ *
* def query(self, line, converter = None, cmp = None, key = None):
*/
__Pyx_XDECREF(((PyObject *)__pyx_r));
@@ -7987,7 +7987,7 @@ static struct __pyx_obj_5moses_7dictree_Production *__pyx_f_5moses_7dictree_13On
/* "moses/dictree.pyx":312
* and os.path.isfile(stem + "/Vocab.dat")
- *
+ *
* cdef Production getSourceProduction(self, vector[string] ftokens): # <<<<<<<<<<<<<<
* cdef list tokens = [f.c_str() for f in ftokens]
* return Production(tokens[:-1], tokens[-1])
@@ -8009,7 +8009,7 @@ static struct __pyx_obj_5moses_7dictree_Production *__pyx_f_5moses_7dictree_13On
/* "moses/dictree.pyx":316
* return Production(tokens[:-1], tokens[-1])
- *
+ *
* def query(self, line, converter = None, cmp = None, key = None): # <<<<<<<<<<<<<<
* """
* Returns a list of target productions that translate a given source production
@@ -8231,7 +8231,7 @@ static PyObject *__pyx_gb_5moses_7dictree_13OnDiskWrapper_5query_2generator4(__p
/* "moses/dictree.pyx":316
* return Production(tokens[:-1], tokens[-1])
- *
+ *
* def query(self, line, converter = None, cmp = None, key = None): # <<<<<<<<<<<<<<
* """
* Returns a list of target productions that translate a given source production
@@ -8549,7 +8549,7 @@ static PyObject *__pyx_pf_5moses_7dictree_13OnDiskWrapper_4query(struct __pyx_ob
* if cmp:
* results.sort(cmp=cmp, key=key) # <<<<<<<<<<<<<<
* return results
- *
+ *
*/
__pyx_t_1 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_results), __pyx_n_s_sort); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 346; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__Pyx_GOTREF(__pyx_t_1);
@@ -8570,7 +8570,7 @@ static PyObject *__pyx_pf_5moses_7dictree_13OnDiskWrapper_4query(struct __pyx_ob
* if cmp:
* results.sort(cmp=cmp, key=key)
* return results # <<<<<<<<<<<<<<
- *
+ *
* def load(path, nscores, limit):
*/
__Pyx_XDECREF(__pyx_r);
@@ -8580,7 +8580,7 @@ static PyObject *__pyx_pf_5moses_7dictree_13OnDiskWrapper_4query(struct __pyx_ob
/* "moses/dictree.pyx":316
* return Production(tokens[:-1], tokens[-1])
- *
+ *
* def query(self, line, converter = None, cmp = None, key = None): # <<<<<<<<<<<<<<
* """
* Returns a list of target productions that translate a given source production
@@ -8611,7 +8611,7 @@ static PyObject *__pyx_pf_5moses_7dictree_13OnDiskWrapper_4query(struct __pyx_ob
* cdef condiskpt.OnDiskQuery *finder
* cdef readonly bytes delimiters # <<<<<<<<<<<<<<
* cdef readonly unsigned tableLimit
- *
+ *
*/
/* Python wrapper */
@@ -8647,7 +8647,7 @@ static PyObject *__pyx_pf_5moses_7dictree_13OnDiskWrapper_10delimiters___get__(s
* cdef condiskpt.OnDiskQuery *finder
* cdef readonly bytes delimiters
* cdef readonly unsigned tableLimit # <<<<<<<<<<<<<<
- *
+ *
* def __cinit__(self, bytes path, unsigned tableLimit = 20, delimiters = ' \t'):
*/
@@ -8692,7 +8692,7 @@ static PyObject *__pyx_pf_5moses_7dictree_13OnDiskWrapper_10tableLimit___get__(s
/* "moses/dictree.pyx":349
* return results
- *
+ *
* def load(path, nscores, limit): # <<<<<<<<<<<<<<
* """Finds out the correct implementation depending on the content of 'path' and returns the appropriate dictionary tree."""
* if PhraseDictionaryTree.canLoad(path, False):
@@ -8953,7 +8953,7 @@ static PyObject *__pyx_pf_5moses_7dictree_2load(CYTHON_UNUSED PyObject *__pyx_se
/* "moses/dictree.pyx":349
* return results
- *
+ *
* def load(path, nscores, limit): # <<<<<<<<<<<<<<
* """Finds out the correct implementation depending on the content of 'path' and returns the appropriate dictionary tree."""
* if PhraseDictionaryTree.canLoad(path, False):
@@ -8973,7 +8973,7 @@ static PyObject *__pyx_pf_5moses_7dictree_2load(CYTHON_UNUSED PyObject *__pyx_se
}
/* "string.from_py":13
- *
+ *
* @cname("__pyx_convert_string_from_py_")
* cdef string __pyx_convert_string_from_py_(object o) except *: # <<<<<<<<<<<<<<
* cdef Py_ssize_t length
@@ -8996,7 +8996,7 @@ static std::string __pyx_convert_string_from_py_(PyObject *__pyx_v_o) {
* cdef Py_ssize_t length
* cdef char* data = __Pyx_PyObject_AsStringAndSize(o, &length) # <<<<<<<<<<<<<<
* return string(data, length)
- *
+ *
*/
__pyx_t_1 = __Pyx_PyObject_AsStringAndSize(__pyx_v_o, (&__pyx_v_length)); if (unlikely(__pyx_t_1 == NULL)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 15; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__pyx_v_data = __pyx_t_1;
@@ -9005,14 +9005,14 @@ static std::string __pyx_convert_string_from_py_(PyObject *__pyx_v_o) {
* cdef Py_ssize_t length
* cdef char* data = __Pyx_PyObject_AsStringAndSize(o, &length)
* return string(data, length) # <<<<<<<<<<<<<<
- *
- *
+ *
+ *
*/
__pyx_r = std::string(__pyx_v_data, __pyx_v_length);
goto __pyx_L0;
/* "string.from_py":13
- *
+ *
* @cname("__pyx_convert_string_from_py_")
* cdef string __pyx_convert_string_from_py_(object o) except *: # <<<<<<<<<<<<<<
* cdef Py_ssize_t length
@@ -9028,11 +9028,11 @@ static std::string __pyx_convert_string_from_py_(PyObject *__pyx_v_o) {
}
/* "vector.to_py":63
- *
+ *
* @cname("__pyx_convert_vector_to_py___pyx_t_5moses_8cdictree_FValue")
* cdef object __pyx_convert_vector_to_py___pyx_t_5moses_8cdictree_FValue(vector[X]& v): # <<<<<<<<<<<<<<
* return [X_to_py(v[i]) for i in range(v.size())]
- *
+ *
*/
static PyObject *__pyx_convert_vector_to_py___pyx_t_5moses_8cdictree_FValue(const std::vector<__pyx_t_5moses_8cdictree_FValue> &__pyx_v_v) {
@@ -9052,8 +9052,8 @@ static PyObject *__pyx_convert_vector_to_py___pyx_t_5moses_8cdictree_FValue(cons
* @cname("__pyx_convert_vector_to_py___pyx_t_5moses_8cdictree_FValue")
* cdef object __pyx_convert_vector_to_py___pyx_t_5moses_8cdictree_FValue(vector[X]& v):
* return [X_to_py(v[i]) for i in range(v.size())] # <<<<<<<<<<<<<<
- *
- *
+ *
+ *
*/
__Pyx_XDECREF(__pyx_r);
__pyx_t_1 = PyList_New(0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 64; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
@@ -9071,11 +9071,11 @@ static PyObject *__pyx_convert_vector_to_py___pyx_t_5moses_8cdictree_FValue(cons
goto __pyx_L0;
/* "vector.to_py":63
- *
+ *
* @cname("__pyx_convert_vector_to_py___pyx_t_5moses_8cdictree_FValue")
* cdef object __pyx_convert_vector_to_py___pyx_t_5moses_8cdictree_FValue(vector[X]& v): # <<<<<<<<<<<<<<
* return [X_to_py(v[i]) for i in range(v.size())]
- *
+ *
*/
/* function exit code */
@@ -9091,11 +9091,11 @@ static PyObject *__pyx_convert_vector_to_py___pyx_t_5moses_8cdictree_FValue(cons
}
/* "pair.to_py":180
- *
+ *
* @cname("__pyx_convert_pair_to_py_int____int")
* cdef object __pyx_convert_pair_to_py_int____int(pair[X,Y]& p): # <<<<<<<<<<<<<<
* return X_to_py(p.first), Y_to_py(p.second)
- *
+ *
*/
static PyObject *__pyx_convert_pair_to_py_int____int(const std::pair<int,int> &__pyx_v_p) {
@@ -9113,8 +9113,8 @@ static PyObject *__pyx_convert_pair_to_py_int____int(const std::pair<int,int> &_
* @cname("__pyx_convert_pair_to_py_int____int")
* cdef object __pyx_convert_pair_to_py_int____int(pair[X,Y]& p):
* return X_to_py(p.first), Y_to_py(p.second) # <<<<<<<<<<<<<<
- *
- *
+ *
+ *
*/
__Pyx_XDECREF(__pyx_r);
__pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_p.first); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 181; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
@@ -9134,11 +9134,11 @@ static PyObject *__pyx_convert_pair_to_py_int____int(const std::pair<int,int> &_
goto __pyx_L0;
/* "pair.to_py":180
- *
+ *
* @cname("__pyx_convert_pair_to_py_int____int")
* cdef object __pyx_convert_pair_to_py_int____int(pair[X,Y]& p): # <<<<<<<<<<<<<<
* return X_to_py(p.first), Y_to_py(p.second)
- *
+ *
*/
/* function exit code */
@@ -9155,11 +9155,11 @@ static PyObject *__pyx_convert_pair_to_py_int____int(const std::pair<int,int> &_
}
/* "vector.to_py":63
- *
+ *
* @cname("__pyx_convert_vector_to_py_OnDiskPt_3a__3a_AlignPair")
* cdef object __pyx_convert_vector_to_py_OnDiskPt_3a__3a_AlignPair(vector[X]& v): # <<<<<<<<<<<<<<
* return [X_to_py(v[i]) for i in range(v.size())]
- *
+ *
*/
static PyObject *__pyx_convert_vector_to_py_OnDiskPt_3a__3a_AlignPair(const std::vector<OnDiskPt::AlignPair> &__pyx_v_v) {
@@ -9179,8 +9179,8 @@ static PyObject *__pyx_convert_vector_to_py_OnDiskPt_3a__3a_AlignPair(const std:
* @cname("__pyx_convert_vector_to_py_OnDiskPt_3a__3a_AlignPair")
* cdef object __pyx_convert_vector_to_py_OnDiskPt_3a__3a_AlignPair(vector[X]& v):
* return [X_to_py(v[i]) for i in range(v.size())] # <<<<<<<<<<<<<<
- *
- *
+ *
+ *
*/
__Pyx_XDECREF(__pyx_r);
__pyx_t_1 = PyList_New(0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 64; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
@@ -9198,11 +9198,11 @@ static PyObject *__pyx_convert_vector_to_py_OnDiskPt_3a__3a_AlignPair(const std:
goto __pyx_L0;
/* "vector.to_py":63
- *
+ *
* @cname("__pyx_convert_vector_to_py_OnDiskPt_3a__3a_AlignPair")
* cdef object __pyx_convert_vector_to_py_OnDiskPt_3a__3a_AlignPair(vector[X]& v): # <<<<<<<<<<<<<<
* return [X_to_py(v[i]) for i in range(v.size())]
- *
+ *
*/
/* function exit code */
@@ -11255,7 +11255,7 @@ static int __Pyx_InitCachedConstants(void) {
* elif isinstance(data, unicode):
* return data.encode('UTF-8') # <<<<<<<<<<<<<<
* raise TypeError('Cannot convert %s to string' % type(data))
- *
+ *
*/
__pyx_tuple_ = PyTuple_Pack(1, __pyx_kp_s_UTF_8); if (unlikely(!__pyx_tuple_)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 20; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__Pyx_GOTREF(__pyx_tuple_);
@@ -11273,7 +11273,7 @@ static int __Pyx_InitCachedConstants(void) {
__Pyx_GIVEREF(__pyx_tuple__4);
/* "moses/dictree.pyx":156
- *
+ *
* @staticmethod
* def desc(x, y, key = lambda r: r.scores[0]): # <<<<<<<<<<<<<<
* """Returns the sign of key(y) - key(x).
@@ -11286,7 +11286,7 @@ static int __Pyx_InitCachedConstants(void) {
/* "moses/dictree.pyx":349
* return results
- *
+ *
* def load(path, nscores, limit): # <<<<<<<<<<<<<<
* """Finds out the correct implementation depending on the content of 'path' and returns the appropriate dictionary tree."""
* if PhraseDictionaryTree.canLoad(path, False):
@@ -11539,7 +11539,7 @@ PyMODINIT_FUNC PyInit_dictree(void)
* cimport cdictree
* cimport condiskpt
* from math import log # <<<<<<<<<<<<<<
- *
+ *
* cpdef int fsign(float x):
*/
__pyx_t_2 = PyList_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 10; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
@@ -11558,7 +11558,7 @@ PyMODINIT_FUNC PyInit_dictree(void)
/* "moses/dictree.pyx":144
* cdef readonly FValues scores
- *
+ *
* def __init__(self, rhs, scores, alignment = [], lhs = None): # <<<<<<<<<<<<<<
* """
* :rhs right-hand side tokens (sequence of terminals and nonterminals)
@@ -11570,7 +11570,7 @@ PyMODINIT_FUNC PyInit_dictree(void)
__pyx_t_1 = 0;
/* "moses/dictree.pyx":156
- *
+ *
* @staticmethod
* def desc(x, y, key = lambda r: r.scores[0]): # <<<<<<<<<<<<<<
* """Returns the sign of key(y) - key(x).
@@ -11584,7 +11584,7 @@ PyMODINIT_FUNC PyInit_dictree(void)
/* "moses/dictree.pyx":155
* self.alignment = Alignment(alignment)
- *
+ *
* @staticmethod # <<<<<<<<<<<<<<
* def desc(x, y, key = lambda r: r.scores[0]):
* """Returns the sign of key(y) - key(x).
@@ -11604,7 +11604,7 @@ PyMODINIT_FUNC PyInit_dictree(void)
PyType_Modified(__pyx_ptype_5moses_7dictree_TargetProduction);
/* "moses/dictree.pyx":156
- *
+ *
* @staticmethod
* def desc(x, y, key = lambda r: r.scores[0]): # <<<<<<<<<<<<<<
* """Returns the sign of key(y) - key(x).
@@ -11615,7 +11615,7 @@ PyMODINIT_FUNC PyInit_dictree(void)
/* "moses/dictree.pyx":155
* self.alignment = Alignment(alignment)
- *
+ *
* @staticmethod # <<<<<<<<<<<<<<
* def desc(x, y, key = lambda r: r.scores[0]):
* """Returns the sign of key(y) - key(x).
@@ -11634,7 +11634,7 @@ PyMODINIT_FUNC PyInit_dictree(void)
/* "moses/dictree.pyx":179
* cdef readonly Production source
- *
+ *
* def __init__(self, source, targets = []): # <<<<<<<<<<<<<<
* super(QueryResult, self).__init__(targets)
* self.source = source
@@ -11646,7 +11646,7 @@ PyMODINIT_FUNC PyInit_dictree(void)
__pyx_t_1 = 0;
/* "moses/dictree.pyx":187
- *
+ *
* @classmethod
* def canLoad(cls, path, bint wa = False): # <<<<<<<<<<<<<<
* """Whether or not the path represents a valid table for that class."""
@@ -11657,7 +11657,7 @@ PyMODINIT_FUNC PyInit_dictree(void)
/* "moses/dictree.pyx":186
* cdef class DictionaryTree(object):
- *
+ *
* @classmethod # <<<<<<<<<<<<<<
* def canLoad(cls, path, bint wa = False):
* """Whether or not the path represents a valid table for that class."""
@@ -11670,7 +11670,7 @@ PyMODINIT_FUNC PyInit_dictree(void)
PyType_Modified(__pyx_ptype_5moses_7dictree_DictionaryTree);
/* "moses/dictree.pyx":236
- *
+ *
* @classmethod
* def canLoad(cls, stem, bint wa = False): # <<<<<<<<<<<<<<
* """This sanity check was added to the constructor, but you can access it from outside this class
@@ -11681,7 +11681,7 @@ PyMODINIT_FUNC PyInit_dictree(void)
/* "moses/dictree.pyx":235
* del self.tree
- *
+ *
* @classmethod # <<<<<<<<<<<<<<
* def canLoad(cls, stem, bint wa = False):
* """This sanity check was added to the constructor, but you can access it from outside this class
@@ -11695,7 +11695,7 @@ PyMODINIT_FUNC PyInit_dictree(void)
/* "moses/dictree.pyx":258
* return TargetProduction(words, scores, wa)
- *
+ *
* def query(self, line, converter = lambda x: log(x), cmp = lambda x, y: fsign(y.scores[2] - x.scores[2]), key = None): # <<<<<<<<<<<<<<
* """
* Returns a list of target productions that translate a given source production
@@ -11712,7 +11712,7 @@ PyMODINIT_FUNC PyInit_dictree(void)
__pyx_t_1 = 0;
/* "moses/dictree.pyx":305
- *
+ *
* @classmethod
* def canLoad(cls, stem, bint wa = False): # <<<<<<<<<<<<<<
* return os.path.isfile(stem + "/Misc.dat") \
@@ -11723,7 +11723,7 @@ PyMODINIT_FUNC PyInit_dictree(void)
/* "moses/dictree.pyx":304
* self.finder = new condiskpt.OnDiskQuery(self.wrapper[0])
- *
+ *
* @classmethod # <<<<<<<<<<<<<<
* def canLoad(cls, stem, bint wa = False):
* return os.path.isfile(stem + "/Misc.dat") \
@@ -11737,7 +11737,7 @@ PyMODINIT_FUNC PyInit_dictree(void)
/* "moses/dictree.pyx":349
* return results
- *
+ *
* def load(path, nscores, limit): # <<<<<<<<<<<<<<
* """Finds out the correct implementation depending on the content of 'path' and returns the appropriate dictionary tree."""
* if PhraseDictionaryTree.canLoad(path, False):
@@ -11749,7 +11749,7 @@ PyMODINIT_FUNC PyInit_dictree(void)
/* "moses/dictree.pyx":1
* # This module wraps phrase/rule tables # <<<<<<<<<<<<<<
- *
+ *
* from libcpp.string cimport string
*/
__pyx_t_2 = PyDict_New(); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
@@ -11758,11 +11758,11 @@ PyMODINIT_FUNC PyInit_dictree(void)
__Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
/* "vector.to_py":63
- *
+ *
* @cname("__pyx_convert_vector_to_py_OnDiskPt_3a__3a_AlignPair")
* cdef object __pyx_convert_vector_to_py_OnDiskPt_3a__3a_AlignPair(vector[X]& v): # <<<<<<<<<<<<<<
* return [X_to_py(v[i]) for i in range(v.size())]
- *
+ *
*/
goto __pyx_L0;
__pyx_L1_error:;
diff --git a/contrib/relent-filter/sigtest-filter/WIN32_functions.cpp b/contrib/relent-filter/sigtest-filter/WIN32_functions.cpp
index cfc15d592..989c727a2 100755
--- a/contrib/relent-filter/sigtest-filter/WIN32_functions.cpp
+++ b/contrib/relent-filter/sigtest-filter/WIN32_functions.cpp
@@ -1,231 +1,231 @@
-// XGetopt.cpp Version 1.2
-//
-// Author: Hans Dietrich
-// hdietrich2@hotmail.com
-//
-// Description:
-// XGetopt.cpp implements getopt(), a function to parse command lines.
-//
-// History
-// Version 1.2 - 2003 May 17
-// - Added Unicode support
-//
-// Version 1.1 - 2002 March 10
-// - Added example to XGetopt.cpp module header
-//
-// This software is released into the public domain.
-// You are free to use it in any way you like.
-//
-// This software is provided "as is" with no expressed
-// or implied warranty. I accept no liability for any
-// damage or loss of business that this software may cause.
-//
-///////////////////////////////////////////////////////////////////////////////
-
-
-///////////////////////////////////////////////////////////////////////////////
-// if you are using precompiled headers then include this line:
-///////////////////////////////////////////////////////////////////////////////
-
-
-///////////////////////////////////////////////////////////////////////////////
-// if you are not using precompiled headers then include these lines:
-//#include <windows.h>
-//#include <cstdio>
-//#include <tchar.h>
-///////////////////////////////////////////////////////////////////////////////
-
-
-#include <cstdio>
-#include <cstring>
-#include <cmath>
-#include "WIN32_functions.h"
-
-
-///////////////////////////////////////////////////////////////////////////////
-//
-// X G e t o p t . c p p
-//
-//
-// NAME
-// getopt -- parse command line options
-//
-// SYNOPSIS
-// int getopt(int argc, char *argv[], char *optstring)
-//
-// extern char *optarg;
-// extern int optind;
-//
-// DESCRIPTION
-// The getopt() function parses the command line arguments. Its
-// arguments argc and argv are the argument count and array as
-// passed into the application on program invocation. In the case
-// of Visual C++ programs, argc and argv are available via the
-// variables __argc and __argv (double underscores), respectively.
-// getopt returns the next option letter in argv that matches a
-// letter in optstring. (Note: Unicode programs should use
-// __targv instead of __argv. Also, all character and string
-// literals should be enclosed in ( ) ).
-//
-// optstring is a string of recognized option letters; if a letter
-// is followed by a colon, the option is expected to have an argument
-// that may or may not be separated from it by white space. optarg
-// is set to point to the start of the option argument on return from
-// getopt.
-//
-// Option letters may be combined, e.g., "-ab" is equivalent to
-// "-a -b". Option letters are case sensitive.
-//
-// getopt places in the external variable optind the argv index
-// of the next argument to be processed. optind is initialized
-// to 0 before the first call to getopt.
-//
-// When all options have been processed (i.e., up to the first
-// non-option argument), getopt returns EOF, optarg will point
-// to the argument, and optind will be set to the argv index of
-// the argument. If there are no non-option arguments, optarg
-// will be set to NULL.
-//
-// The special option "--" may be used to delimit the end of the
-// options; EOF will be returned, and "--" (and everything after it)
-// will be skipped.
-//
-// RETURN VALUE
-// For option letters contained in the string optstring, getopt
-// will return the option letter. getopt returns a question mark (?)
-// when it encounters an option letter not included in optstring.
-// EOF is returned when processing is finished.
-//
-// BUGS
-// 1) Long options are not supported.
-// 2) The GNU double-colon extension is not supported.
-// 3) The environment variable POSIXLY_CORRECT is not supported.
-// 4) The + syntax is not supported.
-// 5) The automatic permutation of arguments is not supported.
-// 6) This implementation of getopt() returns EOF if an error is
-// encountered, instead of -1 as the latest standard requires.
-//
-// EXAMPLE
-// BOOL CMyApp::ProcessCommandLine(int argc, char *argv[])
-// {
-// int c;
-//
-// while ((c = getopt(argc, argv, ("aBn:"))) != EOF)
-// {
-// switch (c)
-// {
-// case ('a'):
-// TRACE(("option a\n"));
-// //
-// // set some flag here
-// //
-// break;
-//
-// case ('B'):
-// TRACE( ("option B\n"));
-// //
-// // set some other flag here
-// //
-// break;
-//
-// case ('n'):
-// TRACE(("option n: value=%d\n"), atoi(optarg));
-// //
-// // do something with value here
-// //
-// break;
-//
-// case ('?'):
-// TRACE(("ERROR: illegal option %s\n"), argv[optind-1]);
-// return FALSE;
-// break;
-//
-// default:
-// TRACE(("WARNING: no handler for option %c\n"), c);
-// return FALSE;
-// break;
-// }
-// }
-// //
-// // check for non-option args here
-// //
-// return TRUE;
-// }
-//
-///////////////////////////////////////////////////////////////////////////////
-
-char *optarg; // global argument pointer
-int optind = 0; // global argv index
-
-int getopt(int argc, char *argv[], char *optstring)
-{
- static char *next = NULL;
- if (optind == 0)
- next = NULL;
-
- optarg = NULL;
-
- if (next == NULL || *next =='\0') {
- if (optind == 0)
- optind++;
-
- if (optind >= argc || argv[optind][0] != ('-') || argv[optind][1] == ('\0')) {
- optarg = NULL;
- if (optind < argc)
- optarg = argv[optind];
- return EOF;
- }
-
- if (strcmp(argv[optind], "--") == 0) {
- optind++;
- optarg = NULL;
- if (optind < argc)
- optarg = argv[optind];
- return EOF;
- }
-
- next = argv[optind];
- next++; // skip past -
- optind++;
- }
-
- char c = *next++;
- char *cp = strchr(optstring, c);
-
- if (cp == NULL || c == (':'))
- return ('?');
-
- cp++;
- if (*cp == (':')) {
- if (*next != ('\0')) {
- optarg = next;
- next = NULL;
- } else if (optind < argc) {
- optarg = argv[optind];
- optind++;
- } else {
- return ('?');
- }
- }
-
- return c;
-}
-
-// for an overview, see
-// W. Press, S. Teukolsky and W. Vetterling. (1992) Numerical Recipes in C. Chapter 6.1.
-double lgamma(int x)
-{
- // size_t xx=(size_t)x; xx--; size_t sum=1; while (xx) { sum *= xx--; } return log((double)(sum));
- if (x <= 2) {
- return 0.0;
- }
- static double coefs[6] = {76.18009172947146, -86.50532032941677, 24.01409824083091, -1.231739572450155, 0.1208650973866179e-2, -0.5395239384953e-5};
- double tmp=(double)x+5.5;
- tmp -= (((double)x)+0.5)*log(tmp);
- double y=(double)x;
- double sum = 1.000000000190015;
- for (size_t j=0; j<6; ++j) {
- sum += coefs[j]/++y;
- }
- return -tmp+log(2.5066282746310005*sum/(double)x);
-}
+// XGetopt.cpp Version 1.2
+//
+// Author: Hans Dietrich
+// hdietrich2@hotmail.com
+//
+// Description:
+// XGetopt.cpp implements getopt(), a function to parse command lines.
+//
+// History
+// Version 1.2 - 2003 May 17
+// - Added Unicode support
+//
+// Version 1.1 - 2002 March 10
+// - Added example to XGetopt.cpp module header
+//
+// This software is released into the public domain.
+// You are free to use it in any way you like.
+//
+// This software is provided "as is" with no expressed
+// or implied warranty. I accept no liability for any
+// damage or loss of business that this software may cause.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+
+///////////////////////////////////////////////////////////////////////////////
+// if you are using precompiled headers then include this line:
+///////////////////////////////////////////////////////////////////////////////
+
+
+///////////////////////////////////////////////////////////////////////////////
+// if you are not using precompiled headers then include these lines:
+//#include <windows.h>
+//#include <cstdio>
+//#include <tchar.h>
+///////////////////////////////////////////////////////////////////////////////
+
+
+#include <cstdio>
+#include <cstring>
+#include <cmath>
+#include "WIN32_functions.h"
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// X G e t o p t . c p p
+//
+//
+// NAME
+// getopt -- parse command line options
+//
+// SYNOPSIS
+// int getopt(int argc, char *argv[], char *optstring)
+//
+// extern char *optarg;
+// extern int optind;
+//
+// DESCRIPTION
+// The getopt() function parses the command line arguments. Its
+// arguments argc and argv are the argument count and array as
+// passed into the application on program invocation. In the case
+// of Visual C++ programs, argc and argv are available via the
+// variables __argc and __argv (double underscores), respectively.
+// getopt returns the next option letter in argv that matches a
+// letter in optstring. (Note: Unicode programs should use
+// __targv instead of __argv. Also, all character and string
+// literals should be enclosed in ( ) ).
+//
+// optstring is a string of recognized option letters; if a letter
+// is followed by a colon, the option is expected to have an argument
+// that may or may not be separated from it by white space. optarg
+// is set to point to the start of the option argument on return from
+// getopt.
+//
+// Option letters may be combined, e.g., "-ab" is equivalent to
+// "-a -b". Option letters are case sensitive.
+//
+// getopt places in the external variable optind the argv index
+// of the next argument to be processed. optind is initialized
+// to 0 before the first call to getopt.
+//
+// When all options have been processed (i.e., up to the first
+// non-option argument), getopt returns EOF, optarg will point
+// to the argument, and optind will be set to the argv index of
+// the argument. If there are no non-option arguments, optarg
+// will be set to NULL.
+//
+// The special option "--" may be used to delimit the end of the
+// options; EOF will be returned, and "--" (and everything after it)
+// will be skipped.
+//
+// RETURN VALUE
+// For option letters contained in the string optstring, getopt
+// will return the option letter. getopt returns a question mark (?)
+// when it encounters an option letter not included in optstring.
+// EOF is returned when processing is finished.
+//
+// BUGS
+// 1) Long options are not supported.
+// 2) The GNU double-colon extension is not supported.
+// 3) The environment variable POSIXLY_CORRECT is not supported.
+// 4) The + syntax is not supported.
+// 5) The automatic permutation of arguments is not supported.
+// 6) This implementation of getopt() returns EOF if an error is
+// encountered, instead of -1 as the latest standard requires.
+//
+// EXAMPLE
+// BOOL CMyApp::ProcessCommandLine(int argc, char *argv[])
+// {
+// int c;
+//
+// while ((c = getopt(argc, argv, ("aBn:"))) != EOF)
+// {
+// switch (c)
+// {
+// case ('a'):
+// TRACE(("option a\n"));
+// //
+// // set some flag here
+// //
+// break;
+//
+// case ('B'):
+// TRACE( ("option B\n"));
+// //
+// // set some other flag here
+// //
+// break;
+//
+// case ('n'):
+// TRACE(("option n: value=%d\n"), atoi(optarg));
+// //
+// // do something with value here
+// //
+// break;
+//
+// case ('?'):
+// TRACE(("ERROR: illegal option %s\n"), argv[optind-1]);
+// return FALSE;
+// break;
+//
+// default:
+// TRACE(("WARNING: no handler for option %c\n"), c);
+// return FALSE;
+// break;
+// }
+// }
+// //
+// // check for non-option args here
+// //
+// return TRUE;
+// }
+//
+///////////////////////////////////////////////////////////////////////////////
+
+char *optarg; // global argument pointer
+int optind = 0; // global argv index
+
+int getopt(int argc, char *argv[], char *optstring)
+{
+ static char *next = NULL;
+ if (optind == 0)
+ next = NULL;
+
+ optarg = NULL;
+
+ if (next == NULL || *next =='\0') {
+ if (optind == 0)
+ optind++;
+
+ if (optind >= argc || argv[optind][0] != ('-') || argv[optind][1] == ('\0')) {
+ optarg = NULL;
+ if (optind < argc)
+ optarg = argv[optind];
+ return EOF;
+ }
+
+ if (strcmp(argv[optind], "--") == 0) {
+ optind++;
+ optarg = NULL;
+ if (optind < argc)
+ optarg = argv[optind];
+ return EOF;
+ }
+
+ next = argv[optind];
+ next++; // skip past -
+ optind++;
+ }
+
+ char c = *next++;
+ char *cp = strchr(optstring, c);
+
+ if (cp == NULL || c == (':'))
+ return ('?');
+
+ cp++;
+ if (*cp == (':')) {
+ if (*next != ('\0')) {
+ optarg = next;
+ next = NULL;
+ } else if (optind < argc) {
+ optarg = argv[optind];
+ optind++;
+ } else {
+ return ('?');
+ }
+ }
+
+ return c;
+}
+
+// for an overview, see
+// W. Press, S. Teukolsky and W. Vetterling. (1992) Numerical Recipes in C. Chapter 6.1.
+double lgamma(int x)
+{
+ // size_t xx=(size_t)x; xx--; size_t sum=1; while (xx) { sum *= xx--; } return log((double)(sum));
+ if (x <= 2) {
+ return 0.0;
+ }
+ static double coefs[6] = {76.18009172947146, -86.50532032941677, 24.01409824083091, -1.231739572450155, 0.1208650973866179e-2, -0.5395239384953e-5};
+ double tmp=(double)x+5.5;
+ tmp -= (((double)x)+0.5)*log(tmp);
+ double y=(double)x;
+ double sum = 1.000000000190015;
+ for (size_t j=0; j<6; ++j) {
+ sum += coefs[j]/++y;
+ }
+ return -tmp+log(2.5066282746310005*sum/(double)x);
+}
diff --git a/contrib/relent-filter/sigtest-filter/WIN32_functions.h b/contrib/relent-filter/sigtest-filter/WIN32_functions.h
index 6a719392e..ad644018b 100755
--- a/contrib/relent-filter/sigtest-filter/WIN32_functions.h
+++ b/contrib/relent-filter/sigtest-filter/WIN32_functions.h
@@ -1,24 +1,24 @@
-// XGetopt.h Version 1.2
-//
-// Author: Hans Dietrich
-// hdietrich2@hotmail.com
-//
-// This software is released into the public domain.
-// You are free to use it in any way you like.
-//
-// This software is provided "as is" with no expressed
-// or implied warranty. I accept no liability for any
-// damage or loss of business that this software may cause.
-//
-///////////////////////////////////////////////////////////////////////////////
-
-#ifndef XGETOPT_H
-#define XGETOPT_H
-
-extern int optind, opterr;
-extern char *optarg;
-
-int getopt(int argc, char *argv[], char *optstring);
-double lgamma(int x);
-
-#endif //XGETOPT_H
+// XGetopt.h Version 1.2
+//
+// Author: Hans Dietrich
+// hdietrich2@hotmail.com
+//
+// This software is released into the public domain.
+// You are free to use it in any way you like.
+//
+// This software is provided "as is" with no expressed
+// or implied warranty. I accept no liability for any
+// damage or loss of business that this software may cause.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef XGETOPT_H
+#define XGETOPT_H
+
+extern int optind, opterr;
+extern char *optarg;
+
+int getopt(int argc, char *argv[], char *optstring);
+double lgamma(int x);
+
+#endif //XGETOPT_H
diff --git a/contrib/relent-filter/sigtest-filter/filter-pt.cpp b/contrib/relent-filter/sigtest-filter/filter-pt.cpp
index 4a51953ea..e2408900d 100755
--- a/contrib/relent-filter/sigtest-filter/filter-pt.cpp
+++ b/contrib/relent-filter/sigtest-filter/filter-pt.cpp
@@ -1,5 +1,5 @@
-#include <cstring>
+#include <cstring>
#include <cassert>
#include <cstdio>
#include <cstdlib>
diff --git a/contrib/relent-filter/src/IOWrapper.cpp b/contrib/relent-filter/src/IOWrapper.cpp
index 053735c96..7ad7697ce 100755
--- a/contrib/relent-filter/src/IOWrapper.cpp
+++ b/contrib/relent-filter/src/IOWrapper.cpp
@@ -234,13 +234,13 @@ void OutputAlignment(ostream &out, const AlignmentInfo &ai, size_t sourceOffset,
{
typedef std::vector< const std::pair<size_t,size_t>* > AlignVec;
AlignVec alignments = ai.GetSortedAlignments();
-
+
AlignVec::const_iterator it;
for (it = alignments.begin(); it != alignments.end(); ++it) {
const std::pair<size_t,size_t> &alignment = **it;
out << alignment.first + sourceOffset << "-" << alignment.second + targetOffset << " ";
}
-
+
}
void OutputAlignment(ostream &out, const vector<const Hypothesis *> &edges)
@@ -251,7 +251,7 @@ void OutputAlignment(ostream &out, const vector<const Hypothesis *> &edges)
const Hypothesis &edge = *edges[currEdge];
const TargetPhrase &tp = edge.GetCurrTargetPhrase();
size_t sourceOffset = edge.GetCurrSourceWordsRange().GetStartPos();
-
+
OutputAlignment(out, tp.GetAlignmentInfo(), sourceOffset, targetOffset);
targetOffset += tp.GetSize();
@@ -263,7 +263,7 @@ void OutputAlignment(OutputCollector* collector, size_t lineNo , const vector<co
{
ostringstream out;
OutputAlignment(out, edges);
-
+
collector->Write(lineNo,out.str());
}
@@ -477,7 +477,7 @@ void OutputNBest(std::ostream& out, const Moses::TrellisPathList &nBestList, con
const int sourceOffset = sourceRange.GetStartPos();
const int targetOffset = targetRange.GetStartPos();
const AlignmentInfo &ai = edge.GetCurrTargetPhrase().GetAlignmentInfo();
-
+
OutputAlignment(out, ai, sourceOffset, targetOffset);
}
diff --git a/contrib/relent-filter/src/Main.cpp b/contrib/relent-filter/src/Main.cpp
index 3c7911248..6a2bf4b01 100755
--- a/contrib/relent-filter/src/Main.cpp
+++ b/contrib/relent-filter/src/Main.cpp
@@ -168,18 +168,18 @@ static void ShowWeights()
int main(int argc, char** argv)
{
try {
-
+
// echo command line, if verbose
IFVERBOSE(1) {
TRACE_ERR("command: ");
for(int i=0; i<argc; ++i) TRACE_ERR(argv[i]<<" ");
TRACE_ERR(endl);
}
-
+
// set number of significant decimals in output
fix(cout,PRECISION);
fix(cerr,PRECISION);
-
+
// load all the settings into the Parameter class
// (stores them as strings, or array of strings)
Parameter* params = new Parameter();
@@ -187,34 +187,34 @@ int main(int argc, char** argv)
params->Explain();
exit(1);
}
-
-
+
+
// initialize all "global" variables, which are stored in StaticData
// note: this also loads models such as the language model, etc.
if (!StaticData::LoadDataStatic(params, argv[0])) {
exit(1);
}
-
+
// setting "-show-weights" -> just dump out weights and exit
if (params->isParamSpecified("show-weights")) {
ShowWeights();
exit(0);
}
-
+
// shorthand for accessing information in StaticData
const StaticData& staticData = StaticData::Instance();
-
-
+
+
//initialise random numbers
rand_init();
-
+
// set up read/writing class
IOWrapper* ioWrapper = GetIOWrapper(staticData);
if (!ioWrapper) {
cerr << "Error; Failed to create IO object" << endl;
exit(1);
}
-
+
// check on weights
vector<float> weights = staticData.GetAllWeights();
IFVERBOSE(2) {
@@ -233,7 +233,7 @@ int main(int argc, char** argv)
// setting lexicalized reordering setup
PhraseBasedReorderingState::m_useFirstBackwardScore = false;
-
+
auto_ptr<OutputCollector> outputCollector;
outputCollector.reset(new OutputCollector());
@@ -241,7 +241,7 @@ int main(int argc, char** argv)
#ifdef WITH_THREADS
ThreadPool pool(staticData.ThreadCount());
#endif
-
+
// main loop over set of input sentences
InputType* source = NULL;
size_t lineCount = 0;
@@ -259,11 +259,11 @@ int main(int argc, char** argv)
task->Run();
delete task;
#endif
-
+
source = NULL; //make sure it doesn't get deleted
++lineCount;
}
-
+
// we are done, finishing up
#ifdef WITH_THREADS
pool.Stop(true); //flush remaining jobs
diff --git a/contrib/relent-filter/src/RelativeEntropyCalc.cpp b/contrib/relent-filter/src/RelativeEntropyCalc.cpp
index 212eedf87..9ba334fca 100755
--- a/contrib/relent-filter/src/RelativeEntropyCalc.cpp
+++ b/contrib/relent-filter/src/RelativeEntropyCalc.cpp
@@ -70,7 +70,7 @@ namespace MosesCmd
if (neg_log_div > 100){
return 100;
}
- return neg_log_div;
+ return neg_log_div;
}
void RelativeEntropyCalc::ConcatOutputPhraseRecursive(Phrase& phrase, const Hypothesis *hypo){
diff --git a/contrib/relent-filter/src/TranslationAnalysis.cpp b/contrib/relent-filter/src/TranslationAnalysis.cpp
index 89da48301..eb5f36293 100755
--- a/contrib/relent-filter/src/TranslationAnalysis.cpp
+++ b/contrib/relent-filter/src/TranslationAnalysis.cpp
@@ -57,7 +57,7 @@ void PrintTranslationAnalysis(const TranslationSystem* system, std::ostream &os,
}
}
}
-
+
bool epsilon = false;
if (target == "") {
target="<EPSILON>";
diff --git a/contrib/rephraser/paraphrase.cpp b/contrib/rephraser/paraphrase.cpp
index ad9dbc891..0556d6ccd 100644
--- a/contrib/rephraser/paraphrase.cpp
+++ b/contrib/rephraser/paraphrase.cpp
@@ -60,12 +60,12 @@ static void add(const string& e, const vector<float> scores,
static void finalise(Probs& p_e_given_f, Probs& p_f_given_e) {
//cerr << "Sizes: p(e|f): " << p_e_given_f.size() << " p(f|e): " << p_f_given_e.size() << endl;
- for (Probs::const_iterator e1_iter = p_f_given_e.begin() ;
+ for (Probs::const_iterator e1_iter = p_f_given_e.begin() ;
e1_iter != p_f_given_e.end(); ++e1_iter) {
for (Probs::const_iterator e2_iter = p_e_given_f.begin() ;
e2_iter != p_e_given_f.end(); ++e2_iter) {
- if (e1_iter->second == e2_iter->second) continue;
+ if (e1_iter->second == e2_iter->second) continue;
cout << e1_iter->second << " ||| " << e2_iter->second << " ||| " <<
e1_iter->first * e2_iter->first << " ||| " << endl;
}
diff --git a/contrib/server/mosesserver.cpp b/contrib/server/mosesserver.cpp
index edf7daa13..337962aa6 100644
--- a/contrib/server/mosesserver.cpp
+++ b/contrib/server/mosesserver.cpp
@@ -3,10 +3,10 @@
// The separate moses server executable is being phased out.
// Since there were problems with the migration into the main
// executable, this separate program is still included in the
-// distribution for legacy reasons. Contributors are encouraged
-// to add their contributions to moses/server rather than
+// distribution for legacy reasons. Contributors are encouraged
+// to add their contributions to moses/server rather than
// contrib/server. This recommendation does not apply to wrapper
-// scripts.
+// scripts.
// The future is this:
/** main function of the command line version of the decoder **/
@@ -83,7 +83,7 @@ public:
pdsa->add(source_,target_,alignment_);
#else
const PhraseDictionary* pdf = PhraseDictionary::GetColl()[0];
- PhraseDictionaryDynSuffixArray*
+ PhraseDictionaryDynSuffixArray*
pdsa = (PhraseDictionaryDynSuffixArray*) pdf;
cerr << "Inserting into address " << pdsa << endl;
pdsa->insertSnt(source_, target_, alignment_);
@@ -146,7 +146,7 @@ public:
}
}
*/
-
+
void breakOutParams(const params_t& params) {
params_t::const_iterator si = params.find("source");
if(si == params.end())
@@ -236,7 +236,7 @@ public:
class TranslationTask : public virtual Moses::TranslationTask {
protected:
TranslationTask(xmlrpc_c::paramList const& paramList,
- boost::condition_variable& cond, boost::mutex& mut)
+ boost::condition_variable& cond, boost::mutex& mut)
: m_paramList(paramList),
m_cond(cond),
m_mut(mut),
@@ -244,7 +244,7 @@ protected:
{}
public:
- static boost::shared_ptr<TranslationTask>
+ static boost::shared_ptr<TranslationTask>
create(xmlrpc_c::paramList const& paramList,
boost::condition_variable& cond, boost::mutex& mut)
{
@@ -252,15 +252,15 @@ public:
ret->m_self = ret;
return ret;
}
-
+
virtual bool DeleteAfterExecution() {return false;}
bool IsDone() const {return m_done;}
const map<string, xmlrpc_c::value>& GetRetData() { return m_retData;}
- virtual void
- Run()
+ virtual void
+ Run()
{
using namespace xmlrpc_c;
const params_t params = m_paramList.getStruct(0);
@@ -292,25 +292,25 @@ public:
vector<float> multiModelWeights;
si = params.find("lambda");
- if (si != params.end())
+ if (si != params.end())
{
value_array multiModelArray = value_array(si->second);
vector<value> multiModelValueVector(multiModelArray.vectorValueValue());
- for (size_t i=0;i < multiModelValueVector.size();i++)
+ for (size_t i=0;i < multiModelValueVector.size();i++)
{
multiModelWeights.push_back(value_double(multiModelValueVector[i]));
}
}
si = params.find("model_name");
- if (si != params.end() && multiModelWeights.size() > 0)
+ if (si != params.end() && multiModelWeights.size() > 0)
{
const string model_name = value_string(si->second);
- PhraseDictionaryMultiModel* pdmm
+ PhraseDictionaryMultiModel* pdmm
= (PhraseDictionaryMultiModel*) FindPhraseDictionary(model_name);
pdmm->SetTemporaryMultiModelWeightsVector(multiModelWeights);
}
-
+
const StaticData &staticData = StaticData::Instance();
//Make sure alternative paths are retained, if necessary
@@ -321,7 +321,7 @@ public:
stringstream out, graphInfo, transCollOpts;
- if (staticData.IsSyntax())
+ if (staticData.IsSyntax())
{
boost::shared_ptr<TreeInput> tinput(new TreeInput);
const vector<FactorType>& IFO = staticData.GetInputFactorOrder();
@@ -338,8 +338,8 @@ public:
manager.OutputSearchGraphMoses(sgstream);
m_retData["sg"] = value_string(sgstream.str());
}
- }
- else
+ }
+ else
{
// size_t lineNumber = 0; // TODO: Include sentence request number here?
boost::shared_ptr<Sentence> sentence(new Sentence(0,source));
@@ -351,30 +351,30 @@ public:
vector<xmlrpc_c::value> alignInfo;
outputHypo(out,hypo,addAlignInfo,alignInfo,reportAllFactors);
if (addAlignInfo) m_retData["align"] = value_array(alignInfo);
- if (addWordAlignInfo)
+ if (addWordAlignInfo)
{
stringstream wordAlignment;
hypo->OutputAlignment(wordAlignment);
vector<xmlrpc_c::value> alignments;
string alignmentPair;
- while (wordAlignment >> alignmentPair)
+ while (wordAlignment >> alignmentPair)
{
int pos = alignmentPair.find('-');
map<string, xmlrpc_c::value> wordAlignInfo;
- wordAlignInfo["source-word"]
+ wordAlignInfo["source-word"]
= value_int(atoi(alignmentPair.substr(0, pos).c_str()));
- wordAlignInfo["target-word"]
+ wordAlignInfo["target-word"]
= value_int(atoi(alignmentPair.substr(pos + 1).c_str()));
alignments.push_back(value_struct(wordAlignInfo));
}
m_retData["word-align"] = value_array(alignments);
}
-
+
if (addGraphInfo) insertGraphInfo(manager,m_retData);
if (addTopts) insertTranslationOptions(manager,m_retData);
- if (nbest_size > 0)
+ if (nbest_size > 0)
{
- outputNBest(manager, m_retData, nbest_size, nbest_distinct,
+ outputNBest(manager, m_retData, nbest_size, nbest_distinct,
reportAllFactors, addAlignInfo, addScoreBreakdown);
}
(const_cast<StaticData&>(staticData)).SetOutputSearchGraph(false);
@@ -389,11 +389,11 @@ public:
}
- void outputHypo(ostream& out, const Hypothesis* hypo,
- bool addAlignmentInfo, vector<xmlrpc_c::value>& alignInfo,
+ void outputHypo(ostream& out, const Hypothesis* hypo,
+ bool addAlignmentInfo, vector<xmlrpc_c::value>& alignInfo,
bool reportAllFactors = false) {
if (hypo->GetPrevHypo() != NULL) {
- outputHypo(out,hypo->GetPrevHypo(),addAlignmentInfo,
+ outputHypo(out,hypo->GetPrevHypo(),addAlignmentInfo,
alignInfo, reportAllFactors);
Phrase p = hypo->GetCurrTargetPhrase();
if(reportAllFactors) {
@@ -547,14 +547,14 @@ public:
retData.insert(pair<string, xmlrpc_c::value>("nbest", xmlrpc_c::value_array(nBestXml)));
}
- void
- insertTranslationOptions(Manager& manager, map<string, xmlrpc_c::value>& retData)
+ void
+ insertTranslationOptions(Manager& manager, map<string, xmlrpc_c::value>& retData)
{
const TranslationOptionCollection* toptsColl = manager.getSntTranslationOptions();
vector<xmlrpc_c::value> toptsXml;
size_t const stop = toptsColl->GetSource().GetSize();
TranslationOptionList const* tol;
- for (size_t s = 0 ; s < stop ; ++s)
+ for (size_t s = 0 ; s < stop ; ++s)
{
for (size_t e = s; (tol = toptsColl->GetTranslationOptionList(s,e)) != NULL; ++e)
{
@@ -569,11 +569,11 @@ public:
toptXml["start"] = xmlrpc_c::value_int(s);
toptXml["end"] = xmlrpc_c::value_int(e);
vector<xmlrpc_c::value> scoresXml;
- const std::valarray<FValue> &scores
+ const std::valarray<FValue> &scores
= topt->GetScoreBreakdown().getCoreFeatures();
- for (size_t j = 0; j < scores.size(); ++j)
+ for (size_t j = 0; j < scores.size(); ++j)
scoresXml.push_back(xmlrpc_c::value_double(scores[j]));
-
+
toptXml["scores"] = xmlrpc_c::value_array(scoresXml);
toptsXml.push_back(xmlrpc_c::value_struct(toptXml));
}
@@ -581,7 +581,7 @@ public:
}
retData.insert(pair<string, xmlrpc_c::value>("topt", xmlrpc_c::value_array(toptsXml)));
}
-
+
private:
xmlrpc_c::paramList const& m_paramList;
map<string, xmlrpc_c::value> m_retData;
@@ -619,8 +619,8 @@ private:
Moses::ThreadPool m_threadPool;
};
-static
-void
+static
+void
PrintFeatureWeight(ostream& out, const FeatureFunction* ff)
{
out << ff->GetScoreProducerDescription() << "=";
@@ -632,16 +632,16 @@ PrintFeatureWeight(ostream& out, const FeatureFunction* ff)
out << endl;
}
-static
-void
+static
+void
ShowWeights(ostream& out)
{
// adapted from moses-cmd/Main.cpp
std::ios::fmtflags old_flags = out.setf(std::ios::fixed);
size_t old_precision = out.precision(6);
- const vector<const StatelessFeatureFunction*>&
+ const vector<const StatelessFeatureFunction*>&
slf = StatelessFeatureFunction::GetStatelessFeatureFunctions();
- const vector<const StatefulFeatureFunction*>&
+ const vector<const StatefulFeatureFunction*>&
sff = StatefulFeatureFunction::GetStatefulFeatureFunctions();
for (size_t i = 0; i < sff.size(); ++i) {
@@ -662,7 +662,7 @@ ShowWeights(ostream& out)
out << ff->GetScoreProducerDescription() << " UNTUNEABLE" << endl;
}
}
- if (! (old_flags & std::ios::fixed))
+ if (! (old_flags & std::ios::fixed))
out.unsetf(std::ios::fixed);
out.precision(old_precision);
}
@@ -754,7 +754,7 @@ int main(int argc, char** argv)
.allowOrigin("*")
);
*/
-
+
XVERBOSE(1,"Listening on port " << port << endl);
if (isSerial) {
while(1) myAbyssServer.runOnce();
diff --git a/contrib/sigtest-filter/WIN32_functions.cpp b/contrib/sigtest-filter/WIN32_functions.cpp
index cfc15d592..989c727a2 100644
--- a/contrib/sigtest-filter/WIN32_functions.cpp
+++ b/contrib/sigtest-filter/WIN32_functions.cpp
@@ -1,231 +1,231 @@
-// XGetopt.cpp Version 1.2
-//
-// Author: Hans Dietrich
-// hdietrich2@hotmail.com
-//
-// Description:
-// XGetopt.cpp implements getopt(), a function to parse command lines.
-//
-// History
-// Version 1.2 - 2003 May 17
-// - Added Unicode support
-//
-// Version 1.1 - 2002 March 10
-// - Added example to XGetopt.cpp module header
-//
-// This software is released into the public domain.
-// You are free to use it in any way you like.
-//
-// This software is provided "as is" with no expressed
-// or implied warranty. I accept no liability for any
-// damage or loss of business that this software may cause.
-//
-///////////////////////////////////////////////////////////////////////////////
-
-
-///////////////////////////////////////////////////////////////////////////////
-// if you are using precompiled headers then include this line:
-///////////////////////////////////////////////////////////////////////////////
-
-
-///////////////////////////////////////////////////////////////////////////////
-// if you are not using precompiled headers then include these lines:
-//#include <windows.h>
-//#include <cstdio>
-//#include <tchar.h>
-///////////////////////////////////////////////////////////////////////////////
-
-
-#include <cstdio>
-#include <cstring>
-#include <cmath>
-#include "WIN32_functions.h"
-
-
-///////////////////////////////////////////////////////////////////////////////
-//
-// X G e t o p t . c p p
-//
-//
-// NAME
-// getopt -- parse command line options
-//
-// SYNOPSIS
-// int getopt(int argc, char *argv[], char *optstring)
-//
-// extern char *optarg;
-// extern int optind;
-//
-// DESCRIPTION
-// The getopt() function parses the command line arguments. Its
-// arguments argc and argv are the argument count and array as
-// passed into the application on program invocation. In the case
-// of Visual C++ programs, argc and argv are available via the
-// variables __argc and __argv (double underscores), respectively.
-// getopt returns the next option letter in argv that matches a
-// letter in optstring. (Note: Unicode programs should use
-// __targv instead of __argv. Also, all character and string
-// literals should be enclosed in ( ) ).
-//
-// optstring is a string of recognized option letters; if a letter
-// is followed by a colon, the option is expected to have an argument
-// that may or may not be separated from it by white space. optarg
-// is set to point to the start of the option argument on return from
-// getopt.
-//
-// Option letters may be combined, e.g., "-ab" is equivalent to
-// "-a -b". Option letters are case sensitive.
-//
-// getopt places in the external variable optind the argv index
-// of the next argument to be processed. optind is initialized
-// to 0 before the first call to getopt.
-//
-// When all options have been processed (i.e., up to the first
-// non-option argument), getopt returns EOF, optarg will point
-// to the argument, and optind will be set to the argv index of
-// the argument. If there are no non-option arguments, optarg
-// will be set to NULL.
-//
-// The special option "--" may be used to delimit the end of the
-// options; EOF will be returned, and "--" (and everything after it)
-// will be skipped.
-//
-// RETURN VALUE
-// For option letters contained in the string optstring, getopt
-// will return the option letter. getopt returns a question mark (?)
-// when it encounters an option letter not included in optstring.
-// EOF is returned when processing is finished.
-//
-// BUGS
-// 1) Long options are not supported.
-// 2) The GNU double-colon extension is not supported.
-// 3) The environment variable POSIXLY_CORRECT is not supported.
-// 4) The + syntax is not supported.
-// 5) The automatic permutation of arguments is not supported.
-// 6) This implementation of getopt() returns EOF if an error is
-// encountered, instead of -1 as the latest standard requires.
-//
-// EXAMPLE
-// BOOL CMyApp::ProcessCommandLine(int argc, char *argv[])
-// {
-// int c;
-//
-// while ((c = getopt(argc, argv, ("aBn:"))) != EOF)
-// {
-// switch (c)
-// {
-// case ('a'):
-// TRACE(("option a\n"));
-// //
-// // set some flag here
-// //
-// break;
-//
-// case ('B'):
-// TRACE( ("option B\n"));
-// //
-// // set some other flag here
-// //
-// break;
-//
-// case ('n'):
-// TRACE(("option n: value=%d\n"), atoi(optarg));
-// //
-// // do something with value here
-// //
-// break;
-//
-// case ('?'):
-// TRACE(("ERROR: illegal option %s\n"), argv[optind-1]);
-// return FALSE;
-// break;
-//
-// default:
-// TRACE(("WARNING: no handler for option %c\n"), c);
-// return FALSE;
-// break;
-// }
-// }
-// //
-// // check for non-option args here
-// //
-// return TRUE;
-// }
-//
-///////////////////////////////////////////////////////////////////////////////
-
-char *optarg; // global argument pointer
-int optind = 0; // global argv index
-
-int getopt(int argc, char *argv[], char *optstring)
-{
- static char *next = NULL;
- if (optind == 0)
- next = NULL;
-
- optarg = NULL;
-
- if (next == NULL || *next =='\0') {
- if (optind == 0)
- optind++;
-
- if (optind >= argc || argv[optind][0] != ('-') || argv[optind][1] == ('\0')) {
- optarg = NULL;
- if (optind < argc)
- optarg = argv[optind];
- return EOF;
- }
-
- if (strcmp(argv[optind], "--") == 0) {
- optind++;
- optarg = NULL;
- if (optind < argc)
- optarg = argv[optind];
- return EOF;
- }
-
- next = argv[optind];
- next++; // skip past -
- optind++;
- }
-
- char c = *next++;
- char *cp = strchr(optstring, c);
-
- if (cp == NULL || c == (':'))
- return ('?');
-
- cp++;
- if (*cp == (':')) {
- if (*next != ('\0')) {
- optarg = next;
- next = NULL;
- } else if (optind < argc) {
- optarg = argv[optind];
- optind++;
- } else {
- return ('?');
- }
- }
-
- return c;
-}
-
-// for an overview, see
-// W. Press, S. Teukolsky and W. Vetterling. (1992) Numerical Recipes in C. Chapter 6.1.
-double lgamma(int x)
-{
- // size_t xx=(size_t)x; xx--; size_t sum=1; while (xx) { sum *= xx--; } return log((double)(sum));
- if (x <= 2) {
- return 0.0;
- }
- static double coefs[6] = {76.18009172947146, -86.50532032941677, 24.01409824083091, -1.231739572450155, 0.1208650973866179e-2, -0.5395239384953e-5};
- double tmp=(double)x+5.5;
- tmp -= (((double)x)+0.5)*log(tmp);
- double y=(double)x;
- double sum = 1.000000000190015;
- for (size_t j=0; j<6; ++j) {
- sum += coefs[j]/++y;
- }
- return -tmp+log(2.5066282746310005*sum/(double)x);
-}
+// XGetopt.cpp Version 1.2
+//
+// Author: Hans Dietrich
+// hdietrich2@hotmail.com
+//
+// Description:
+// XGetopt.cpp implements getopt(), a function to parse command lines.
+//
+// History
+// Version 1.2 - 2003 May 17
+// - Added Unicode support
+//
+// Version 1.1 - 2002 March 10
+// - Added example to XGetopt.cpp module header
+//
+// This software is released into the public domain.
+// You are free to use it in any way you like.
+//
+// This software is provided "as is" with no expressed
+// or implied warranty. I accept no liability for any
+// damage or loss of business that this software may cause.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+
+///////////////////////////////////////////////////////////////////////////////
+// if you are using precompiled headers then include this line:
+///////////////////////////////////////////////////////////////////////////////
+
+
+///////////////////////////////////////////////////////////////////////////////
+// if you are not using precompiled headers then include these lines:
+//#include <windows.h>
+//#include <cstdio>
+//#include <tchar.h>
+///////////////////////////////////////////////////////////////////////////////
+
+
+#include <cstdio>
+#include <cstring>
+#include <cmath>
+#include "WIN32_functions.h"
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// X G e t o p t . c p p
+//
+//
+// NAME
+// getopt -- parse command line options
+//
+// SYNOPSIS
+// int getopt(int argc, char *argv[], char *optstring)
+//
+// extern char *optarg;
+// extern int optind;
+//
+// DESCRIPTION
+// The getopt() function parses the command line arguments. Its
+// arguments argc and argv are the argument count and array as
+// passed into the application on program invocation. In the case
+// of Visual C++ programs, argc and argv are available via the
+// variables __argc and __argv (double underscores), respectively.
+// getopt returns the next option letter in argv that matches a
+// letter in optstring. (Note: Unicode programs should use
+// __targv instead of __argv. Also, all character and string
+// literals should be enclosed in ( ) ).
+//
+// optstring is a string of recognized option letters; if a letter
+// is followed by a colon, the option is expected to have an argument
+// that may or may not be separated from it by white space. optarg
+// is set to point to the start of the option argument on return from
+// getopt.
+//
+// Option letters may be combined, e.g., "-ab" is equivalent to
+// "-a -b". Option letters are case sensitive.
+//
+// getopt places in the external variable optind the argv index
+// of the next argument to be processed. optind is initialized
+// to 0 before the first call to getopt.
+//
+// When all options have been processed (i.e., up to the first
+// non-option argument), getopt returns EOF, optarg will point
+// to the argument, and optind will be set to the argv index of
+// the argument. If there are no non-option arguments, optarg
+// will be set to NULL.
+//
+// The special option "--" may be used to delimit the end of the
+// options; EOF will be returned, and "--" (and everything after it)
+// will be skipped.
+//
+// RETURN VALUE
+// For option letters contained in the string optstring, getopt
+// will return the option letter. getopt returns a question mark (?)
+// when it encounters an option letter not included in optstring.
+// EOF is returned when processing is finished.
+//
+// BUGS
+// 1) Long options are not supported.
+// 2) The GNU double-colon extension is not supported.
+// 3) The environment variable POSIXLY_CORRECT is not supported.
+// 4) The + syntax is not supported.
+// 5) The automatic permutation of arguments is not supported.
+// 6) This implementation of getopt() returns EOF if an error is
+// encountered, instead of -1 as the latest standard requires.
+//
+// EXAMPLE
+// BOOL CMyApp::ProcessCommandLine(int argc, char *argv[])
+// {
+// int c;
+//
+// while ((c = getopt(argc, argv, ("aBn:"))) != EOF)
+// {
+// switch (c)
+// {
+// case ('a'):
+// TRACE(("option a\n"));
+// //
+// // set some flag here
+// //
+// break;
+//
+// case ('B'):
+// TRACE( ("option B\n"));
+// //
+// // set some other flag here
+// //
+// break;
+//
+// case ('n'):
+// TRACE(("option n: value=%d\n"), atoi(optarg));
+// //
+// // do something with value here
+// //
+// break;
+//
+// case ('?'):
+// TRACE(("ERROR: illegal option %s\n"), argv[optind-1]);
+// return FALSE;
+// break;
+//
+// default:
+// TRACE(("WARNING: no handler for option %c\n"), c);
+// return FALSE;
+// break;
+// }
+// }
+// //
+// // check for non-option args here
+// //
+// return TRUE;
+// }
+//
+///////////////////////////////////////////////////////////////////////////////
+
+char *optarg; // global argument pointer
+int optind = 0; // global argv index
+
+int getopt(int argc, char *argv[], char *optstring)
+{
+ static char *next = NULL;
+ if (optind == 0)
+ next = NULL;
+
+ optarg = NULL;
+
+ if (next == NULL || *next =='\0') {
+ if (optind == 0)
+ optind++;
+
+ if (optind >= argc || argv[optind][0] != ('-') || argv[optind][1] == ('\0')) {
+ optarg = NULL;
+ if (optind < argc)
+ optarg = argv[optind];
+ return EOF;
+ }
+
+ if (strcmp(argv[optind], "--") == 0) {
+ optind++;
+ optarg = NULL;
+ if (optind < argc)
+ optarg = argv[optind];
+ return EOF;
+ }
+
+ next = argv[optind];
+ next++; // skip past -
+ optind++;
+ }
+
+ char c = *next++;
+ char *cp = strchr(optstring, c);
+
+ if (cp == NULL || c == (':'))
+ return ('?');
+
+ cp++;
+ if (*cp == (':')) {
+ if (*next != ('\0')) {
+ optarg = next;
+ next = NULL;
+ } else if (optind < argc) {
+ optarg = argv[optind];
+ optind++;
+ } else {
+ return ('?');
+ }
+ }
+
+ return c;
+}
+
+// for an overview, see
+// W. Press, S. Teukolsky and W. Vetterling. (1992) Numerical Recipes in C. Chapter 6.1.
+double lgamma(int x)
+{
+ // size_t xx=(size_t)x; xx--; size_t sum=1; while (xx) { sum *= xx--; } return log((double)(sum));
+ if (x <= 2) {
+ return 0.0;
+ }
+ static double coefs[6] = {76.18009172947146, -86.50532032941677, 24.01409824083091, -1.231739572450155, 0.1208650973866179e-2, -0.5395239384953e-5};
+ double tmp=(double)x+5.5;
+ tmp -= (((double)x)+0.5)*log(tmp);
+ double y=(double)x;
+ double sum = 1.000000000190015;
+ for (size_t j=0; j<6; ++j) {
+ sum += coefs[j]/++y;
+ }
+ return -tmp+log(2.5066282746310005*sum/(double)x);
+}
diff --git a/contrib/sigtest-filter/WIN32_functions.h b/contrib/sigtest-filter/WIN32_functions.h
index 6a719392e..ad644018b 100644
--- a/contrib/sigtest-filter/WIN32_functions.h
+++ b/contrib/sigtest-filter/WIN32_functions.h
@@ -1,24 +1,24 @@
-// XGetopt.h Version 1.2
-//
-// Author: Hans Dietrich
-// hdietrich2@hotmail.com
-//
-// This software is released into the public domain.
-// You are free to use it in any way you like.
-//
-// This software is provided "as is" with no expressed
-// or implied warranty. I accept no liability for any
-// damage or loss of business that this software may cause.
-//
-///////////////////////////////////////////////////////////////////////////////
-
-#ifndef XGETOPT_H
-#define XGETOPT_H
-
-extern int optind, opterr;
-extern char *optarg;
-
-int getopt(int argc, char *argv[], char *optstring);
-double lgamma(int x);
-
-#endif //XGETOPT_H
+// XGetopt.h Version 1.2
+//
+// Author: Hans Dietrich
+// hdietrich2@hotmail.com
+//
+// This software is released into the public domain.
+// You are free to use it in any way you like.
+//
+// This software is provided "as is" with no expressed
+// or implied warranty. I accept no liability for any
+// damage or loss of business that this software may cause.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef XGETOPT_H
+#define XGETOPT_H
+
+extern int optind, opterr;
+extern char *optarg;
+
+int getopt(int argc, char *argv[], char *optstring);
+double lgamma(int x);
+
+#endif //XGETOPT_H
diff --git a/contrib/sigtest-filter/filter-pt.cpp b/contrib/sigtest-filter/filter-pt.cpp
index bd0b9ae36..50418d502 100644
--- a/contrib/sigtest-filter/filter-pt.cpp
+++ b/contrib/sigtest-filter/filter-pt.cpp
@@ -1,5 +1,5 @@
-#include <cstring>
+#include <cstring>
#include <cassert>
#include <cstdio>
#include <cstdlib>
@@ -14,7 +14,7 @@
#include <set>
#include <boost/thread/tss.hpp>
-#include <boost/thread.hpp>
+#include <boost/thread.hpp>
#include <boost/unordered_map.hpp>
#ifdef WIN32
@@ -58,9 +58,9 @@ typedef boost::shared_ptr<std::vector<TextLenType> > SentIdSet;
class Cache {
typedef std::pair<SentIdSet, clock_t> ClockedSet;
typedef boost::unordered_map<std::string, ClockedSet> ClockedMap;
-
+
public:
-
+
SentIdSet get(const std::string& phrase) {
boost::shared_lock<boost::shared_mutex> lock(m_mutex);
if(m_cont.count(phrase)) {
@@ -70,27 +70,27 @@ class Cache {
}
return SentIdSet( new SentIdSet::element_type() );
}
-
+
void put(const std::string& phrase, const SentIdSet set) {
boost::unique_lock<boost::shared_mutex> lock(m_mutex);
m_cont[phrase] = std::make_pair(set, clock());
}
-
+
static void set_max_cache(size_t max_cache) {
s_max_cache = max_cache;
}
-
+
void prune() {
if(s_max_cache > 0) {
boost::upgrade_lock<boost::shared_mutex> lock(m_mutex);
if(m_cont.size() > s_max_cache) {
std::vector<clock_t> clocks;
- for(ClockedMap::iterator it = m_cont.begin(); it != m_cont.end(); it++)
+ for(ClockedMap::iterator it = m_cont.begin(); it != m_cont.end(); it++)
clocks.push_back(it->second.second);
-
+
std::sort(clocks.begin(), clocks.end());
clock_t out = clocks[m_cont.size() - s_max_cache];
-
+
boost::upgrade_to_unique_lock<boost::shared_mutex> uniq_lock(lock);
for(ClockedMap::iterator it = m_cont.begin(); it != m_cont.end(); it++)
if(it->second.second < out)
@@ -98,7 +98,7 @@ class Cache {
}
}
}
-
+
private:
ClockedMap m_cont;
boost::shared_mutex m_mutex;
@@ -282,12 +282,12 @@ void lookup_phrase(SentIdSet& ids, const std::string& phrase,
i != locations.end(); ++i) {
ids->push_back(i->sentIdInCorpus);
}
-
+
std::sort(ids->begin(), ids->end());
SentIdSet::element_type::iterator it =
std::unique(ids->begin(), ids->end());
ids->resize(it - ids->begin());
-
+
if(ids->size() >= MINIMUM_SIZE_TO_KEEP)
cache.put(phrase, ids);
}
@@ -295,8 +295,8 @@ void lookup_phrase(SentIdSet& ids, const std::string& phrase,
void lookup_multiple_phrases(SentIdSet& ids, vector<std::string> & phrases,
C_SuffixArraySearchApplicationBase & my_sa,
- const std::string & rule, Cache& cache)
-{
+ const std::string & rule, Cache& cache)
+{
if (phrases.size() == 1) {
lookup_phrase(ids, phrases.front(), my_sa, cache);
@@ -372,32 +372,32 @@ void compute_cooc_stats_and_filter(std::vector<PTEntry*>& options,
delete *i;
options.erase(options.begin() + pfe_filter_limit,options.end());
}
-
+
if (pef_filter_only)
return;
-
+
if (options.empty())
return;
-
+
SentIdSet fset( new SentIdSet::element_type() );
find_occurrences(fset, options.front()->f_phrase, f_sa, f_cache);
size_t cf = fset->size();
-
+
for (std::vector<PTEntry*>::iterator i = options.begin();
i != options.end(); ++i) {
const std::string& e_phrase = (*i)->e_phrase;
SentIdSet eset( new SentIdSet::element_type() );
find_occurrences(eset, e_phrase, e_sa, e_cache);
size_t ce = eset->size();
-
+
SentIdSet efset( new SentIdSet::element_type() );
ordered_set_intersect(efset, fset, eset);
size_t cef = efset->size();
-
+
double nlp = -log(fisher_exact(cef, cf, ce));
(*i)->set_cooc_stats(cef, cf, ce, nlp);
}
-
+
std::vector<PTEntry*>::iterator new_end =
std::remove_if(options.begin(), options.end(),
NlogSigThresholder(sig_filter_limit));
@@ -406,7 +406,7 @@ void compute_cooc_stats_and_filter(std::vector<PTEntry*>& options,
}
void filter(std::istream* in, std::ostream* out, int pfe_index) {
-
+
std::vector<std::string> lines;
std::string prev = "";
std::vector<PTEntry*> options;
@@ -415,23 +415,23 @@ void filter(std::istream* in, std::ostream* out, int pfe_index) {
boost::mutex::scoped_lock lock(in_mutex);
if(in->eof())
break;
-
+
lines.clear();
std::string line;
while(getline(*in, line) && lines.size() < 500000)
lines.push_back(line);
}
-
+
std::stringstream out_temp;
for(std::vector<std::string>::iterator it = lines.begin(); it != lines.end(); it++) {
size_t tmp_lines = ++pt_lines;
if(tmp_lines % 10000 == 0) {
boost::mutex::scoped_lock lock(err_mutex);
std::cerr << ".";
-
+
if(tmp_lines % 500000 == 0)
std::cerr << "[n:" << tmp_lines << "]\n";
-
+
if(tmp_lines % 10000000 == 0) {
float pfefper = (100.0*(float)nremoved_pfefilter)/(float)pt_lines;
float sigfper = (100.0*(float)nremoved_sigfilter)/(float)pt_lines;
@@ -446,30 +446,30 @@ void filter(std::istream* in, std::ostream* out, int pfe_index) {
<< "------------------------------------------------------\n";
}
}
-
+
if(pt_lines % 10000 == 0) {
f_cache.prune();
e_cache.prune();
}
-
+
if(it->length() > 0) {
PTEntry* pp = new PTEntry(it->c_str(), pfe_index);
if (prev != pp->f_phrase) {
prev = pp->f_phrase;
-
+
if (!options.empty()) { // always true after first line
compute_cooc_stats_and_filter(options, f_cache, e_cache);
}
-
+
for (std::vector<PTEntry*>::iterator i = options.begin();
i != options.end(); ++i) {
out_temp << **i << '\n';
delete *i;
}
-
+
options.clear();
options.push_back(pp);
-
+
} else {
options.push_back(pp);
}
@@ -479,7 +479,7 @@ void filter(std::istream* in, std::ostream* out, int pfe_index) {
*out << out_temp.str() << std::flush;
}
compute_cooc_stats_and_filter(options, f_cache, e_cache);
-
+
boost::mutex::scoped_lock lock(out_mutex);
for (std::vector<PTEntry*>::iterator i = options.begin();
i != options.end(); ++i) {
@@ -512,11 +512,11 @@ int main(int argc, char * argv[])
pfe_filter_limit = atoi(optarg);
std::cerr << "P(f|e) filter limit: " << pfe_filter_limit << std::endl;
break;
- case 't':
+ case 't':
threads = atoi(optarg);
std::cerr << "Using threads: " << threads << std::endl;
break;
- case 'm':
+ case 'm':
max_cache = atoi(optarg);
std::cerr << "Using max phrases in caches: " << max_cache << std::endl;
break;
@@ -548,13 +548,13 @@ int main(int argc, char * argv[])
usage();
}
}
-
+
if (sig_filter_limit == 0.0) pef_filter_only = true;
//-----------------------------------------------------------------------------
if (optind != argc || ((!efile || !ffile) && !pef_filter_only)) {
usage();
}
-
+
//load the indexed corpus with vocabulary(noVoc=false) and with offset(noOffset=false)
if (!pef_filter_only) {
e_sa.loadData_forSearch(efile, false, false);
@@ -582,15 +582,15 @@ int main(int argc, char * argv[])
Cache::set_max_cache(max_cache);
std::ios_base::sync_with_stdio(false);
-
+
boost::thread_group threadGroup;
- for(int i = 0; i < threads; i++)
+ for(int i = 0; i < threads; i++)
threadGroup.add_thread(new boost::thread(filter, &std::cin, &std::cout, pfe_index));
threadGroup.join_all();
float pfefper = (100.0*(float)nremoved_pfefilter)/(float)pt_lines;
float sigfper = (100.0*(float)nremoved_sigfilter)/(float)pt_lines;
-
+
std::cerr << "\n\n------------------------------------------------------\n"
<< " unfiltered phrases pairs: " << pt_lines << "\n"
<< "\n"
@@ -599,5 +599,5 @@ int main(int argc, char * argv[])
<< " TOTAL FILTERED: " << (nremoved_pfefilter + nremoved_sigfilter) << " (" << (sigfper + pfefper) << "%)\n"
<< "\n"
<< " FILTERED phrase pairs: " << (pt_lines - nremoved_pfefilter - nremoved_sigfilter) << " (" << (100.0-sigfper - pfefper) << "%)\n"
- << "------------------------------------------------------\n";
+ << "------------------------------------------------------\n";
}
diff --git a/contrib/synlm/hhmm/rvtl/include/nl-archetypeset.h b/contrib/synlm/hhmm/rvtl/include/nl-archetypeset.h
index 914e85e92..342f10777 100644
--- a/contrib/synlm/hhmm/rvtl/include/nl-archetypeset.h
+++ b/contrib/synlm/hhmm/rvtl/include/nl-archetypeset.h
@@ -65,7 +65,7 @@ class Numbered : public T {
friend String& operator<< ( String& str, const Numbered<SD1,I,SD2,T,SD3>& rv ) { return str<<SD1<<rv.i<<SD2<<rv.getT()<<SD3; }
friend pair<StringInput,Numbered<SD1,I,SD2,T,SD3>*> operator>> ( StringInput ps, Numbered<SD1,I,SD2,T,SD3>& rv ) { return pair<StringInput,Numbered<SD1,I,SD2,T,SD3>*>(ps,&rv); }
friend StringInput operator>> ( pair<StringInput,Numbered<SD1,I,SD2,T,SD3>*> delimbuff, const char* psPostDelim ) {
- return ( (SD3[0]=='\0') ? delimbuff.first>>SD1>>delimbuff.second->i>>SD2>>delimbuff.second->setT()>>psPostDelim
+ return ( (SD3[0]=='\0') ? delimbuff.first>>SD1>>delimbuff.second->i>>SD2>>delimbuff.second->setT()>>psPostDelim
: delimbuff.first>>SD1>>delimbuff.second->i>>SD2>>delimbuff.second->setT()>>SD3>>psPostDelim );
}
};
@@ -106,7 +106,7 @@ template<class V>
pair<typename V::ElementType,int> ArchetypeSet<V>::getDistanceOfNearest ( const V& v ) const {
//const Scored<typename V::ElementType,pair<int,SafePtr<const V> > > sipvDummy ( DBL_MAX );
//MinHeap<Scored<typename V::ElementType,pair<int,SafePtr<const V> > > > hsiv ( MapType::size()+1, sipvDummy );
- MinHeap<Scored<typename V::ElementType,pair<int,SafePtr<const NV> > > >& hsiv =
+ MinHeap<Scored<typename V::ElementType,pair<int,SafePtr<const NV> > > >& hsiv =
const_cast<MinHeap<Scored<typename V::ElementType,pair<int,SafePtr<const NV> > > >&> ( hsivCalc );
hsiv.clear();
@@ -120,7 +120,7 @@ pair<typename V::ElementType,int> ArchetypeSet<V>::getDistanceOfNearest ( const
typename V::ElementType d = v.getMarginalDistance ( hsiv.get(iNext).first, hsiv.get(iNext).second.getRef() );
hsiv.set(iNext).setScore() = d;
//hsiv.set(iNext).setScore() = v.getMarginalDistance ( hsiv.getMin().first, iUpper->second.second );
- ////int j =
+ ////int j =
hsiv.fixDecr(iNext);
////cerr<<" adding ln"<<&hsiv.get(j).second.getRef()<<" marg-dist="<<d<<" new-score="<<double(hsiv.get(j).getScore())<<" new-pos="<<j<<"\n";
iNext++;
@@ -140,7 +140,7 @@ pair<typename V::ElementType,int> ArchetypeSet<V>::getDistanceOfNearest ( const
typename V::ElementType d = v.getMarginalDistance ( ++hsiv.setMin().first, hsiv.getMin().second.getRef() );
hsiv.setMin().setScore() += d;
////cerr<<" matching ln"<<&hsiv.getMin().second.getRef()<<" i="<<hsiv.setMin().first<<" marg-dist="<<d<<" new-score="<<hsiv.getMin().getScore();
- ////int j =
+ ////int j =
hsiv.fixIncr(0);
////cerr<<" new-pos="<<j<<"\n";
////if(j!=0) for(int i=0;i<iNext;i++) cerr<<" "<<i<<": ln"<<hsiv.get(i).second.getRef().lineNum.toInt()<<" new-score="<<double(hsiv.get(i).getScore())<<"\n";
@@ -151,7 +151,7 @@ pair<typename V::ElementType,int> ArchetypeSet<V>::getDistanceOfNearest ( const
hsiv.set(iNext).second = SafePtr<const NV> ( iUpper->second );
typename V::ElementType d = v.getMarginalDistance ( hsiv.get(iNext).first, hsiv.get(iNext).second.getRef() );
hsiv.set(iNext).setScore() = d;
- ////int j =
+ ////int j =
hsiv.fixDecr(iNext);
////cerr<<" adding ln"<<&hsiv.get(j).second.getRef()<<" marg-dist="<<d<<" new-score="<<double(hsiv.get(j).getScore())<<" new-pos="<<j<<"\n";
iNext++;
@@ -164,7 +164,7 @@ pair<typename V::ElementType,int> ArchetypeSet<V>::getDistanceOfNearest ( const
hsiv.set(iNext).second = SafePtr<const NV> ( iLower->second );
typename V::ElementType d = v.getMarginalDistance ( hsiv.get(iNext).first, hsiv.get(iNext).second.getRef() );
hsiv.set(iNext).setScore() = d;
- ////int j =
+ ////int j =
hsiv.fixDecr(iNext);
////cerr<<" adding ln"<<&hsiv.get(j).second.getRef()<<" marg-dist="<<d<<" new-score="<<double(hsiv.get(j).getScore())<<" new-pos="<<j<<"\n";
iNext++;
diff --git a/contrib/synlm/hhmm/rvtl/include/nl-array.h b/contrib/synlm/hhmm/rvtl/include/nl-array.h
index 0dfb74b44..6c6e1bb5f 100644
--- a/contrib/synlm/hhmm/rvtl/include/nl-array.h
+++ b/contrib/synlm/hhmm/rvtl/include/nl-array.h
@@ -27,7 +27,7 @@
#include <cassert>
#include <iostream>
-using namespace std;
+using namespace std;
////////////////////////////////////////////////////////////////////////////////
diff --git a/contrib/synlm/hhmm/rvtl/include/nl-beam.h b/contrib/synlm/hhmm/rvtl/include/nl-beam.h
index 398babe21..817e96206 100644
--- a/contrib/synlm/hhmm/rvtl/include/nl-beam.h
+++ b/contrib/synlm/hhmm/rvtl/include/nl-beam.h
@@ -101,8 +101,8 @@ class Beam {
void write(FILE *pf){
/* for (typename BeamMap::const_iterator i = mkid.begin(); i != mkid.end(); i++){
i->first.write(pf);
- fprintf(pf, " %d ", i->second.first);
-// i->second.second.write(pf);
+ fprintf(pf, " %d ", i->second.first);
+// i->second.second.write(pf);
fprintf(pf, "\n");
}
*/
diff --git a/contrib/synlm/hhmm/rvtl/include/nl-cpt.h b/contrib/synlm/hhmm/rvtl/include/nl-cpt.h
index a7c1a916c..dbfb947e3 100644
--- a/contrib/synlm/hhmm/rvtl/include/nl-cpt.h
+++ b/contrib/synlm/hhmm/rvtl/include/nl-cpt.h
@@ -394,7 +394,7 @@ class SimpleMap : public map<X,Y> {
private:
typedef map<X,Y> OrigMap;
static const Y yDummy;
-
+
public:
// Constructor / destructor methods...
SimpleMap ( ) : OrigMap() { }
@@ -899,7 +899,7 @@ class GenericHidVarCPTModel : public SimpleHash<K,typename Y::template ArrayDist
const typename Y::template ArrayDistrib<P>& getDistrib ( const K& k ) const {
return HKYP::get(k);
}
-
+
P& setProb ( const Y& y, const K& k ) {
pair<typename Y::BaseType,P>& yp = HKYP::set(k).add();
yp.first = y;
diff --git a/contrib/synlm/hhmm/rvtl/include/nl-crf.h b/contrib/synlm/hhmm/rvtl/include/nl-crf.h
index 44744ad03..a9b233b23 100644
--- a/contrib/synlm/hhmm/rvtl/include/nl-crf.h
+++ b/contrib/synlm/hhmm/rvtl/include/nl-crf.h
@@ -36,7 +36,7 @@
//
////////////////////////////////////////////////////////////////////////////////
-template <class Y,class X1,class X2>
+template <class Y,class X1,class X2>
class CRF3DModeledRV : public Y {
private:
@@ -90,7 +90,7 @@ template <class Y,class X1,class X2> SafeArray5D<Id<int>,int,int,int,int,float>
////////////////////////////////////////////////////////////////////////////////
-template <class Y,class X1,class X2>
+template <class Y,class X1,class X2>
Prob CRF3DModeledRV<Y,X1,X2>::getProb( const X1& x1, const X2& x2 ) const {
SafeArray2D<int,int,int> aaCnds ( cardOff, cardSh ) ;
@@ -131,7 +131,7 @@ Prob CRF3DModeledRV<Y,X1,X2>::getProb( const X1& x1, const X2& x2 ) const {
for ( int configRghtValSite=0; configRghtValSite<(1<<bitsValSite); configRghtValSite++ )
for ( int configValOverlap=0; configValOverlap<(1<<(bitsVal-bitsValSite)); configValOverlap++ ) {
int configRghtVal = (configValOverlap<<bitsValSite)+configRghtValSite;
- // For each possible preceding trellis node...
+ // For each possible preceding trellis node...
for ( int configLeftValSite=0; configLeftValSite<(1<<bitsValSite); configLeftValSite++ ) {
int configLeftVal = (configLeftValSite<<(bitsVal-bitsValSite))+configValOverlap;
// Add product of result and previous trellis cell to current trellis cell...
@@ -158,7 +158,7 @@ Prob CRF3DModeledRV<Y,X1,X2>::getProb( const X1& x1, const X2& x2 ) const {
////////////////////////////////////////////////////////////////////////////////
-template <class Y,class X1,class X2>
+template <class Y,class X1,class X2>
bool CRF3DModeledRV<Y,X1,X2>::readModelFields ( char* aps[], int numFields ) {
if ( 7==numFields )
setPotential ( X1(string(aps[1])), // globals
@@ -172,7 +172,7 @@ bool CRF3DModeledRV<Y,X1,X2>::readModelFields ( char* aps[], int numFields ) {
////////////////////////////////////////////////////////////////////////////////
-template <class Y,class X1,class X2>
+template <class Y,class X1,class X2>
void CRF3DModeledRV<Y,X1,X2>::writeObservCliqueConfigs ( FILE* pf, int frame, const char* psMdl,
const X1& x1, const X2& x2, bool bObsVal ) const {
fprintf ( pf, "%04d> %s ", frame, psMdl );
@@ -199,7 +199,7 @@ void CRF3DModeledRV<Y,X1,X2>::writeObservCliqueConfigs ( FILE* pf, int frame, co
//
////////////////////////////////////////////////////////////////////////////////
-template <class Y,class X1,class X2,class X3>
+template <class Y,class X1,class X2,class X3>
class CRF4DModeledRV : public Y {
private:
@@ -247,13 +247,13 @@ template <class Y,class X1,class X2,class X3> int CRF4DModeledRV<Y,X1,X2,X3>::c
template <class Y,class X1,class X2,class X3> int CRF4DModeledRV<Y,X1,X2,X3>::cardCnd = 0;
template <class Y,class X1,class X2,class X3> int CRF4DModeledRV<Y,X1,X2,X3>::bitsVal = 0;
template <class Y,class X1,class X2,class X3> int CRF4DModeledRV<Y,X1,X2,X3>::bitsValSite = 0;
-template <class Y,class X1,class X2,class X3> SafeArray5D<Id<int>,int,int,int,int,float>
+template <class Y,class X1,class X2,class X3> SafeArray5D<Id<int>,int,int,int,int,float>
CRF4DModeledRV<Y,X1,X2,X3>::aaaaaPotentials;
/* template <class Y,class X1,class X2> SafeArray3D<int> CRF4DModeledRV<Y,X1,X2>::aaaCnds; */
////////////////////////////////////////////////////////////////////////////////
-template <class Y,class X1,class X2,class X3>
+template <class Y,class X1,class X2,class X3>
Prob CRF4DModeledRV<Y,X1,X2,X3>::getProb( const X1& x1, const X2& x2, const X3& x3 ) const {
SafeArray2D<int,int,int> aaCnds ( cardOff, cardSh ) ;
@@ -294,7 +294,7 @@ Prob CRF4DModeledRV<Y,X1,X2,X3>::getProb( const X1& x1, const X2& x2, const X3&
for ( int configRghtValSite=0; configRghtValSite<(1<<bitsValSite); configRghtValSite++ )
for ( int configValOverlap=0; configValOverlap<(1<<(bitsVal-bitsValSite)); configValOverlap++ ) {
int configRghtVal = (configValOverlap<<bitsValSite)+configRghtValSite;
- // For each possible preceding trellis node...
+ // For each possible preceding trellis node...
for ( int configLeftValSite=0; configLeftValSite<(1<<bitsValSite); configLeftValSite++ ) {
int configLeftVal = (configLeftValSite<<(bitsVal-bitsValSite))+configValOverlap;
// Add product of result and previous trellis cell to current trellis cell...
@@ -321,7 +321,7 @@ Prob CRF4DModeledRV<Y,X1,X2,X3>::getProb( const X1& x1, const X2& x2, const X3&
////////////////////////////////////////////////////////////////////////////////
-template <class Y,class X1,class X2,class X3>
+template <class Y,class X1,class X2,class X3>
bool CRF4DModeledRV<Y,X1,X2,X3>::readModelFields ( char* aps[], int numFields ) {
if ( 7==numFields )
setPotential ( X1(string(aps[1])), // globals
@@ -335,9 +335,9 @@ bool CRF4DModeledRV<Y,X1,X2,X3>::readModelFields ( char* aps[], int numFields )
////////////////////////////////////////////////////////////////////////////////
-template <class Y,class X1,class X2, class X3>
+template <class Y,class X1,class X2, class X3>
void CRF4DModeledRV<Y,X1,X2,X3>::writeObservCliqueConfigs ( FILE* pf, int frame, const char* psMdl,
- const X1& x1, const X2& x2,
+ const X1& x1, const X2& x2,
const X3& x3, bool bObsVal ) const {
fprintf ( pf, "%04d> %s ", frame, psMdl );
// For each shape (feature slope)...
diff --git a/contrib/synlm/hhmm/rvtl/include/nl-denot.h b/contrib/synlm/hhmm/rvtl/include/nl-denot.h
index 0b50663a1..be92168b8 100644
--- a/contrib/synlm/hhmm/rvtl/include/nl-denot.h
+++ b/contrib/synlm/hhmm/rvtl/include/nl-denot.h
@@ -80,7 +80,7 @@ void VecE<N,I,RC>::read ( char* ps, const ReaderContext& rc ) {
*/
char* psT; int i=0;
for ( char* psU=strtok_r(ps,",",&psT);
- psU && i<NUM_ENTS;
+ psU && i<NUM_ENTS;
psU=strtok_r(NULL,",",&psT),i++ )
StaticSafeArray<N,I>::set(i) = psU;
}
@@ -166,7 +166,7 @@ void VecV<N,I,RC,ND1,ND2>::read ( char* ps, VecVReaderContext& rc ) {
// Chop into individual coinds strings...
char* psT; int i=0;
for ( char* psU=strtok_r(ps,",",&psT);
- psU && i<NUM_ENTS;
+ psU && i<NUM_ENTS;
psU=strtok_r(NULL,",",&psT), i++ )
asV.set(i) = psU;
@@ -230,7 +230,7 @@ class JointVecV { //// : public StaticSafeArray<V1::NUM_ENTS+V2::NUM_ENTS,I> {
static const int NUM_ENTS;
// Constructor / destructor methods...
JointVecV ( ) { }
- JointVecV ( const V1& a1, const V2& a2 ) {
+ JointVecV ( const V1& a1, const V2& a2 ) {
////fprintf(stderr,"iJoin "); a1.V1::write(stderr); fprintf(stderr," "); a2.V2::write(stderr); fprintf(stderr,"\n");
for (int i=0; i<NUM_ENTS; i++) {
if ( i<V1::NUM_ENTS ) set(i) = (a1.get(i)==-1) ? IntType(-1) : (a1.get(i)<V1::NUM_ENTS) ? IntType(a1.get(i)) : a1.get(i)+V2::NUM_ENTS;
diff --git a/contrib/synlm/hhmm/rvtl/include/nl-dtree-cont.h b/contrib/synlm/hhmm/rvtl/include/nl-dtree-cont.h
index cf6b00d28..1deb757a5 100644
--- a/contrib/synlm/hhmm/rvtl/include/nl-dtree-cont.h
+++ b/contrib/synlm/hhmm/rvtl/include/nl-dtree-cont.h
@@ -75,7 +75,7 @@ class ContDTree2DModel : public Generic2DModel<Y,X,P>, public Tree<ContDecisNode
// Extraction methods...
const P getProb ( const Y y, const X& x ) const {
const Tree<ContDecisNode<Y,P> >* ptr = this;
- while ( !ptr->isTerm() ) {
+ while ( !ptr->isTerm() ) {
double sumsqr=0.0;
for(A a;a<X::getSize();a.setNext()) sumsqr += pow(x.get(a.toInt()),2.0) / X::getSize();
Wt wtdavg = -Tree<ContDecisNode<Y,P> >::getWt();
@@ -112,7 +112,7 @@ class ContDTree2DModel : public Generic2DModel<Y,X,P>, public Tree<ContDecisNode
};
////////////////////
-template <class Y,class X, class P>
+template <class Y,class X, class P>
bool ContDTree2DModel<Y,X,P>::readFields ( char* aps[], int numFields ) {
if ( /*aps[0]==sId &&*/ (3==numFields || 4==numFields) ) {
//fprintf(stderr,"%s,%d\n",aps[3],numFields);
@@ -171,7 +171,7 @@ class ContDTree3DModel : public Generic3DModel<Y,X1,X2,P> {
};
////////////////////
-template <class Y,class X1,class X2, class P>
+template <class Y,class X1,class X2, class P>
bool ContDTree3DModel<Y,X1,X2,P>::readFields ( char* aps[], int numFields ) {
if ( /*aps[0]==sId &&*/ (4==numFields || 5==numFields) ) {
//fprintf(stderr,"%s,%d\n",aps[3],numFields);
@@ -212,7 +212,7 @@ bool ContDTree3DModel<Y,X1,X2,P>::readFields ( char* aps[], int numFields ) {
////////////////////////////////////////////////////////////////////////////////
template<class Y, class X, class P>
-class TrainableContDTree2DModel : public ContDTree2DModel<Y,X,P> {
+class TrainableContDTree2DModel : public ContDTree2DModel<Y,X,P> {
private:
List<Joint2DRV<X,Y> > lxy;
public:
@@ -225,7 +225,7 @@ class TrainableContDTree2DModel : public ContDTree2DModel<Y,X,P> {
void train ( List<Joint2DRV<X,Y> >&, const double ) ;
void train ( const double d ) { train(lxy,d); }
////// Input / output methods...
- bool readData ( char* vs[], int numFields ) {
+ bool readData ( char* vs[], int numFields ) {
if ( 3==numFields ) lxy.add() = Joint2DRV<X,Y> ( X(vs[1]), Y(vs[2]) );
else return false;
return true;
@@ -312,7 +312,7 @@ void TrainableContDTree2DModel<Y,X,P>::train ( List<Joint2DRV<X,Y> >& lxy, cons
// if ( double(rand())/double(RAND_MAX) < prRarest/modelY.getProb(pxy->getSub2()) ) {
dCtr++;
- double gamma = dTot/(dTot+dCtr); // 1.0/(double(epoch)+dCtr/dTot); // 1.0/double(epoch); // 1.0/(double(epoch)+dCtr/(dTot*prRarest*2.0)); //
+ double gamma = dTot/(dTot+dCtr); // 1.0/(double(epoch)+dCtr/dTot); // 1.0/double(epoch); // 1.0/(double(epoch)+dCtr/(dTot*prRarest*2.0)); //
// Weight deltas for next epoch...
Wt wDelta = 0.0;
@@ -333,7 +333,7 @@ void TrainableContDTree2DModel<Y,X,P>::train ( List<Joint2DRV<X,Y> >& lxy, cons
P prY = 1.0 / ( 1.0 + exp(-wtdavg) );
// Calc deltas for each feature/attribute/dimension...
- double dEachWt = 1.0/dTot; // 1.0/dTot * modelY.getProb ( Y(1-pxy->getSub2().toInt()) ); // 1.0/(dTot*prRarest*2.0); //
+ double dEachWt = 1.0/dTot; // 1.0/dTot * modelY.getProb ( Y(1-pxy->getSub2().toInt()) ); // 1.0/(dTot*prRarest*2.0); //
wDelta += dEachWt * -1 * ( prY - P(double(pxy->getSub2().toInt())) );
for ( A a; a<X::getSize(); a.setNext() )
awDeltas.set(a) += dEachWt * pxy->getSub1().get(a.toInt()) * ( prY - P(double(pxy->getSub2().toInt())) );
@@ -439,7 +439,7 @@ void TrainableContDTree2DModel<Y,X,P>::train ( List<Joint2DRV<X,Y> >& lxy, cons
////////////////////////////////////////////////////////////////////////////////
template<class Y, class X1, class X2, class P>
-class TrainableContDTree3DModel : public ContDTree3DModel<Y,X1,X2,P> {
+class TrainableContDTree3DModel : public ContDTree3DModel<Y,X1,X2,P> {
private:
@@ -455,7 +455,7 @@ class TrainableContDTree3DModel : public ContDTree3DModel<Y,X1,X2,P> {
TrainableContDTree2DModel<Y,X2,P>& setTree(const X1& x1) { return static_cast<TrainableContDTree2DModel<Y,X2,P>&>(ContDTree3DModel<Y,X1,X2,P>::setTree(x1)); }
////// Add training data to per-subphone lists...
- bool readData ( char* vs[], int numFields ) {
+ bool readData ( char* vs[], int numFields ) {
if ( 4==numFields ) {
mqlxy[X1(vs[1])].add() = Joint2DRV<X2,Y> ( X2(vs[2]), Y(vs[3]) );
////mqlxy[X1(vs[1])].getLast()->write(stderr); fprintf(stderr,"\n");
diff --git a/contrib/synlm/hhmm/rvtl/include/nl-dtree.h b/contrib/synlm/hhmm/rvtl/include/nl-dtree.h
index 2396f395c..93a0e4d42 100644
--- a/contrib/synlm/hhmm/rvtl/include/nl-dtree.h
+++ b/contrib/synlm/hhmm/rvtl/include/nl-dtree.h
@@ -129,8 +129,8 @@ class DTree2DModel : public Tree < typename X::ElementType, DecisNode<X,Y,P> > {
friend StringInput operator>> ( pair<StringInput,DTree2DModel<Y,X,P>*> si_m, const char* psD ) {
if (StringInput(NULL)==si_m.first) return si_m.first;
Y y; String xs; StringInput si,si2; si=si_m.first; DTree2DModel<Y,X,P>* pm=si_m.second;
- while((si2=si>>" ")!=NULL)si=si2;
- si=si>>xs>>" ";
+ while((si2=si>>" ")!=NULL)si=si2;
+ si=si>>xs>>" ";
while((si2=si>>" ")!=NULL)si=si2;
// Find appropriate node, creating nodes as necessary...
for(int i=1; i<int(strlen(xs.c_array()))-1; i++) {
@@ -140,22 +140,22 @@ class DTree2DModel : public Tree < typename X::ElementType, DecisNode<X,Y,P> > {
if ( si!=NULL && si[0]==':' ) {
si=si>>": ";
- while((si2=si>>" ")!=NULL)si=si2;
+ while((si2=si>>" ")!=NULL)si=si2;
si=si>>y>>" ";
- while((si2=si>>" ")!=NULL)si=si2;
+ while((si2=si>>" ")!=NULL)si=si2;
si=si>>"= ";
- while((si2=si>>" ")!=NULL)si=si2;
+ while((si2=si>>" ")!=NULL)si=si2;
// Specify attribute number (at nonterminal) or probability in distribution (at terminal)...
return (si!=NULL) ? si>>pm->setProb(y)>>psD : si;
}
else if ( si!=NULL && si[0]=='=' ) {
si=si>>"= "; //cerr<<" in after equals "<<((si==NULL) ? "yes" : "no") << endl;
- while((si2=si>>" ")!=NULL)si=si2;
+ while((si2=si>>" ")!=NULL)si=si2;
//m.setA() = atoi(si.c_str());
int aVar = 0;
- si=si>>aVar>>psD;
- pm->setA()=aVar;
+ si=si>>aVar>>psD;
+ pm->setA()=aVar;
////cerr<<" at end "<<((si==NULL) ? "yes" : "no") << endl;
////cerr<<" m.getA() is "<< m.getA().toInt() << endl;
return si;
@@ -169,15 +169,15 @@ class DTree2DModel : public Tree < typename X::ElementType, DecisNode<X,Y,P> > {
si=si_m.first;
sRt = si.c_str();
if (sRt.find(':')!=string::npos) {
- while((si2=si>>" [")!=NULL)si=si2;
- si=si>>xs>>"] ";
- while((si2=si>>" ")!=NULL)si=si2;
+ while((si2=si>>" [")!=NULL)si=si2;
+ si=si>>xs>>"] ";
+ while((si2=si>>" ")!=NULL)si=si2;
si=si>>": ";
- while((si2=si>>" ")!=NULL)si=si2;
+ while((si2=si>>" ")!=NULL)si=si2;
si=si>>y>>" ";
- while((si2=si>>" ")!=NULL)si=si2;
+ while((si2=si>>" ")!=NULL)si=si2;
si=si>>"= ";
-
+
// For DTree, must find the node labeled by X
//Tree<B,DecisNode<X,Y,P> >* ptr = m;
//assert(ptr);
@@ -189,15 +189,15 @@ class DTree2DModel : public Tree < typename X::ElementType, DecisNode<X,Y,P> > {
// Specify attribute number (at nonterminal) or probability in distribution (at terminal)...
return (si!=NULL) ? si>>m.setProb(y)>>psD : si;
} else {
- while((si2=si>>" [")!=NULL)si=si2;
+ while((si2=si>>" [")!=NULL)si=si2;
si=si>>xs>>"] "; //cerr<<" in bracket "<<((si==NULL) ? "yes" : "no") << endl;
- while((si2=si>>" ")!=NULL)si=si2;
+ while((si2=si>>" ")!=NULL)si=si2;
si=si>>"= "; //cerr<<" in after equals "<<((si==NULL) ? "yes" : "no") << endl;
//m.setA() = atoi(si.c_str());
int aVar = 0;
- si=si>>aVar>>psD;
- m.setA()=aVar;
+ si=si>>aVar>>psD;
+ m.setA()=aVar;
//cerr<<" at end "<<((si==NULL) ? "yes" : "no") << endl;
//cerr<<" m.getA() is "<< m.getA().toInt() << endl;
return si;
@@ -209,7 +209,7 @@ class DTree2DModel : public Tree < typename X::ElementType, DecisNode<X,Y,P> > {
};
////////////////////
-template <class Y,class X, class P>
+template <class Y,class X, class P>
bool DTree2DModel<Y,X,P>::readFields ( Array<char*>& aps ) {
if ( /*aps[0]==sId &&*/ (3==aps.size() || 4==aps.size()) ) {
//fprintf(stderr,"%s,%d\n",aps[3],numFields);
@@ -269,7 +269,7 @@ class DTree3DModel {
};
////////////////////
-template <class Y,class X1,class X2, class P>
+template <class Y,class X1,class X2, class P>
bool DTree3DModel<Y,X1,X2,P>::readFields ( char* aps[], int numFields ) {
if ( /*aps[0]==sId &&*/ (4==numFields || 5==numFields) ) {
//fprintf(stderr,"%s,%d\n",aps[3],numFields);
@@ -307,7 +307,7 @@ bool DTree3DModel<Y,X1,X2,P>::readFields ( char* aps[], int numFields ) {
////////////////////////////////////////////////////////////////////////////////
template<class Y, class X, class P>
-class TrainableDTree2DModel : public DTree2DModel<Y,X,P> {
+class TrainableDTree2DModel : public DTree2DModel<Y,X,P> {
private:
// Type members...
typedef typename X::ElementType B;
@@ -485,7 +485,7 @@ void TrainableDTree2DModel<Y,X,P>::train ( List<Joint2DRV<X,Y> >& lxy, const De
////////////////////////////////////////////////////////////////////////////////
template<class Y, class X1, class X2, class P>
-class TrainableDTree3DModel : public DTree3DModel<Y,X1,X2,P> {
+class TrainableDTree3DModel : public DTree3DModel<Y,X1,X2,P> {
private:
diff --git a/contrib/synlm/hhmm/rvtl/include/nl-fixedmatrix.h b/contrib/synlm/hhmm/rvtl/include/nl-fixedmatrix.h
index dbb9d9d9d..5e8b4d6d0 100644
--- a/contrib/synlm/hhmm/rvtl/include/nl-fixedmatrix.h
+++ b/contrib/synlm/hhmm/rvtl/include/nl-fixedmatrix.h
@@ -34,7 +34,7 @@ class Matrix : public SafeArray2D<Id<int>,Id<int>,T> {
Matrix ( ) : SafeArray2D<Id<int>,Id<int>,T>( ) { }//{ xSize=0; ySize=0; }
Matrix (int x, int y) : SafeArray2D<Id<int>,Id<int>,T>(x,y) { }//{ xSize=x; ySize=y; }
Matrix (int x, int y, const T& t) : SafeArray2D<Id<int>,Id<int>,T>(x,y,t) { }//{ xSize=x; ySize=y; }
- Matrix (const Matrix& a) : SafeArray2D<Id<int>,Id<int>,T>(a.xSize(),a.ySize()) { //xSize=a.xSize; ySize=a.ySize;
+ Matrix (const Matrix& a) : SafeArray2D<Id<int>,Id<int>,T>(a.xSize(),a.ySize()) { //xSize=a.xSize; ySize=a.ySize;
for(int i=0;i<xSize();i++) for(int j=0;j<ySize();j++) this->set(i,j)=a.get(i,j); }
// Specification methods...
//Matrix& operator= ( const Matrix<T>& sat )
@@ -195,34 +195,34 @@ class Matrix : public SafeArray2D<Id<int>,Id<int>,T> {
}
return false;
}
- bool operator== ( const Matrix<T>& a ) const {
+ bool operator== ( const Matrix<T>& a ) const {
if (xSize()!=a.xSize() || ySize()!=a.ySize()) return false;
- for (int i=0;i<a.xSize();i++)
+ for (int i=0;i<a.xSize();i++)
for (int j=0;j<a.ySize();j++)
if (this->get(Id<int>(i),Id<int>(j))!=a.get(Id<int>(i),Id<int>(j))) return false;
return true;
}
// Input/output methods...
- friend ostream& operator<< ( ostream& os, const Matrix<T>& a ) {
+ friend ostream& operator<< ( ostream& os, const Matrix<T>& a ) {
os<<"\n ";
for (int i=0;i<a.xSize();i++) {
for (int j=0;j<a.ySize();j++) {
os<<((j==0)?"":",")<<a.get(Id<int>(i),Id<int>(j));
- }
+ }
os<<(i==a.xSize()-1?"\n":"\n ");
}
- return os;
+ return os;
}
- friend String& operator<< ( String& str, const Matrix<T>& a ) {
+ friend String& operator<< ( String& str, const Matrix<T>& a ) {
str<<"\n ";
for (int i=0;i<a.xSize();i++) {
for (int j=0;j<a.ySize();j++) {
str<<((j==0)?"":",")<<a.get(Id<int>(i),Id<int>(j));
- }
+ }
str<<";";
}
- return str;
+ return str;
}
string getString( ) const;
@@ -234,7 +234,7 @@ string Matrix<T>::getString() const {
for (int j=0;j<ySize();j++) {
str += ((j==0)?"":",");
str += this->get(Id<int>(i),Id<int>(j));
- }
+ }
str += ";";
}
return str;
diff --git a/contrib/synlm/hhmm/rvtl/include/nl-gauss.h b/contrib/synlm/hhmm/rvtl/include/nl-gauss.h
index a2213086f..f5cc45159 100644
--- a/contrib/synlm/hhmm/rvtl/include/nl-gauss.h
+++ b/contrib/synlm/hhmm/rvtl/include/nl-gauss.h
@@ -43,7 +43,7 @@ static const PDFVal VARIANCE_THRESHOLD = 0.01; //0.0001; //0
//
////////////////////////////////////////////////////////////////////////////////
-template <class Y>
+template <class Y>
class DiagGauss1DModel : public Generic1DModel<Y,PDFVal> {
private:
// Member variables...
@@ -53,7 +53,7 @@ class DiagGauss1DModel : public Generic1DModel<Y,PDFVal> {
SimpleHash<Id<int>,PDFVal> aMeans;
SimpleHash<Id<int>,PDFVal> aVariances;
PDFVal prInvRootNormVariances;
- PDFVal prProduct;
+ PDFVal prProduct;
SimpleHash<Id<int>,PDFVal> algprNegHalfInvVariances;
public:
// Constructor / destructor methods...
@@ -78,7 +78,7 @@ class DiagGauss1DModel : public Generic1DModel<Y,PDFVal> {
};
////////////////////////////////////////
-template <class Y>
+template <class Y>
inline void DiagGauss1DModel<Y>::precomputeVarianceTerms ( ) {
// Inverse square root of norm of variances...
setInvRootNormVar() = 1.0;
@@ -92,7 +92,7 @@ inline void DiagGauss1DModel<Y>::precomputeVarianceTerms ( ) {
}
////////////////////////////////////////
-template <class Y>
+template <class Y>
inline PDFVal DiagGauss1DModel<Y>::getProb ( const Y& y ) const {
// fprintf(stderr,"--------------------\n");
// y.write(stderr);
@@ -109,7 +109,7 @@ inline PDFVal DiagGauss1DModel<Y>::getProb ( const Y& y ) const {
}
////////////////////////////////////////
-template <class Y>
+template <class Y>
bool DiagGauss1DModel<Y>::readFields ( char* as[], int numFields ) {
if ( 0==strcmp(as[1],"m") && numFields>2 ) {
char* psT;
@@ -126,12 +126,12 @@ bool DiagGauss1DModel<Y>::readFields ( char* as[], int numFields ) {
}
////////////////////////////////////////
-template <class Y>
+template <class Y>
void DiagGauss1DModel<Y>::writeFields ( FILE* pf, const string& sPref ) const {
fprintf(pf,"%s m = ",sPref.c_str());
for(int i=0; i<getNumFeats(); i++) fprintf(pf,"%s%f",(0==i)?"":"_",getMean(i));
fprintf ( pf, "\n" ) ;
-
+
fprintf(pf,"%s v = ",sPref.c_str());
for(int i=0; i<getNumFeats(); i++) fprintf(pf,"%s%f",(0==i)?"":"_",getVariance(i));
fprintf ( pf, "\n" ) ;
@@ -141,7 +141,7 @@ void DiagGauss1DModel<Y>::writeFields ( FILE* pf, const string& sPref ) const {
////////////////////////////////////////////////////////////////////////////////
/*
-template <class Y,class X>
+template <class Y,class X>
class DiagGauss2DModel : public Generic2DModel<Y,X,PDFVal> {
private:
// Member variables...
@@ -177,7 +177,7 @@ class DiagGauss2DModel : public Generic2DModel<Y,X,PDFVal> {
////////////////////////////////////////////////////////////////////////////////
-template <class Y,class X1,class X2>
+template <class Y,class X1,class X2>
class DiagGauss3DModel : public Generic3DModel<Y,X1,X2,PDFVal> {
private:
// Member variables...
@@ -220,7 +220,7 @@ class DiagGauss3DModel : public Generic3DModel<Y,X1,X2,PDFVal> {
//
////////////////////////////////////////////////////////////////////////////////
-template <class Y>
+template <class Y>
class TrainableDiagGauss1DModel : public DiagGauss1DModel<Y> {
public:
TrainableDiagGauss1DModel ( ) : DiagGauss1DModel<Y>() { }
diff --git a/contrib/synlm/hhmm/rvtl/include/nl-hash.h b/contrib/synlm/hhmm/rvtl/include/nl-hash.h
index 809284db9..b4d228b9c 100644
--- a/contrib/synlm/hhmm/rvtl/include/nl-hash.h
+++ b/contrib/synlm/hhmm/rvtl/include/nl-hash.h
@@ -54,7 +54,7 @@ class SimpleHash : public hash_map<X,Y,SimpleHashFn<X>,SimpleHashEqual<X> > /*pu
// tr1::unordered_map<X,Y,SimpleHashFn<X>,SimpleHashEqual<X> > mxy;
static const Y yDummy;
//static Y yNonconstDummy;
-
+
public:
// typedef typename OrigHash::const_iterator const_iterator;
// typedef typename OrigHash::iterator iterator;
diff --git a/contrib/synlm/hhmm/rvtl/include/nl-hmm.h b/contrib/synlm/hhmm/rvtl/include/nl-hmm.h
index 2f6cd0104..c4414c4b7 100644
--- a/contrib/synlm/hhmm/rvtl/include/nl-hmm.h
+++ b/contrib/synlm/hhmm/rvtl/include/nl-hmm.h
@@ -209,7 +209,7 @@ template <class MY, class MX, class S, class B>
void HMM<MY,MX,S,B>::debugPrint() const{
for (int frame=0, numFrames=aatnTrellis.getxSize(); frame<numFrames; frame++) {
-
+
for (int beamIndex=0, beamSize=aatnTrellis.getySize(); beamIndex<beamSize; beamIndex++) {
if (aatnTrellis.get(frame,beamIndex).getLogProb().toDouble() > 0) {
@@ -306,7 +306,7 @@ void HMM<MY,MX,S,B>::updateRanked ( const typename MX::RandVarType& x, bool b1 )
// Add best transition (top of queue)...
//mx.getProb(o,my.setTrellDat(ashpiQueue.getTop().first,ashpiQueue.getTop().second));
if ( ashpiQueue.getSize() > 0 ) {
- S s; my.setTrellDat(s,ashpiQueue.getTop().second);
+ S s; my.setTrellDat(s,ashpiQueue.getTop().second);
bFull |= btn.tryAdd ( s, IB(ashpiQueue.getTop().first,my.setBackDat(ashpiQueue.getTop().second)), ashpiQueue.getTop().third );
////cerr<<ashpiQueue.getSize()<<" queue elems A "<<ashpiQueue.getTop()<<"\n";
////cerr<<"/-----A-----\\\n"<<ashpiQueue<<"\\-----A-----/\n";
@@ -379,7 +379,7 @@ void HMM<MY,MX,S,B>::updateSerial ( const typename MX::RandVarType& x ) {
// Incorporate into trellis...
btn.tryAdd ( s, IB(i,my.setBackDat(y)), lgprFull );
//if(OUTPUT_VERYNOISY)
- // fprintf ( stderr," (S_t-1:[e^%0.6f] * Y:e^%0.6f * X:e^%0.6f = S_t:[e^%0.6f])\n",
+ // fprintf ( stderr," (S_t-1:[e^%0.6f] * Y:e^%0.6f * X:e^%0.6f = S_t:[e^%0.6f])\n",
// float(aatnTrellis.get(frameLast-1,i).getLogProb().toInt())/100.0,
// float(lgprY.toInt())/100.0,
// float(lgprX.toInt())/100.0,
@@ -389,7 +389,7 @@ void HMM<MY,MX,S,B>::updateSerial ( const typename MX::RandVarType& x ) {
}
// for(int i=0;i<BEAM_WIDTH;i++) {
-// fprintf(stderr,"> "); btn.get(i)->first.write(stderr); fprintf(stderr,"\n");
+// fprintf(stderr,"> "); btn.get(i)->first.write(stderr); fprintf(stderr,"\n");
// }
btn.sort(atnSorted);
@@ -429,8 +429,8 @@ void HMM<MY,MX,S,B>::each ( const typename MX::RandVarType& x, Beam<LogProb,S,IB
const TrellNode<S,B>& tnsbPrev = aatnTrellis.get(frameLast-1,i);
// If prob still not below beam minimum...
if ( tnsbPrev.getLogProb() > btn.getMin().getScore() ) {
- //if (OUTPUT_VERYNOISY) { fprintf(stderr,"FROM: "); tnsbPrev.getId().write(stderr); fprintf(stderr,"\n"); }
-
+ //if (OUTPUT_VERYNOISY) { fprintf(stderr,"FROM: "); tnsbPrev.getId().write(stderr); fprintf(stderr,"\n"); }
+
// For each possible transition...
const S& sPrev = tnsbPrev.getId();
typename MY::IterVal y;
@@ -447,7 +447,7 @@ void HMM<MY,MX,S,B>::each ( const typename MX::RandVarType& x, Beam<LogProb,S,IB
lgprX = mx.getProb(x,my.setTrellDat(s,y)); if ( !OUTPUT_VERYNOISY && LogProb()==lgprX ) continue;
#endif /////////////////////////////////////////////////////////////////
lgprFull = tnsbPrev.getLogProb() * lgprY * lgprX;
- if (OUTPUT_VERYNOISY) {
+ if (OUTPUT_VERYNOISY) {
boost::mutex::scoped_lock lock1(mutexHmmParanoiaLock);
//fprintf(stderr," TO: "); y.write(stderr); fprintf(stderr,"\n");
cout<<" "<<tnsbPrev.getId()<<" ==("<<tnsbPrev.getLogProb().toInt()<<"*"<<lgprY.toInt()<<"*"<<lgprX.toInt()<<"="<<lgprFull.toInt()<<")==> "<<y<<"\n";
@@ -459,7 +459,7 @@ void HMM<MY,MX,S,B>::each ( const typename MX::RandVarType& x, Beam<LogProb,S,IB
// Incorporate into trellis...
btn.tryAdd ( s, IB(i,my.setBackDat(y)), lgprFull );
// if(OUTPUT_VERYNOISY)
-// fprintf ( stderr," (S_t-1:[e^%0.6f] * Y:e^%0.6f * X:e^%0.6f = S_t:[e^%0.6f])\n",
+// fprintf ( stderr," (S_t-1:[e^%0.6f] * Y:e^%0.6f * X:e^%0.6f = S_t:[e^%0.6f])\n",
// float(aatnTrellis.get(frameLast-1,i).getLogProb().toInt())/100.0,
// float(lgprY.toInt())/100.0,
// float(lgprO.toInt())/100.0,
@@ -695,7 +695,7 @@ std::list<string> HMM<MY,MX,S,B>::getMLS(const S& sLast) const {
//// sprintf(tmp,"HYPOTH %04d> ", fr-1);
//// string tString(tmp);
//// tString +=
- string tString =
+ string tString =
//// aatnTrellis.get(fr,iBest).getId().getString() + " " +
aatnTrellis.get(fr,iBest).getBackData().getString()
//// + "\n"
@@ -737,7 +737,7 @@ template <class MY, class MX, class S, class B>
void HMM<MY,MX,S,B>::writeCurr ( ostream& os, int f=-1 ) const {
if ( -1==f ) f=frameLast;
if ( 0<=f && f<=frameLast )
- for ( int i=0; i<BEAM_WIDTH; i++ )
+ for ( int i=0; i<BEAM_WIDTH; i++ )
if(!(aatnTrellis.get(f,i).getLogProb() == LogProb())){
//fprintf(pf,"at f=%04d b=%04d: ",f,i);
os<<"at "<<std::setfill('0')<<std::setw(4)<<f<<" "<<std::setw(4)<<i<<": ";
@@ -765,7 +765,7 @@ void HMM<MY,MX,S,B>::writeCurrSum ( FILE* pf, int f=-1 ) const {
if ( 0<=f && f<=frameLast ) {
LogProb sum = 0.0;
LogProb logtop = 0.0;
- for ( int i=0; i<BEAM_WIDTH; i++ )
+ for ( int i=0; i<BEAM_WIDTH; i++ )
if(!(aatnTrellis.get(f,i).getLogProb() == LogProb())){
if(i==0) { logtop=aatnTrellis.get(f,i).getLogProb(); }
LogProb big1 = sum - logtop;
@@ -818,7 +818,7 @@ void HMM<MY,MX,S,B>::gatherElementsInBeam( SafeArray1D<Id<int>,pair<S,LogProb> >
result->init(BEAM_WIDTH);
if ( -1==f ) f=frameLast;
if ( 0<=f && f<=frameLast ) {
- for ( int i=0; i<BEAM_WIDTH && &(aatnTrellis.get(f,i))!=NULL; i++ ) {
+ for ( int i=0; i<BEAM_WIDTH && &(aatnTrellis.get(f,i))!=NULL; i++ ) {
result->set(i).first = aatnTrellis.get(f,i).getId();
result->set(i).second = aatnTrellis.get(f,i).getLogProb();
}
@@ -836,7 +836,7 @@ void HMM<MY,MX,S,B>::writeCurrEntropy ( FILE* pf, int f=-1 ) const {
if ( 0<=f && f<=frameLast ) {
LogProb logh = 0.0;
LogProb logtop = 0.0;
- for ( int i=0; i<BEAM_WIDTH; i++ )
+ for ( int i=0; i<BEAM_WIDTH; i++ )
if(!(aatnTrellis.get(f,i).getLogProb() == LogProb())){
if(i==0) { logtop=aatnTrellis.get(f,i).getLogProb(); }
LogProb big1 = logh - logtop;
@@ -862,12 +862,12 @@ void HMM<MY,MX,S,B>::writeCurrDepths ( FILE* pf, int f=-1 ) const {
Array<int> depths = Array<int>();
Array<LogProb> logprobs = Array<LogProb>();
double avgdepth = 0.0;
- for ( int i=0; i<BEAM_WIDTH; i++ )
+ for ( int i=0; i<BEAM_WIDTH; i++ )
if(!(aatnTrellis.get(f,i).getLogProb() == LogProb())){
if(i==0) { logtop=aatnTrellis.get(f,i).getLogProb(); }
logprobs.set(i) = aatnTrellis.get(f,i).getLogProb();
-
+
// loop over values in S node to find lowest meaningful depth
for ( int j=0; j<aatnTrellis.get(f,i).getId().first.getSize(); j++) {
// store the depth, if it's equal to G_BOT/G_BOT
@@ -996,7 +996,7 @@ int HMM<MY,MX,S,B>::getBeamUsed ( int f=-1 ) const {
if ( -1==f ) f=frameLast;
int ctr=0;
if ( 0<=f && f<=frameLast )
- for ( int i=0; i<BEAM_WIDTH; i++ )
+ for ( int i=0; i<BEAM_WIDTH; i++ )
if(!(aatnTrellis.get(f,i).getLogProb() == LogProb())){
ctr++;
}
diff --git a/contrib/synlm/hhmm/rvtl/include/nl-hmm2.h b/contrib/synlm/hhmm/rvtl/include/nl-hmm2.h
index 711d589be..04941088d 100644
--- a/contrib/synlm/hhmm/rvtl/include/nl-hmm2.h
+++ b/contrib/synlm/hhmm/rvtl/include/nl-hmm2.h
@@ -269,7 +269,7 @@ void HMM<MH,MO,X,B>::updateRanked ( const typename MO::RandVarType& o ) {
// Add best transition (top of queue)...
//mo.getProb(o,mh.setTrellDat(axhpiQueue.getTop().first,axhpiQueue.getTop().second));
if ( axhpiQueue.getSize() > 0 ) {
- X x; mh.setTrellDat(x,axhpiQueue.getTop().second);
+ X x; mh.setTrellDat(x,axhpiQueue.getTop().second);
bFull |= btn.tryAdd ( x, IB(axhpiQueue.getTop().first,mh.setBackDat(axhpiQueue.getTop().second)), axhpiQueue.getTop().third );
//cerr<<axhpiQueue.getSize()<<" queue elems A "<<axhpiQueue.getTop()<<"\n";
//cerr<<"/-----A-----\\\n + bFull: "<<bFull<<"\naxhpiQueue: \n"<<axhpiQueue<<"\\-----A-----/\n";
@@ -341,7 +341,7 @@ void HMM<MH,MO,X,B>::updateSerial ( const typename MO::RandVarType& o ) {
// Incorporate into trellis...
btn.tryAdd ( x, IB(i,mh.setBackDat(h)), lgprFull );
//if(OUTPUT_VERYNOISY)
- // fprintf ( stderr," (X_t-1:[e^%0.6f] * H:e^%0.6f * O:e^%0.6f = X_t:[e^%0.6f])\n",
+ // fprintf ( stderr," (X_t-1:[e^%0.6f] * H:e^%0.6f * O:e^%0.6f = X_t:[e^%0.6f])\n",
// float(aatnTrellis.get(frameLast-1,i).getLogProb().toInt())/100.0,
// float(lgprH.toInt())/100.0,
// float(lgprO.toInt())/100.0,
@@ -351,7 +351,7 @@ void HMM<MH,MO,X,B>::updateSerial ( const typename MO::RandVarType& o ) {
}
// for(int i=0;i<BEAM_WIDTH;i++) {
-// fprintf(stderr,"> "); btn.get(i)->first.write(stderr); fprintf(stderr,"\n");
+// fprintf(stderr,"> "); btn.get(i)->first.write(stderr); fprintf(stderr,"\n");
// }
btn.sort(atnSorted);
@@ -390,8 +390,8 @@ void HMM<MH,MO,X,B>::each ( const typename MO::RandVarType& o, Beam<LogProb,X,IB
const TrellNode<X,B>& tnxbPrev = aatnTrellis.get(frameLast-1,i);
// If prob still not below beam minimum...
if ( tnxbPrev.getLogProb() > btn.getMin().getScore() ) {
- //if (OUTPUT_VERYNOISY) { fprintf(stderr,"FROM: "); tnxbPrev.getId().write(stderr); fprintf(stderr,"\n"); }
-
+ //if (OUTPUT_VERYNOISY) { fprintf(stderr,"FROM: "); tnxbPrev.getId().write(stderr); fprintf(stderr,"\n"); }
+
// For each possible transition...
const X& xPrev = tnxbPrev.getId();
typename MH::IterVal h;
@@ -408,7 +408,7 @@ void HMM<MH,MO,X,B>::each ( const typename MO::RandVarType& o, Beam<LogProb,X,IB
lgprO = mo.getProb(o,mh.setTrellDat(x,h)); if ( !OUTPUT_VERYNOISY && LogProb()==lgprO ) continue;
#endif /////////////////////////////////////////////////////////////////
lgprFull = tnxbPrev.getLogProb() * lgprH * lgprO;
- if (OUTPUT_VERYNOISY) {
+ if (OUTPUT_VERYNOISY) {
boost::mutex::scoped_lock lock1(mutexHmmParanoiaLock);
//fprintf(stderr," TO: "); h.write(stderr); fprintf(stderr,"\n");
cout<<" "<<tnxbPrev.getId()<<" ==("<<tnxbPrev.getLogProb().toInt()<<"*"<<lgprH.toInt()<<"*"<<lgprO.toInt()<<"="<<lgprFull.toInt()<<")==> "<<h<<"\n";
@@ -420,7 +420,7 @@ void HMM<MH,MO,X,B>::each ( const typename MO::RandVarType& o, Beam<LogProb,X,IB
// Incorporate into trellis...
btn.tryAdd ( x, IB(i,mh.setBackDat(h)), lgprFull );
// if(OUTPUT_VERYNOISY)
-// fprintf ( stderr," (X_t-1:[e^%0.6f] * H:e^%0.6f * O:e^%0.6f = X_t:[e^%0.6f])\n",
+// fprintf ( stderr," (X_t-1:[e^%0.6f] * H:e^%0.6f * O:e^%0.6f = X_t:[e^%0.6f])\n",
// float(aatnTrellis.get(frameLast-1,i).getLogProb().toInt())/100.0,
// float(lgprH.toInt())/100.0,
// float(lgprO.toInt())/100.0,
@@ -656,7 +656,7 @@ std::list<string> HMM<MH,MO,X,B>::getMLS(const X& xLast) const {
//// sprintf(tmp,"HYPOTH %04d> ", fr-1);
//// string tString(tmp);
//// tString +=
- string tString =
+ string tString =
//// aatnTrellis.get(fr,iBest).getId().getString() + " " +
aatnTrellis.get(fr,iBest).getBackData().getString()
//// + "\n"
@@ -697,7 +697,7 @@ template <class MH, class MO, class X, class B>
void HMM<MH,MO,X,B>::writeCurr ( FILE* pf, int f=-1 ) const {
if ( -1==f ) f=frameLast;
if ( 0<=f && f<=frameLast )
- for ( int i=0; i<BEAM_WIDTH; i++ )
+ for ( int i=0; i<BEAM_WIDTH; i++ )
if(!(aatnTrellis.get(f,i).getLogProb() == LogProb())){
fprintf(pf,"at f=%04d b=%04d: ",f,i);
String str; str<<aatnTrellis.get(f,i).getId(); //.write(pf);
@@ -721,7 +721,7 @@ void HMM<MH,MO,X,B>::writeCurrSum ( FILE* pf, int f=-1 ) const {
if ( 0<=f && f<=frameLast ) {
LogProb sum = 0.0;
LogProb logtop = 0.0;
- for ( int i=0; i<BEAM_WIDTH; i++ )
+ for ( int i=0; i<BEAM_WIDTH; i++ )
if(!(aatnTrellis.get(f,i).getLogProb() == LogProb())){
if(i==0) { logtop=aatnTrellis.get(f,i).getLogProb(); }
LogProb big1 = sum - logtop;
@@ -741,7 +741,7 @@ void HMM<MH,MO,X,B>::writeCurrEntropy ( FILE* pf, int f=-1 ) const {
if ( 0<=f && f<=frameLast ) {
LogProb logh = 0.0;
LogProb logtop = 0.0;
- for ( int i=0; i<BEAM_WIDTH; i++ )
+ for ( int i=0; i<BEAM_WIDTH; i++ )
if(!(aatnTrellis.get(f,i).getLogProb() == LogProb())){
if(i==0) { logtop=aatnTrellis.get(f,i).getLogProb(); }
LogProb big1 = logh - logtop;
@@ -768,12 +768,12 @@ void HMM<MH,MO,X,B>::writeCurrDepths ( FILE* pf, int f=-1 ) const {
Array<int> depths = Array<int>();
Array<LogProb> logprobs = Array<LogProb>();
double avgdepth = 0.0;
- for ( int i=0; i<BEAM_WIDTH; i++ )
+ for ( int i=0; i<BEAM_WIDTH; i++ )
if(!(aatnTrellis.get(f,i).getLogProb() == LogProb())){
if(i==0) { logtop=aatnTrellis.get(f,i).getLogProb(); }
logprobs.set(i) = aatnTrellis.get(f,i).getLogProb();
-
+
// loop over values in S node to find lowest meaningful depth
for ( int j=0; j<aatnTrellis.get(f,i).getId().first.getSize(); j++) {
// store the depth, if it's equal to G_BOT/G_BOT
@@ -900,7 +900,7 @@ int HMM<MH,MO,X,B>::getBeamUsed ( int f=-1 ) const {
if ( -1==f ) f=frameLast;
int ctr=0;
if ( 0<=f && f<=frameLast )
- for ( int i=0; i<BEAM_WIDTH; i++ )
+ for ( int i=0; i<BEAM_WIDTH; i++ )
if(!(aatnTrellis.get(f,i).getLogProb() == LogProb())){
ctr++;
}
diff --git a/contrib/synlm/hhmm/rvtl/include/nl-hmmloop.h b/contrib/synlm/hhmm/rvtl/include/nl-hmmloop.h
index c476b4271..a8b8d5f27 100644
--- a/contrib/synlm/hhmm/rvtl/include/nl-hmmloop.h
+++ b/contrib/synlm/hhmm/rvtl/include/nl-hmmloop.h
@@ -348,7 +348,7 @@ const TrellNode<S,B>& HMMLoop<MY,MX,S,B>::update ( const typename MX::RandVarTyp
//modX.getProb(o,modY.setTrellDat(ashpiQueue.getTop().first,ashpiQueue.getTop().second));
if ( ashpiQueue.getSize() > 0 ) {
S s ( ashpiQueue.getTop().second );
- ////S s; modY.setTrellDat(s,ashpiQueue.getTop().second);
+ ////S s; modY.setTrellDat(s,ashpiQueue.getTop().second);
bFull |= btn.tryAdd ( s, IB(ashpiQueue.getTop().first,B(ashpiQueue.getTop().second)), ashpiQueue.getTop().third );
////cerr<<ashpiQueue.getSize()<<" queue elems A "<<ashpiQueue.getTop()<<"\n";
////cerr<<"/-----A-----\\\n"<<ashpiQueue<<"\\-----A-----/\n";
diff --git a/contrib/synlm/hhmm/rvtl/include/nl-linsep.h b/contrib/synlm/hhmm/rvtl/include/nl-linsep.h
index ac3ef3312..5c644a0fb 100644
--- a/contrib/synlm/hhmm/rvtl/include/nl-linsep.h
+++ b/contrib/synlm/hhmm/rvtl/include/nl-linsep.h
@@ -90,8 +90,8 @@ class Vector : public X {
Vector<X> operator- ( ElementType d ) const { Vector<X> vO; for(uint i=0;i<SIZE;i++) vO[i] = X::get(i)-d; return vO; }
friend Vector<X> operator* ( ElementType d, const Vector<X>& v ) { Vector<X> vO; for(uint i=0;i<SIZE;i++) vO[i] = d*v[i]; return vO; }
friend Vector<X> operator/ ( ElementType d, const Vector<X>& v ) { Vector<X> vO; for(uint i=0;i<SIZE;i++) vO[i] = d/v[i]; return vO; }
- friend Vector<X> operator+ ( ElementType d, const Vector<X>& v ) { Vector<X> vO; for(uint i=0;i<SIZE;i++) vO[i] = d+v[i]; return vO; }
- friend Vector<X> operator- ( ElementType d, const Vector<X>& v ) { Vector<X> vO; for(uint i=0;i<SIZE;i++) vO[i] = d-v[i]; return vO; }
+ friend Vector<X> operator+ ( ElementType d, const Vector<X>& v ) { Vector<X> vO; for(uint i=0;i<SIZE;i++) vO[i] = d+v[i]; return vO; }
+ friend Vector<X> operator- ( ElementType d, const Vector<X>& v ) { Vector<X> vO; for(uint i=0;i<SIZE;i++) vO[i] = d-v[i]; return vO; }
Vector<X>& operator*= ( ElementType d ) { for(uint i=0;i<SIZE;i++) X::set(i)*=d; return *this; }
Vector<X>& operator/= ( ElementType d ) { for(uint i=0;i<SIZE;i++) X::set(i)/=d; return *this; }
Vector<X>& operator+= ( ElementType d ) { for(uint i=0;i<SIZE;i++) X::set(i)+=d; return *this; }
diff --git a/contrib/synlm/hhmm/rvtl/include/nl-mixture.h b/contrib/synlm/hhmm/rvtl/include/nl-mixture.h
index 2da5aacb2..3a88bea81 100644
--- a/contrib/synlm/hhmm/rvtl/include/nl-mixture.h
+++ b/contrib/synlm/hhmm/rvtl/include/nl-mixture.h
@@ -97,7 +97,7 @@ class Mixture3DModel : public Generic2DModel<Y,X,Prob> {
//
////////////////////////////////////////////////////////////////////////////////
-template <template <class MY> class M,class Y,class C>
+template <template <class MY> class M,class Y,class C>
class TrainableMixture2DModel : public Mixture2DModel<M,Y,C> {
// private:
// LogPDFVal logpdfPrevDataAvg;
@@ -110,7 +110,7 @@ class TrainableMixture2DModel : public Mixture2DModel<M,Y,C> {
};
////////////////////////////////////////
-template <template <class MY> class M,class Y,class C>
+template <template <class MY> class M,class Y,class C>
void TrainableMixture2DModel<M,Y,C>::updateFields ( const List<Joint2DRV<Y,Prob> >& lyp, const PDFVal WEIGHT_LIMIT, bool& bShouldStop ) {
LogPDFVal logpdfData = 0.0;
CPT1DModel<C,Prob> mprPseudoEmpC; // pseudo-empirical prob marginal
@@ -178,7 +178,7 @@ void TrainableMixture2DModel<M,Y,C>::updateFields ( const List<Joint2DRV<Y,Prob>
}
////////////////////////////////////////
-template <template <class MY> class M,class Y,class C>
+template <template <class MY> class M,class Y,class C>
void TrainableMixture2DModel<M,Y,C>::train ( List<Joint2DRV<Y,Prob> >& lyp, const int EPOCH_LIMIT, const PDFVal WEIGHT_LIMIT ) {
// Normalize model...
@@ -204,7 +204,7 @@ void TrainableMixture2DModel<M,Y,C>::train ( List<Joint2DRV<Y,Prob> >& lyp, cons
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
-template <template <class MY> class M,class Y,class X,class C>
+template <template <class MY> class M,class Y,class X,class C>
class TrainableMixture3DModel : public Generic2DModel<Y,X,C> {
private:
string sId;
@@ -225,7 +225,7 @@ class TrainableMixture3DModel : public Generic2DModel<Y,X,C> {
};
////////////////////////////////////////
-template <template <class MY> class M,class Y,class X,class C>
+template <template <class MY> class M,class Y,class X,class C>
void TrainableMixture3DModel<M,Y,X,C>::train ( const int EPOCH_LIMIT, const PDFVal WEIGHT_LIMIT ) {
// Update each subphone from list...
int ctr = 0;
@@ -237,7 +237,7 @@ void TrainableMixture3DModel<M,Y,X,C>::train ( const int EPOCH_LIMIT, const PDFV
}
////////////////////////////////////////
-template <template <class MY> class M,class Y,class X,class C>
+template <template <class MY> class M,class Y,class X,class C>
void TrainableMixture3DModel<M,Y,X,C>::train ( const List<Joint3DRV<X,Y,Prob> >& lxyp, const int EPOCH_LIMIT, const PDFVal WEIGHT_LIMIT ) {
// Chop list into phone-specific sub-lists...
ListedObject<Joint3DRV<X,Y,Prob> >* pxyp;
@@ -248,7 +248,7 @@ void TrainableMixture3DModel<M,Y,X,C>::train ( const List<Joint3DRV<X,Y,Prob> >&
}
////////////////////////////////////////
-template <template <class MY> class M,class Y,class X,class C>
+template <template <class MY> class M,class Y,class X,class C>
bool TrainableMixture3DModel<M,Y,X,C>::readData ( char* as[], int numFields ) {
if ( /*as[0]!=sId+"dat" ||*/ numFields!=3 ) return false;
alyp.set(X(as[1])).add() = Joint2DRV<Y,Prob>(Y(as[2]),Prob(1.0));
@@ -256,7 +256,7 @@ bool TrainableMixture3DModel<M,Y,X,C>::readData ( char* as[], int numFields ) {
}
////////////////////////////////////////
-template <template <class MY> class M,class Y,class X,class C>
+template <template <class MY> class M,class Y,class X,class C>
void TrainableMixture3DModel<M,Y,X,C>::writeFields ( FILE* pf, string sPref ) {
X x; for ( bool b=x.setFirst(); b; b=x.setNext() ) {
am.get(x).writeFields(pf,sPref+" "+x.getString());
diff --git a/contrib/synlm/hhmm/rvtl/include/nl-modelfile.h b/contrib/synlm/hhmm/rvtl/include/nl-modelfile.h
index dc6bec487..8b9730659 100644
--- a/contrib/synlm/hhmm/rvtl/include/nl-modelfile.h
+++ b/contrib/synlm/hhmm/rvtl/include/nl-modelfile.h
@@ -37,7 +37,7 @@ void processModelFilePtr ( FILE* pf, bool rF(Array<char*>&) ) {
int i=0; int numFields=0; int c=' '; int line=1;
CONSUME_ALL(pf,c,WHITESPACE(c),line); // Get to first record
while ( c!=EOF ) { // For each record
- if ( c=='#' ) CONSUME_ALL(pf, c, c!='\n' && c!='\0', line ) ; // If comment, consume
+ if ( c=='#' ) CONSUME_ALL(pf, c, c!='\n' && c!='\0', line ) ; // If comment, consume
else { // If no comment,
Array<char*> aps(100);
String psBuff(1000);
@@ -49,7 +49,7 @@ void processModelFilePtr ( FILE* pf, bool rF(Array<char*>&) ) {
if (!z) break;
aps[i]=z;
}
-
+
if ( !rF(aps) ) // Try to process fields, else complain
fprintf( stderr, "\nERROR: %d %d-arg %s in line %d\n\n", numFields, aps.size(), aps[0], line);
}
@@ -75,7 +75,7 @@ void processModelSocket ( const int tSockfd, int& c, bool rF(Array<char*>&) ) {
int i=0; int numFields=0; int line=1;
CONSUME_ALL_SOCKET(tSockfd,c,WHITESPACE(c),line); // Get to first record
while ( c!='\0' && c!='\5' ) { // For each record
- if ( c=='#' ) CONSUME_ALL_SOCKET(tSockfd, c, (c!='\n' && c!='\0' && c!='\5'), line ) ; // If comment, consume
+ if ( c=='#' ) CONSUME_ALL_SOCKET(tSockfd, c, (c!='\n' && c!='\0' && c!='\5'), line ) ; // If comment, consume
else { // If no comment,
Array<char*> aps(100);
String psBuff(1000);
@@ -88,7 +88,7 @@ void processModelSocket ( const int tSockfd, int& c, bool rF(Array<char*>&) ) {
if (!z) break;
aps[i]=z;
}
-
+
if ( !rF(aps) ) // Try to process fields, else complain
fprintf( stderr, "\nERROR: %d-arg %s in line %d\n\n", numFields, aps[0], line);
}
@@ -97,7 +97,7 @@ void processModelSocket ( const int tSockfd, int& c, bool rF(Array<char*>&) ) {
}
void processModelSocket ( const int tSockfd, bool rF(Array<char*>&) ) {
- int c=' ';
+ int c=' ';
processModelSocket ( tSockfd, c, rF );
}
diff --git a/contrib/synlm/hhmm/rvtl/include/nl-oblidtree.h b/contrib/synlm/hhmm/rvtl/include/nl-oblidtree.h
index 24c82e313..d5bfd5c8e 100644
--- a/contrib/synlm/hhmm/rvtl/include/nl-oblidtree.h
+++ b/contrib/synlm/hhmm/rvtl/include/nl-oblidtree.h
@@ -80,12 +80,12 @@ class binuint {
// Input / output methods...
friend StringInput operator>> ( StringInput si, binuint& i ) {
if(si==NULL) return si;
- i.b=0;
+ i.b=0;
for ( char c=si[0]; '0'<=c && c<='1'; ++si,c=si[0])
{ i.b=i.b*2+c-'0'; }
return si; }
- friend ostream& operator<< ( ostream& os, binuint i ) { for(int e=uint(log2(i.b));e>=0;e--)os <<((i.b>>e)%2); return os; }
- friend String& operator<< ( String& str, binuint i ) { for(int e=uint(log2(i.b));e>=0;e--)str<<((i.b>>e)%2); return str; }
+ friend ostream& operator<< ( ostream& os, binuint i ) { for(int e=uint(log2(i.b));e>=0;e--)os <<((i.b>>e)%2); return os; }
+ friend String& operator<< ( String& str, binuint i ) { for(int e=uint(log2(i.b));e>=0;e--)str<<((i.b>>e)%2); return str; }
};
////////////////////////////////////////////////////////////////////////////////
diff --git a/contrib/synlm/hhmm/rvtl/include/nl-prob.h b/contrib/synlm/hhmm/rvtl/include/nl-prob.h
index 76cf2fb57..03211404b 100644
--- a/contrib/synlm/hhmm/rvtl/include/nl-prob.h
+++ b/contrib/synlm/hhmm/rvtl/include/nl-prob.h
@@ -43,7 +43,7 @@ class Prob {
Prob ( ) { gVal = 0.0; }
Prob (double d) { gVal = d; }
Prob (const char* ps) { gVal = atof(ps); }
-
+
operator double() const { return gVal; }
double toDouble() const { return gVal; }
Prob& operator+= ( const Prob p ) { gVal += p.gVal; return *this; }
@@ -54,7 +54,7 @@ class Prob {
friend ostream& operator<< ( ostream& os, const Prob& pr ) { return os<<pr.toDouble(); }
friend String& operator<< ( String& str, const Prob& pr ) { return str<<pr.toDouble(); }
friend pair<StringInput,Prob*> operator>> ( StringInput si, Prob& n ) { return pair<StringInput,Prob*>(si,&n); }
- friend StringInput operator>> ( pair<StringInput,Prob*> si_n, const char* psDlm ) {
+ friend StringInput operator>> ( pair<StringInput,Prob*> si_n, const char* psDlm ) {
double d=0.0; StringInput si=si_n.first>>d>>psDlm; *si_n.second=Prob(d); return si; }
};
@@ -129,7 +129,7 @@ class LogProb : public Id<int> {
friend ostream& operator<< ( ostream& os, const LogProb& lp ) { return os<<lp.toInt(); }
friend String& operator<< ( String& str, const LogProb& lp ) { return str<<lp.toInt(); }
friend pair<StringInput,LogProb*> operator>> ( StringInput si, LogProb& n ) { return pair<StringInput,LogProb*>(si,&n); }
- friend StringInput operator>> ( pair<StringInput,LogProb*> si_n, const char* psDlm ) {
+ friend StringInput operator>> ( pair<StringInput,LogProb*> si_n, const char* psDlm ) {
double d=0.0; StringInput si=si_n.first>>d>>psDlm; *si_n.second=LogProb(d); return si; }
};
diff --git a/contrib/synlm/hhmm/rvtl/include/nl-probmodel.h b/contrib/synlm/hhmm/rvtl/include/nl-probmodel.h
index 2dcff7b30..2b0a0281c 100644
--- a/contrib/synlm/hhmm/rvtl/include/nl-probmodel.h
+++ b/contrib/synlm/hhmm/rvtl/include/nl-probmodel.h
@@ -33,7 +33,7 @@
//
////////////////////////////////////////////////////////////////////////////////
-template<class Y,class P>
+template<class Y,class P>
class Generic1DModel {
public:
typedef Y RVType;
@@ -45,7 +45,7 @@ class Generic1DModel {
////////////////////////////////////////////////////////////
-template<class Y,class X1,class P>
+template<class Y,class X1,class P>
class Generic2DModel {
public:
typedef Y RVType;
@@ -60,7 +60,7 @@ class Generic2DModel {
////////////////////////////////////////////////////////////
-template<class Y,class X1,class X2,class P>
+template<class Y,class X1,class X2,class P>
class Generic3DModel {
public:
typedef Y RVType;
@@ -76,7 +76,7 @@ class Generic3DModel {
////////////////////////////////////////////////////////////
-template<class Y,class X1,class X2,class X3,class P>
+template<class Y,class X1,class X2,class X3,class P>
class Generic4DModel {
public:
typedef Y RVType;
@@ -93,7 +93,7 @@ class Generic4DModel {
////////////////////////////////////////////////////////////
-template<class Y,class X1,class X2,class X3,class X4,class P>
+template<class Y,class X1,class X2,class X3,class X4,class P>
class Generic5DModel {
public:
typedef Y RVType;
@@ -111,7 +111,7 @@ class Generic5DModel {
////////////////////////////////////////////////////////////
-template<class Y,class X1,class X2,class X3,class X4,class X5,class P>
+template<class Y,class X1,class X2,class X3,class X4,class X5,class P>
class Generic6DModel {
public:
typedef Y RVType;
@@ -130,7 +130,7 @@ class Generic6DModel {
////////////////////////////////////////////////////////////
-template<class Y,class X1,class X2,class X3,class X4,class X5,class X6,class P>
+template<class Y,class X1,class X2,class X3,class X4,class X5,class X6,class P>
class Generic7DModel {
public:
typedef Y RVType;
@@ -302,7 +302,7 @@ class Modeled5DRV : public M::RVType {
const typename M::Dep2Type& x2,
const typename M::Dep3Type& x3,
const typename M::Dep4Type& x4 ) const { return m.getProb(*this,x1,x2,x3,x4); }
-
+
};
///////////////////////////////////////////////////////////////////////////////
@@ -346,7 +346,7 @@ class Modeled6DRV : public M::RVType {
const typename M::Dep3Type& x3,
const typename M::Dep4Type& x4,
const typename M::Dep5Type& x5 ) const { return m.getProb(*this,x1,x2,x3,x4,x5); }
-
+
};
///////////////////////////////////////////////////////////////////////////////
@@ -395,7 +395,7 @@ class Modeled7DRV : public M::RVType {
const typename M::Dep4Type& x4,
const typename M::Dep5Type& x5,
const typename M::Dep6Type& x6 ) const { return m.getProb(*this,x1,x2,x3,x4,x5,x6); }
-
+
};
///////////////////////////////////////////////////////////////////////////////
diff --git a/contrib/synlm/hhmm/rvtl/include/nl-racpt.h b/contrib/synlm/hhmm/rvtl/include/nl-racpt.h
index 5d1502f1f..342e86de2 100644
--- a/contrib/synlm/hhmm/rvtl/include/nl-racpt.h
+++ b/contrib/synlm/hhmm/rvtl/include/nl-racpt.h
@@ -42,7 +42,7 @@ class GenericRACPTModel : public SimpleHash<K,P> {
return ( SimpleHash<K,P>::contains(k) );
}
-/*
+/*
P getProb ( const IterVal& ikyp, const K& k ) const {
if ( ikyp.iter.first == ikyp.iter.second ) { cerr<<"ERROR: no iterator to fix probability: "<<k<<endl; return P(); }
return ( ikyp.iter.first->second );
@@ -91,7 +91,7 @@ class GenericRACPTModel : public SimpleHash<K,P> {
for ( typename HKP::const_iterator ik=HKP::begin(); ik!=HKP::end(); ik++ ) {
K k=ik->first;
os << psId<<" "<<k<<" = "<<getProb(k).toDouble()<<endl;
-
+
// IterVal y;
// for ( bool b=setFirst(y,k); b; b=setNext(y,k) )
// os<<psId<<" "<<k<<" : "<<y<<" = "<<getProb(y,k).toDouble()<<"\n";
@@ -110,14 +110,14 @@ class GenericRACPTModel : public SimpleHash<K,P> {
friend pair<StringInput,GenericRACPTModel<K,P>*> operator>> ( StringInput si, GenericRACPTModel<K,P>& m ) {
return pair<StringInput,GenericRACPTModel<K,P>*>(si,&m); }
-
+
friend StringInput operator>> ( pair<StringInput,GenericRACPTModel<K,P>*> delimbuff, const char* psD ) {
- K k;
- StringInput si,si2,si3;
+ K k;
+ StringInput si,si2,si3;
GenericRACPTModel<K,P>& m = *delimbuff.second;
si=delimbuff.first;
if ( si==NULL ) return si;
-
+
// Kill the colon since we're treating the whole thing as the condition
char * str = si.c_str();
char * p = strchr(str, ':');
@@ -125,17 +125,17 @@ class GenericRACPTModel : public SimpleHash<K,P> {
p[0] = ' ';
}
si=str;
- while((si2=si>>" ")!=NULL)si=si2;
+ while((si2=si>>" ")!=NULL)si=si2;
si=si>>k>>" ";
- while((si2=si>>" ")!=NULL)si=si2;
+ while((si2=si>>" ")!=NULL)si=si2;
si=si>>"= ";
- while((si2=si>>" ")!=NULL)si=si2;
+ while((si2=si>>" ")!=NULL)si=si2;
return (si!=NULL) ? si>>m.setProb(k)>>psD : si;
}
};
-template<class Y, class P>
+template<class Y, class P>
class RandAccCPT1DModel : public GenericRACPTModel<MapKey1D<Y>,P> {
public:
// typedef typename GenericCPTModel<Y,MapKey1D<Unit>,P>::IterVal IterVal;
@@ -170,7 +170,7 @@ P& setProb ( const Y& y ) {
////////////////////
-template<class Y, class X1, class P>
+template<class Y, class X1, class P>
class RandAccCPT2DModel : public GenericRACPTModel<MapKey2D<X1,Y>,P> {
public:
@@ -187,7 +187,7 @@ class RandAccCPT2DModel : public GenericRACPTModel<MapKey2D<X1,Y>,P> {
P getProb ( const Y& y, const X1& x1 ) const {
return GenericRACPTModel<MapKey2D<X1,Y>,P>::getProb ( MapKey2D<X1,Y>(x1,y) );
}
-
+
/*
P& setProb ( const Y& y, const X1& x1 ) {
cerr << "setProb called on racpt2d" << endl;
@@ -199,7 +199,7 @@ class RandAccCPT2DModel : public GenericRACPTModel<MapKey2D<X1,Y>,P> {
////////////////////
-template<class Y, class X1, class X2, class P>
+template<class Y, class X1, class X2, class P>
class RandAccCPT3DModel : public GenericRACPTModel<MapKey3D<X1,X2,Y>,P> {
public:
@@ -219,7 +219,7 @@ class RandAccCPT3DModel : public GenericRACPTModel<MapKey3D<X1,X2,Y>,P> {
/*
////////////////////
-template<class Y, class X1, class X2, class X3, class P>
+template<class Y, class X1, class X2, class X3, class P>
class CPT4DModel : public GenericCPTModel<Y,MapKey3D<X1,X2,X3>,P> {
public:
typedef typename GenericCPTModel<Y,MapKey3D<X1,X2,X3>,P>::IterVal IterVal;
@@ -256,7 +256,7 @@ class CPT4DModel : public GenericCPTModel<Y,MapKey3D<X1,X2,X3>,P> {
////////////////////
-template<class Y, class X1, class X2, class X3, class X4, class P>
+template<class Y, class X1, class X2, class X3, class X4, class P>
class CPT5DModel : public GenericCPTModel<Y,MapKey4D<X1,X2,X3,X4>,P> {
public:
typedef typename GenericCPTModel<Y,MapKey4D<X1,X2,X3,X4>,P>::IterVal IterVal;
@@ -293,7 +293,7 @@ class CPT5DModel : public GenericCPTModel<Y,MapKey4D<X1,X2,X3,X4>,P> {
////////////////////
-template<class Y, class X1, class X2, class X3, class X4, class X5, class P>
+template<class Y, class X1, class X2, class X3, class X4, class X5, class P>
class RACPT6DModel : public GenericCPTModel<Y,MapKey5D<X1,X2,X3,X4,X5>,P> {
public:
typedef typename GenericCPTModel<Y,MapKey5D<X1,X2,X3,X4,X5>,P>::IterVal IterVal;
diff --git a/contrib/synlm/hhmm/rvtl/include/nl-randvar.h b/contrib/synlm/hhmm/rvtl/include/nl-randvar.h
index 66cc0b8f2..b4caa2bde 100644
--- a/contrib/synlm/hhmm/rvtl/include/nl-randvar.h
+++ b/contrib/synlm/hhmm/rvtl/include/nl-randvar.h
@@ -129,7 +129,7 @@ class DiscreteDomainRV : public Id<T> {
friend pair<StringInput,DiscreteDomainRV<T,domain>*> operator>> ( const StringInput ps, DiscreteDomainRV<T,domain>& rv ) { return pair<StringInput,DiscreteDomainRV<T,domain>*>(ps,&rv); }
friend StringInput operator>> ( pair<StringInput,DiscreteDomainRV<T,domain>*> delimbuff, const char* psDlm ) {
if (StringInput(NULL)==delimbuff.first) return delimbuff.first;
- ////assert(*delimbuff.second<domain.getSize());
+ ////assert(*delimbuff.second<domain.getSize());
int j=0;
StringInput psIn = delimbuff.first;
if(psDlm[0]=='\0') { *delimbuff.second=psIn.c_str(); return psIn+strlen(psIn.c_str()); }
@@ -203,7 +203,7 @@ template <class T> const T RefRV<T>::DUMMY;
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////
-template<class V1,class V2>
+template<class V1,class V2>
class Joint2DRV {
public:
@@ -216,7 +216,7 @@ class Joint2DRV {
Joint2DRV ( const V1& v1, const V2& v2 ) { first=v1; second=v2; }
// Extraction methods...
- size_t getHashKey ( ) const { size_t k=rotLeft(first.getHashKey(),3); k^=second.getHashKey();
+ size_t getHashKey ( ) const { size_t k=rotLeft(first.getHashKey(),3); k^=second.getHashKey();
/*fprintf(stderr," (%d) %d ^& %d = %d\n",sizeof(*this),x1.getHashKey(),x2.getHashKey(),k);*/ return k; }
bool operator< ( const Joint2DRV<V1,V2>& j ) const { return ( (first<j.first) ||
(first==j.first && second<j.second) ); }
@@ -276,7 +276,7 @@ class DelimitedJoint2DRV : public Joint2DRV<V1,V2> {
friend pair<StringInput,DelimitedJoint2DRV<SD1,V1,SD2,V2,SD3>*> operator>> ( StringInput ps, DelimitedJoint2DRV<SD1,V1,SD2,V2,SD3>& rv ) { return pair<StringInput,DelimitedJoint2DRV<SD1,V1,SD2,V2,SD3>*>(ps,&rv); }
friend StringInput operator>> ( pair<StringInput,DelimitedJoint2DRV<SD1,V1,SD2,V2,SD3>*> delimbuff, const char* psDlm ) {
if (StringInput(NULL)==delimbuff.first) return delimbuff.first;
- return ( (SD3[0]=='\0') ? delimbuff.first>>SD1>>delimbuff.second->first>>SD2>>delimbuff.second->second>>psDlm
+ return ( (SD3[0]=='\0') ? delimbuff.first>>SD1>>delimbuff.second->first>>SD2>>delimbuff.second->second>>psDlm
: delimbuff.first>>SD1>>delimbuff.second->first>>SD2>>delimbuff.second->second>>SD3>>psDlm );
}
};
@@ -290,7 +290,7 @@ class DelimitedJoint2DRV : public Joint2DRV<V1,V2> {
//
////////////////////////////////////////////////////////////////////////////////
-template<class V1,class V2,class V3>
+template<class V1,class V2,class V3>
class Joint3DRV {
public:
@@ -361,7 +361,7 @@ class DelimitedJoint3DRV : public Joint3DRV<V1,V2,V3> {
return pair<StringInput,DelimitedJoint3DRV<SD1,V1,SD2,V2,SD3,V3,SD4>*>(ps,&rv); }
friend StringInput operator>> ( pair<StringInput,DelimitedJoint3DRV<SD1,V1,SD2,V2,SD3,V3,SD4>*> delimbuff, const char* psDlm ) {
if (StringInput(NULL)==delimbuff.first) return delimbuff.first;
- return ( (SD4[0]=='\0') ? delimbuff.first>>SD1>>delimbuff.second->first>>SD2>>delimbuff.second->second>>SD3>>delimbuff.second->third>>psDlm
+ return ( (SD4[0]=='\0') ? delimbuff.first>>SD1>>delimbuff.second->first>>SD2>>delimbuff.second->second>>SD3>>delimbuff.second->third>>psDlm
: delimbuff.first>>SD1>>delimbuff.second->first>>SD2>>delimbuff.second->second>>SD3>>delimbuff.second->third>>SD4>>psDlm );
}
};
@@ -453,7 +453,7 @@ class DelimitedJoint4DRV : public Joint4DRV<V1,V2,V3,V4> {
//
////////////////////////////////////////////////////////////////////////////////
-template <int I, class T>
+template <int I, class T>
class JointArrayRV {
private:
// Data members...
@@ -491,7 +491,7 @@ class JointArrayRV {
////////////////////////////////////////////////////////////////////////////////
-template <int I, char* SD, class T>
+template <int I, char* SD, class T>
class DelimitedJointArrayRV : public JointArrayRV<I,T> {
public:
@@ -569,7 +569,7 @@ class History {
/*
void read ( char* ps, const ReaderContext& rc=ReaderContext() ) { char* psT; for(int i=0;i<N;i++){char* z=strtok_r((0==i)?ps:NULL,";",&psT); assert(z); at.set(i).read(z);} }
//at.set(i).read(strtok_r((0==i)?ps:NULL,";",&psT)); }
- */
+ */
friend ostream& operator<< ( ostream& os, const History<N,T>& a ) { for(int i=0;i<N;i++)os<<((i==0)?"":";")<<a.getBack(i); return os; }
friend pair<StringInput,History<N,T>*> operator>> ( StringInput ps, History<N,T>& a ) { return pair<StringInput,History<N,T>*>(ps,&a); }
diff --git a/contrib/synlm/hhmm/rvtl/include/nl-safeids.h b/contrib/synlm/hhmm/rvtl/include/nl-safeids.h
index 50837c366..c5f9dcb67 100644
--- a/contrib/synlm/hhmm/rvtl/include/nl-safeids.h
+++ b/contrib/synlm/hhmm/rvtl/include/nl-safeids.h
@@ -30,7 +30,7 @@
#include "nl-stream.h"
#include <iostream>
-using namespace std;
+using namespace std;
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
@@ -39,7 +39,7 @@ using namespace std;
//
////////////////////////////////////////////////////////////////////////////////
-template <int I, class T>
+template <int I, class T>
class StaticSafeArray {
private:
// Data members...
@@ -84,7 +84,7 @@ class StaticSafeArray {
////////////////////////////////////////////////////////////////////////////////
-template <int I, char* SD, class T>
+template <int I, char* SD, class T>
class DelimitedStaticSafeArray : public StaticSafeArray<I,T> {
public:
DelimitedStaticSafeArray ( ) : StaticSafeArray<I,T>() { }
@@ -349,7 +349,7 @@ class SafeArray2D {
// Extraction methods...
const T& get (const X1& x,const X2& y) const { assert(at!=NULL);
assert(x.toInt()>=0); assert(x.toInt()<xSize);
- assert(y.toInt()>=0);
+ assert(y.toInt()>=0);
//this assert failed when compile without -DNDEBUG (needed for debugging). Have to figure out why before adding this assert back in
//assert(y.toInt()<ySize);
return at[x.toInt()*ySize + y.toInt()];}
@@ -423,7 +423,7 @@ class SafeArray4D {
{ delete[] at; wSize=sat.wSize; xSize=sat.xSize; ySize=sat.ySize;
zSize=sat.zSize; at=new T[wSize*xSize*ySize*zSize];
for(int i=0;i<wSize*xSize*ySize*zSize;i++) at[i]=sat.at[i]; return *this; }
- void init (int w,int x,int y,int z)
+ void init (int w,int x,int y,int z)
{ delete[] at; wSize=w; xSize=x; ySize=y; zSize=z; at=new T[w*x*y*z]; }
void init (int w,int x,int y,int z,const T& t)
{ delete[] at; wSize=w; xSize=x; ySize=y; zSize=z; at=new T[w*x*y*z];
@@ -472,7 +472,7 @@ class SafeArray5D {
{ delete[] at; vSize=sat.vSize; wSize=sat.wSize; xSize=sat.xSize;
ySize=sat.ySize; zSize=sat.zSize; at=new T[vSize*wSize*xSize*ySize*zSize];
for(int i=0;i<vSize*wSize*xSize*ySize*zSize;i++) at[i]=sat.at[i]; return *this; }
- void init(int v,int w,int x,int y,int z)
+ void init(int v,int w,int x,int y,int z)
{ delete[] at; vSize=v; wSize=w; xSize=x; ySize=y; zSize=z; at=new T[v*w*x*y*z]; }
void init(int v,int w,int x,int y,int z,const T& t)
{ delete[] at; vSize=v; wSize=w; xSize=x; ySize=y; zSize=z; at=new T[v*w*x*y*z];
diff --git a/contrib/synlm/hhmm/rvtl/include/nl-stream.h b/contrib/synlm/hhmm/rvtl/include/nl-stream.h
index 8f743e12b..ee3b641fb 100644
--- a/contrib/synlm/hhmm/rvtl/include/nl-stream.h
+++ b/contrib/synlm/hhmm/rvtl/include/nl-stream.h
@@ -86,7 +86,7 @@ class IStream {
friend ostream& operator<< ( ostream& os, const IStream& is ) { return os<<is.iIndex<<","<<is.psrc<<","<<*is.psrc; }
// Match single char...
- friend IStream operator>> ( IStream is, char& c ) {
+ friend IStream operator>> ( IStream is, char& c ) {
// Propagate fail...
if (IStream()==is) return IStream();
c=is.get(is.iIndex);
@@ -106,7 +106,7 @@ class IStream {
// Match anything else followed by zero-terminated string delimiter...
template<class X> friend pair<IStream,X*> operator>> ( IStream is, X& x ) { return pair<IStream,X*>(is,&x); }
- template<class X> friend IStream operator>> ( pair<IStream,X*> is_x, const char* psDlm ) {
+ template<class X> friend IStream operator>> ( pair<IStream,X*> is_x, const char* psDlm ) {
IStream& is = is_x.first;
X& x = *is_x.second;
// Propagate fail...
@@ -129,7 +129,7 @@ class IStream {
}
// Match integer followed by zero-terminated string delimiter...
- friend IStream operator>> ( pair<IStream,int*> is_x, const char* psDlm ) {
+ friend IStream operator>> ( pair<IStream,int*> is_x, const char* psDlm ) {
IStream& is = is_x.first;
int& x = *is_x.second;
// Propagate fail...
@@ -151,7 +151,7 @@ class IStream {
}
// Match unsigned int followed by zero-terminated string delimiter...
- friend IStream operator>> ( pair<IStream,unsigned int*> is_x, const char* psDlm ) {
+ friend IStream operator>> ( pair<IStream,unsigned int*> is_x, const char* psDlm ) {
IStream& is = is_x.first;
unsigned int& x = *is_x.second;
// Propagate fail...
@@ -173,7 +173,7 @@ class IStream {
}
// Match float followed by zero-terminated string delimiter...
- friend IStream operator>> ( pair<IStream,float*> is_x, const char* psDlm ) {
+ friend IStream operator>> ( pair<IStream,float*> is_x, const char* psDlm ) {
IStream& is = is_x.first;
float& x = *is_x.second;
// Propagate fail...
@@ -195,7 +195,7 @@ class IStream {
}
// Match double followed by zero-terminated string delimiter...
- friend IStream operator>> ( pair<IStream,double*> is_x, const char* psDlm ) {
+ friend IStream operator>> ( pair<IStream,double*> is_x, const char* psDlm ) {
IStream& is = is_x.first;
double& x = *is_x.second;
// Propagate fail...
@@ -217,7 +217,7 @@ class IStream {
}
// Match void pointer followed by zero-terminated string delimiter...
- friend IStream operator>> ( pair<IStream,void**> is_x, const char* psDlm ) {
+ friend IStream operator>> ( pair<IStream,void**> is_x, const char* psDlm ) {
IStream& is = is_x.first;
// Propagate fail...
if (IStream()==is) return IStream();
diff --git a/contrib/synlm/hhmm/rvtl/include/nl-string.h b/contrib/synlm/hhmm/rvtl/include/nl-string.h
index 1a7fc34ae..73e831539 100644
--- a/contrib/synlm/hhmm/rvtl/include/nl-string.h
+++ b/contrib/synlm/hhmm/rvtl/include/nl-string.h
@@ -68,13 +68,13 @@ class StringInput {
friend StringInput operator>> ( StringInput psIn, const char* psDlm ) {
if (StringInput(NULL)==psIn) return psIn;
int i;
- for (i=0; psIn[i]!='\0' && psDlm[i]!='\0'; i++)
+ for (i=0; psIn[i]!='\0' && psDlm[i]!='\0'; i++)
if(psIn[i]!=psDlm[i]) return StringInput(NULL); //psIn;
return (psDlm[i]!='\0') ? StringInput(NULL) : (psIn[i]!='\0') ? psIn+i : SI_EOS;
}
friend pair<StringInput,int*> operator>> ( StringInput ps, int& n ) { return pair<StringInput,int*>(ps,&n); }
- friend StringInput operator>> ( pair<StringInput,int*> delimbuff, const char* psDlm ) {
+ friend StringInput operator>> ( pair<StringInput,int*> delimbuff, const char* psDlm ) {
if (StringInput(NULL)==delimbuff.first) return delimbuff.first;
///int i; for(i=0;psIn[i]!='\0';i++) if(psIn[i]==psDlm[i]) return psIn; return psIn+i;
int j=0;
@@ -90,7 +90,7 @@ class StringInput {
}
friend pair<StringInput,unsigned int*> operator>> ( StringInput ps, unsigned int& n ) { return pair<StringInput,unsigned int*>(ps,&n); }
- friend StringInput operator>> ( pair<StringInput,unsigned int*> delimbuff, const char* psDlm ) {
+ friend StringInput operator>> ( pair<StringInput,unsigned int*> delimbuff, const char* psDlm ) {
if (StringInput(NULL)==delimbuff.first) return delimbuff.first;
///int i; for(i=0;psIn[i]!='\0';i++) if(psIn[i]==psDlm[i]) return psIn; return psIn+i;
int j=0;
@@ -106,7 +106,7 @@ class StringInput {
}
friend pair<StringInput,double*> operator>> ( StringInput ps, double& d ) { return pair<StringInput,double*>(ps,&d); }
- friend StringInput operator>> ( pair<StringInput,double*> delimbuff, const char* psDlm ) {
+ friend StringInput operator>> ( pair<StringInput,double*> delimbuff, const char* psDlm ) {
if (StringInput(NULL)==delimbuff.first) return delimbuff.first;
///int i; for(i=0;psIn[i]!='\0';i++) if(psIn[i]==psDlm[i]) return psIn; return psIn+i;
int j=0;
@@ -191,7 +191,7 @@ class String : public Array<char> {
friend pair<StringInput,String*> operator>> ( const StringInput ps, String& s ) { return pair<StringInput,String*>(ps,&s); }
friend StringInput operator>> ( pair<StringInput,String*> delimbuff, const char* psDlm ) {
if (StringInput(NULL)==delimbuff.first) return delimbuff.first;
- ////assert(*delimbuff.second<domain.getSize());
+ ////assert(*delimbuff.second<domain.getSize());
int j=0;
StringInput psIn = delimbuff.first;
if(psDlm[0]=='\0') { *delimbuff.second=String(psIn.c_str()); return psIn+strlen(psIn.c_str()); }
diff --git a/contrib/synlm/hhmm/rvtl/include/nl-stringindex.h b/contrib/synlm/hhmm/rvtl/include/nl-stringindex.h
index 22931f081..933aba23d 100644
--- a/contrib/synlm/hhmm/rvtl/include/nl-stringindex.h
+++ b/contrib/synlm/hhmm/rvtl/include/nl-stringindex.h
@@ -38,7 +38,7 @@ class StringIndex{
map <string, int> msi;
map <int, string> mis;
int maxIndex;
-
+
public:
// Constructor / destructor methods...
diff --git a/contrib/synlm/hhmm/rvtl/include/nl-tetrahex.h b/contrib/synlm/hhmm/rvtl/include/nl-tetrahex.h
index d77e4f471..60746bd53 100644
--- a/contrib/synlm/hhmm/rvtl/include/nl-tetrahex.h
+++ b/contrib/synlm/hhmm/rvtl/include/nl-tetrahex.h
@@ -22,7 +22,7 @@
///////////////////////////////////////////////////////////////////////////////
/***********************************************
- * nl-tetrahex.h
+ * nl-tetrahex.h
* a little header with some base conversion stuff
* so that we can represent base 16, 32 or 64 with
* one character.
diff --git a/contrib/synlm/hhmm/rvtl/include/nl-timer.h b/contrib/synlm/hhmm/rvtl/include/nl-timer.h
index 3fa7c5387..f6d08c992 100644
--- a/contrib/synlm/hhmm/rvtl/include/nl-timer.h
+++ b/contrib/synlm/hhmm/rvtl/include/nl-timer.h
@@ -41,7 +41,7 @@ class Timer {
}
double elapsed ( ) { // in milliseconds.
return (double(kept.tv_sec)*1000.0 + double(kept.tv_usec)/1000.0);
- //struct timeval end; gettimeofday(&end,NULL);
+ //struct timeval end; gettimeofday(&end,NULL);
//double beg_time_s = (double) beg.tv_sec + (double) ((double)beg.tv_usec / 1000000.0);
//double end_time_s = (double) end.tv_sec + (double) ((double)end.tv_usec / 1000000.0);
//return ( (end_time_s - beg_time_s) * 1000.0 );
diff --git a/contrib/synlm/hhmm/wsjparse/include/HHMMLangModel-gf.h b/contrib/synlm/hhmm/wsjparse/include/HHMMLangModel-gf.h
index 3987d6969..a8405ea13 100644
--- a/contrib/synlm/hhmm/wsjparse/include/HHMMLangModel-gf.h
+++ b/contrib/synlm/hhmm/wsjparse/include/HHMMLangModel-gf.h
@@ -136,7 +136,7 @@ class Rd : public DiscreteDomainRV<int,domRd> {
}
if (!hToG.contains(*this)) {
size_t i=s.find(',');
- assert(i!=string::npos);
+ assert(i!=string::npos);
hToG.set(*this) = G(s.substr(i+1).c_str());
if ( '1'==s[0] )
hFromG.set(G(s.substr(i+1).c_str())) = *this;
diff --git a/contrib/synlm/hhmm/wsjparse/include/TextObsModel.h b/contrib/synlm/hhmm/wsjparse/include/TextObsModel.h
index 5e277e857..57abe5ec6 100644
--- a/contrib/synlm/hhmm/wsjparse/include/TextObsModel.h
+++ b/contrib/synlm/hhmm/wsjparse/include/TextObsModel.h
@@ -42,11 +42,11 @@ typedef HidVarCPT2DModel<P,C,LogProb> PgivCModel;
class WModel {
private:
TrainableDTree2DModel<P,W,LogProb> modPgivWdt;
-
+
RandAccCPT2DModel<P,W,LogProb> modPgivWs;
RandAccCPT1DModel<P,LogProb> modP;
RandAccCPT1DModel<W,LogProb> modW;
-
+
public:
//LogProb getProb ( const W& w, const HidVarCPT1DModel<P,LogProb>::IterVal& p ) const {
LogProb getProb ( const W& w, const P::ArrayIterator<LogProb>& p ) const {
@@ -93,8 +93,8 @@ class OModel {
};
typedef DistribModeledWgivC RandVarType;
-
-
+
+
void calcProb ( OModel::RandVarType& o, const W& w ) const {
o.clear();
@@ -106,7 +106,7 @@ class OModel {
for (LogProb pr=modPgivC.setIterProb(p,c,aCtr); pr!=LogProb(); pr = modPgivC.setIterProb(p,c,aCtr=0) ){
o.setProb(c) += modPgivC.getProb(p,c).toProb() * modWgivP.getProb(w,p).toProb();
}
-
+
}
}
@@ -134,7 +134,7 @@ class XModel {
RandAccCPT2DModel<P,W,Prob> modPgivW;
RandAccCPT1DModel<P,Prob> modP;
RandAccCPT1DModel<W,Prob> modW;
-
+
public:
typedef X RandVarType;
diff --git a/lm/bhiksha.cc b/lm/bhiksha.cc
index c8a18dfda..4262b615e 100644
--- a/lm/bhiksha.cc
+++ b/lm/bhiksha.cc
@@ -11,12 +11,12 @@ namespace lm {
namespace ngram {
namespace trie {
-DontBhiksha::DontBhiksha(const void * /*base*/, uint64_t /*max_offset*/, uint64_t max_next, const Config &/*config*/) :
+DontBhiksha::DontBhiksha(const void * /*base*/, uint64_t /*max_offset*/, uint64_t max_next, const Config &/*config*/) :
next_(util::BitsMask::ByMax(max_next)) {}
const uint8_t kArrayBhikshaVersion = 0;
-// TODO: put this in binary file header instead when I change the binary file format again.
+// TODO: put this in binary file header instead when I change the binary file format again.
void ArrayBhiksha::UpdateConfigFromBinary(const BinaryFormat &file, uint64_t offset, Config &config) {
uint8_t buffer[2];
file.ReadForConfig(buffer, 2, offset);
@@ -33,7 +33,7 @@ uint8_t ChopBits(uint64_t max_offset, uint64_t max_next, const Config &config) {
uint8_t required = util::RequiredBits(max_next);
uint8_t best_chop = 0;
int64_t lowest_change = std::numeric_limits<int64_t>::max();
- // There are probably faster ways but I don't care because this is only done once per order at construction time.
+ // There are probably faster ways but I don't care because this is only done once per order at construction time.
for (uint8_t chop = 0; chop <= std::min(required, config.pointer_bhiksha_bits); ++chop) {
int64_t change = (max_next >> (required - chop)) * 64 /* table cost in bits */
- max_offset * static_cast<int64_t>(chop); /* savings in bits*/
diff --git a/lm/bhiksha.hh b/lm/bhiksha.hh
index 8ec8989c7..36438f1d2 100644
--- a/lm/bhiksha.hh
+++ b/lm/bhiksha.hh
@@ -7,7 +7,7 @@
* pages={388--391},
* }
*
- * Currently only used for next pointers.
+ * Currently only used for next pointers.
*/
#ifndef LM_BHIKSHA_H
@@ -86,9 +86,9 @@ class ArrayBhiksha {
// assert(end_it == std::upper_bound(offset_begin_, offset_end_, index + 1));
--end_it;
// assert(end_it >= begin_it);
- out.begin = ((begin_it - offset_begin_) << next_inline_.bits) |
+ out.begin = ((begin_it - offset_begin_) << next_inline_.bits) |
util::ReadInt57(base, bit_offset, next_inline_.bits, next_inline_.mask);
- out.end = ((end_it - offset_begin_) << next_inline_.bits) |
+ out.end = ((end_it - offset_begin_) << next_inline_.bits) |
util::ReadInt57(base, bit_offset + total_bits, next_inline_.bits, next_inline_.mask);
// If this fails, consider rebuilding your model using KenLM after 1e333d786b748555e8f368d2bbba29a016c98052
assert(out.end >= out.begin);
diff --git a/lm/binary_format.cc b/lm/binary_format.cc
index 481174047..4ad893d44 100644
--- a/lm/binary_format.cc
+++ b/lm/binary_format.cc
@@ -135,7 +135,7 @@ void MatchCheck(ModelType model_type, unsigned int search_version, const Paramet
const std::size_t kInvalidSize = static_cast<std::size_t>(-1);
-BinaryFormat::BinaryFormat(const Config &config)
+BinaryFormat::BinaryFormat(const Config &config)
: write_method_(config.write_method), write_mmap_(config.write_mmap), load_method_(config.load_method),
header_size_(kInvalidSize), vocab_size_(kInvalidSize), vocab_string_offset_(kInvalidOffset) {}
diff --git a/lm/binary_format.hh b/lm/binary_format.hh
index 136d6b1aa..ff99b9574 100644
--- a/lm/binary_format.hh
+++ b/lm/binary_format.hh
@@ -19,18 +19,18 @@ namespace ngram {
extern const char *kModelNames[6];
-/*Inspect a file to determine if it is a binary lm. If not, return false.
+/*Inspect a file to determine if it is a binary lm. If not, return false.
* If so, return true and set recognized to the type. This is the only API in
- * this header designed for use by decoder authors.
+ * this header designed for use by decoder authors.
*/
bool RecognizeBinary(const char *file, ModelType &recognized);
struct FixedWidthParameters {
unsigned char order;
float probing_multiplier;
- // What type of model is this?
+ // What type of model is this?
ModelType model_type;
- // Does the end of the file have the actual strings in the vocabulary?
+ // Does the end of the file have the actual strings in the vocabulary?
bool has_vocabulary;
unsigned int search_version;
};
@@ -38,7 +38,7 @@ struct FixedWidthParameters {
// This is a macro instead of an inline function so constants can be assigned using it.
#define ALIGN8(a) ((std::ptrdiff_t(((a)-1)/8)+1)*8)
-// Parameters stored in the header of a binary file.
+// Parameters stored in the header of a binary file.
struct Parameters {
FixedWidthParameters fixed;
std::vector<uint64_t> counts;
@@ -79,7 +79,7 @@ class BinaryFormat {
const char *write_mmap_;
util::LoadMethod load_method_;
- // File behind memory, if any.
+ // File behind memory, if any.
util::scoped_fd file_;
// If there is a file involved, a single mapping.
diff --git a/lm/blank.hh b/lm/blank.hh
index 2107e1cb6..e09054c9b 100644
--- a/lm/blank.hh
+++ b/lm/blank.hh
@@ -15,9 +15,9 @@ namespace ngram {
* kNoExtensionBackoff. If the n-gram might be extended, then out_state must
* contain the full n-gram, in which case kExtensionBackoff is set. In any
* case, if an n-gram has non-zero backoff, the full state is returned so
- * backoff can be properly charged.
+ * backoff can be properly charged.
* These differ only in sign bit because the backoff is in fact zero in either
- * case.
+ * case.
*/
const float kNoExtensionBackoff = -0.0;
const float kExtensionBackoff = 0.0;
@@ -28,7 +28,7 @@ inline void SetExtension(float &backoff) {
if (backoff == kNoExtensionBackoff) backoff = kExtensionBackoff;
}
-// This compiles down nicely.
+// This compiles down nicely.
inline bool HasExtension(const float &backoff) {
typedef union { float f; uint32_t i; } UnionValue;
UnionValue compare, interpret;
diff --git a/lm/build_binary_main.cc b/lm/build_binary_main.cc
index 6d88a398d..35206e60b 100644
--- a/lm/build_binary_main.cc
+++ b/lm/build_binary_main.cc
@@ -56,7 +56,7 @@ void Usage(const char *name, const char *default_mem) {
exit(1);
}
-// I could really use boost::lexical_cast right about now.
+// I could really use boost::lexical_cast right about now.
float ParseFloat(const char *from) {
char *end;
float ret = strtod(from, &end);
diff --git a/lm/builder/adjust_counts.cc b/lm/builder/adjust_counts.cc
index 2dd3cef1b..bcaa71998 100644
--- a/lm/builder/adjust_counts.cc
+++ b/lm/builder/adjust_counts.cc
@@ -114,7 +114,7 @@ class CollapseStream {
current_(NULL, NGram::OrderFromSize(position.GetChain().EntrySize())),
prune_threshold_(prune_threshold),
prune_words_(prune_words),
- block_(position) {
+ block_(position) {
StartBlock();
}
@@ -125,27 +125,27 @@ class CollapseStream {
CollapseStream &operator++() {
assert(block_);
-
+
if (current_.begin()[1] == kBOS && current_.Base() < copy_from_) {
memcpy(current_.Base(), copy_from_, current_.TotalSize());
UpdateCopyFrom();
-
+
// Mark highest order n-grams for later pruning
if(current_.Count() <= prune_threshold_) {
- current_.Mark();
+ current_.Mark();
}
-
+
if(!prune_words_.empty()) {
for(WordIndex* i = current_.begin(); i != current_.end(); i++) {
if(prune_words_[*i]) {
- current_.Mark();
+ current_.Mark();
break;
}
}
}
-
+
}
-
+
current_.NextInMemory();
uint8_t *block_base = static_cast<uint8_t*>(block_->Get());
if (current_.Base() == block_base + block_->ValidSize()) {
@@ -153,21 +153,21 @@ class CollapseStream {
++block_;
StartBlock();
}
-
+
// Mark highest order n-grams for later pruning
if(current_.Count() <= prune_threshold_) {
- current_.Mark();
+ current_.Mark();
}
if(!prune_words_.empty()) {
for(WordIndex* i = current_.begin(); i != current_.end(); i++) {
if(prune_words_[*i]) {
- current_.Mark();
+ current_.Mark();
break;
}
}
}
-
+
return *this;
}
@@ -180,21 +180,21 @@ class CollapseStream {
current_.ReBase(block_->Get());
copy_from_ = static_cast<uint8_t*>(block_->Get()) + block_->ValidSize();
UpdateCopyFrom();
-
+
// Mark highest order n-grams for later pruning
if(current_.Count() <= prune_threshold_) {
- current_.Mark();
+ current_.Mark();
}
if(!prune_words_.empty()) {
for(WordIndex* i = current_.begin(); i != current_.end(); i++) {
if(prune_words_[*i]) {
- current_.Mark();
+ current_.Mark();
break;
}
}
}
-
+
}
// Find last without bos.
@@ -222,18 +222,18 @@ void AdjustCounts::Run(const util::stream::ChainPositions &positions) {
StatCollector stats(order, counts_, counts_pruned_, discounts_);
if (order == 1) {
- // Only unigrams. Just collect stats.
+ // Only unigrams. Just collect stats.
for (NGramStream full(positions[0]); full; ++full) {
-
+
// Do not prune <s> </s> <unk>
if(*full->begin() > 2) {
if(full->Count() <= prune_thresholds_[0])
full->Mark();
-
+
if(!prune_words_.empty() && prune_words_[*full->begin()])
full->Mark();
}
-
+
stats.AddFull(full->UnmarkedCount(), full->IsMarked());
}
@@ -243,7 +243,7 @@ void AdjustCounts::Run(const util::stream::ChainPositions &positions) {
NGramStreams streams;
streams.Init(positions, positions.size() - 1);
-
+
CollapseStream full(positions[positions.size() - 1], prune_thresholds_.back(), prune_words_);
// Initialization: <unk> has count 0 and so does <s>.
@@ -261,7 +261,7 @@ void AdjustCounts::Run(const util::stream::ChainPositions &positions) {
std::vector<uint64_t> actual_counts(positions.size(), 0);
// Something of a hack: don't prune <s>.
actual_counts[0] = std::numeric_limits<uint64_t>::max();
-
+
// Iterate over full (the stream of the highest order ngrams)
for (; full; ++full) {
const WordIndex *different = FindDifference(*full, **lower_valid);
@@ -272,16 +272,16 @@ void AdjustCounts::Run(const util::stream::ChainPositions &positions) {
uint64_t order_minus_1 = lower_valid - streams_begin;
if(actual_counts[order_minus_1] <= prune_thresholds_[order_minus_1])
(*lower_valid)->Mark();
-
+
if(!prune_words_.empty()) {
for(WordIndex* i = (*lower_valid)->begin(); i != (*lower_valid)->end(); i++) {
if(prune_words_[*i]) {
- (*lower_valid)->Mark();
+ (*lower_valid)->Mark();
break;
}
}
}
-
+
stats.Add(order_minus_1, (*lower_valid)->UnmarkedCount(), (*lower_valid)->IsMarked());
++*lower_valid;
}
@@ -327,16 +327,16 @@ void AdjustCounts::Run(const util::stream::ChainPositions &positions) {
uint64_t lower_count = actual_counts[(*s)->Order() - 1];
if(lower_count <= prune_thresholds_[(*s)->Order() - 1])
(*s)->Mark();
-
+
if(!prune_words_.empty()) {
for(WordIndex* i = (*s)->begin(); i != (*s)->end(); i++) {
if(prune_words_[*i]) {
- (*s)->Mark();
+ (*s)->Mark();
break;
}
}
}
-
+
stats.Add(s - streams.begin(), lower_count, (*s)->IsMarked());
++*s;
}
diff --git a/lm/builder/adjust_counts.hh b/lm/builder/adjust_counts.hh
index b169950e9..29319ba06 100644
--- a/lm/builder/adjust_counts.hh
+++ b/lm/builder/adjust_counts.hh
@@ -30,9 +30,9 @@ struct DiscountConfig {
WarningAction bad_action;
};
-/* Compute adjusted counts.
+/* Compute adjusted counts.
* Input: unique suffix sorted N-grams (and just the N-grams) with raw counts.
- * Output: [1,N]-grams with adjusted counts.
+ * Output: [1,N]-grams with adjusted counts.
* [1,N)-grams are in suffix order
* N-grams are in undefined order (they're going to be sorted anyway).
*/
@@ -50,13 +50,13 @@ class AdjustCounts {
const DiscountConfig &discount_config,
std::vector<Discount> &discounts)
: prune_thresholds_(prune_thresholds), counts_(counts), counts_pruned_(counts_pruned),
- prune_words_(prune_words), discount_config_(discount_config), discounts_(discounts)
+ prune_words_(prune_words), discount_config_(discount_config), discounts_(discounts)
{}
void Run(const util::stream::ChainPositions &positions);
private:
- const std::vector<uint64_t> &prune_thresholds_;
+ const std::vector<uint64_t> &prune_thresholds_;
std::vector<uint64_t> &counts_;
std::vector<uint64_t> &counts_pruned_;
const std::vector<bool> &prune_words_;
diff --git a/lm/builder/adjust_counts_test.cc b/lm/builder/adjust_counts_test.cc
index 353e3dd35..2a9d78ae0 100644
--- a/lm/builder/adjust_counts_test.cc
+++ b/lm/builder/adjust_counts_test.cc
@@ -82,7 +82,7 @@ BOOST_AUTO_TEST_CASE(Simple) {
}
BOOST_REQUIRE_EQUAL(4UL, counts.size());
BOOST_CHECK_EQUAL(4UL, counts[0]);
- // These are no longer set because the discounts are bad.
+ // These are no longer set because the discounts are bad.
/* BOOST_CHECK_EQUAL(4UL, counts[1]);
BOOST_CHECK_EQUAL(3UL, counts[2]);
BOOST_CHECK_EQUAL(3UL, counts[3]);*/
diff --git a/lm/builder/corpus_count.cc b/lm/builder/corpus_count.cc
index 7f3dafa27..889eeb7a9 100644
--- a/lm/builder/corpus_count.cc
+++ b/lm/builder/corpus_count.cc
@@ -45,7 +45,7 @@ class DedupeHash : public std::unary_function<const WordIndex *, bool> {
std::size_t operator()(const WordIndex *start) const {
return util::MurmurHashNative(start, size_);
}
-
+
private:
const std::size_t size_;
};
@@ -53,11 +53,11 @@ class DedupeHash : public std::unary_function<const WordIndex *, bool> {
class DedupeEquals : public std::binary_function<const WordIndex *, const WordIndex *, bool> {
public:
explicit DedupeEquals(std::size_t order) : size_(order * sizeof(WordIndex)) {}
-
+
bool operator()(const WordIndex *first, const WordIndex *second) const {
return !memcmp(first, second, size_);
- }
-
+ }
+
private:
const std::size_t size_;
};
@@ -82,7 +82,7 @@ typedef util::ProbingHashTable<DedupeEntry, DedupeHash, DedupeEquals> Dedupe;
class Writer {
public:
- Writer(std::size_t order, const util::stream::ChainPosition &position, void *dedupe_mem, std::size_t dedupe_mem_size)
+ Writer(std::size_t order, const util::stream::ChainPosition &position, void *dedupe_mem, std::size_t dedupe_mem_size)
: block_(position), gram_(block_->Get(), order),
dedupe_invalid_(order, std::numeric_limits<WordIndex>::max()),
dedupe_(dedupe_mem, dedupe_mem_size, &dedupe_invalid_[0], DedupeHash(order), DedupeEquals(order)),
@@ -91,7 +91,7 @@ class Writer {
dedupe_.Clear();
assert(Dedupe::Size(position.GetChain().BlockSize() / position.GetChain().EntrySize(), kProbingMultiplier) == dedupe_mem_size);
if (order == 1) {
- // Add special words. AdjustCounts is responsible if order != 1.
+ // Add special words. AdjustCounts is responsible if order != 1.
AddUnigramWord(kUNK);
AddUnigramWord(kBOS);
}
@@ -121,16 +121,16 @@ class Writer {
memmove(gram_.begin(), gram_.begin() + 1, sizeof(WordIndex) * (gram_.Order() - 1));
return;
}
- // Complete the write.
+ // Complete the write.
gram_.Count() = 1;
- // Prepare the next n-gram.
+ // Prepare the next n-gram.
if (reinterpret_cast<uint8_t*>(gram_.begin()) + gram_.TotalSize() != static_cast<uint8_t*>(block_->Get()) + block_size_) {
NGram last(gram_);
gram_.NextInMemory();
std::copy(last.begin() + 1, last.end(), gram_.begin());
return;
}
- // Block end. Need to store the context in a temporary buffer.
+ // Block end. Need to store the context in a temporary buffer.
std::copy(gram_.begin() + 1, gram_.end(), buffer_.get());
dedupe_.Clear();
block_->SetValidSize(block_size_);
@@ -158,7 +158,7 @@ class Writer {
// Hash table combiner implementation.
Dedupe dedupe_;
- // Small buffer to hold existing ngrams when shifting across a block boundary.
+ // Small buffer to hold existing ngrams when shifting across a block boundary.
boost::scoped_array<WordIndex> buffer_;
const std::size_t block_size_;
@@ -224,12 +224,12 @@ void CorpusCount::Run(const util::stream::ChainPosition &position) {
} catch (const util::EndOfFileException &e) {}
token_count_ = count;
type_count_ = vocab.Size();
-
+
// Create list of unigrams that are supposed to be pruned
if (!prune_vocab_filename_.empty()) {
try {
util::FilePiece prune_vocab_file(prune_vocab_filename_.c_str());
-
+
prune_words_.resize(vocab.Size(), true);
try {
while (true) {
@@ -238,12 +238,12 @@ void CorpusCount::Run(const util::stream::ChainPosition &position) {
prune_words_[vocab.Index(*w)] = false;
}
} catch (const util::EndOfFileException &e) {}
-
+
// Never prune <unk>, <s>, </s>
prune_words_[kUNK] = false;
prune_words_[kBOS] = false;
prune_words_[kEOS] = false;
-
+
} catch (const util::Exception &e) {
std::cerr << e.what() << std::endl;
abort();
diff --git a/lm/builder/corpus_count.hh b/lm/builder/corpus_count.hh
index d3121ca45..165505c4a 100644
--- a/lm/builder/corpus_count.hh
+++ b/lm/builder/corpus_count.hh
@@ -40,7 +40,7 @@ class CorpusCount {
uint64_t &token_count_;
WordIndex &type_count_;
std::vector<bool>& prune_words_;
- const std::string& prune_vocab_filename_;
+ const std::string& prune_vocab_filename_;
std::size_t dedupe_mem_size_;
util::scoped_malloc dedupe_mem_;
diff --git a/lm/builder/initial_probabilities.cc b/lm/builder/initial_probabilities.cc
index b1dd96f31..80063eb2e 100644
--- a/lm/builder/initial_probabilities.cc
+++ b/lm/builder/initial_probabilities.cc
@@ -27,9 +27,9 @@ struct HashBufferEntry : public BufferEntry {
uint64_t hash_value;
};
-// Reads all entries in order like NGramStream does.
+// Reads all entries in order like NGramStream does.
// But deletes any entries that have CutoffCount below or equal to pruning
-// threshold.
+// threshold.
class PruneNGramStream {
public:
PruneNGramStream(const util::stream::ChainPosition &position) :
@@ -37,7 +37,7 @@ class PruneNGramStream {
dest_(NULL, NGram::OrderFromSize(position.GetChain().EntrySize())),
currentCount_(0),
block_(position)
- {
+ {
StartBlock();
}
@@ -50,7 +50,7 @@ class PruneNGramStream {
PruneNGramStream &operator++() {
assert(block_);
-
+
if(current_.Order() == 1 && *current_.begin() <= 2)
dest_.NextInMemory();
else if(currentCount_ > 0) {
@@ -59,9 +59,9 @@ class PruneNGramStream {
}
dest_.NextInMemory();
}
-
+
current_.NextInMemory();
-
+
uint8_t *block_base = static_cast<uint8_t*>(block_->Get());
if (current_.Base() == block_base + block_->ValidSize()) {
block_->SetValidSize(dest_.Base() - block_base);
@@ -70,13 +70,13 @@ class PruneNGramStream {
if (block_) {
currentCount_ = current_.CutoffCount();
}
- } else {
+ } else {
currentCount_ = current_.CutoffCount();
}
-
+
return *this;
}
-
+
private:
void StartBlock() {
for (; ; ++block_) {
@@ -85,13 +85,13 @@ class PruneNGramStream {
}
current_.ReBase(block_->Get());
currentCount_ = current_.CutoffCount();
-
+
dest_.ReBase(block_->Get());
}
NGram current_; // input iterator
NGram dest_; // output iterator
-
+
uint64_t currentCount_;
util::stream::Link block_;
@@ -155,24 +155,24 @@ class AddRight {
memcpy(previous_raw, in->begin(), size);
uint64_t denominator = 0;
uint64_t normalizer = 0;
-
+
uint64_t counts[4];
memset(counts, 0, sizeof(counts));
do {
denominator += in->UnmarkedCount();
-
+
// Collect unused probability mass from pruning.
// Becomes 0 for unpruned ngrams.
normalizer += in->UnmarkedCount() - in->CutoffCount();
-
+
// Chen&Goodman do not mention counting based on cutoffs, but
// backoff becomes larger than 1 otherwise, so probably needs
// to count cutoffs. Counts normally without pruning.
if(in->CutoffCount() > 0)
++counts[std::min(in->CutoffCount(), static_cast<uint64_t>(3))];
-
+
} while (++in && !memcmp(previous_raw, in->begin(), size));
-
+
BufferEntry &entry = *reinterpret_cast<BufferEntry*>(out.Get());
entry.denominator = static_cast<float>(denominator);
entry.gamma = 0.0;
@@ -182,9 +182,9 @@ class AddRight {
// Makes model sum to 1 with pruning (I hope).
entry.gamma += normalizer;
-
+
entry.gamma /= entry.denominator;
-
+
if(pruning_) {
// If pruning is enabled the stream actually contains HashBufferEntry, see InitialProbabilities(...),
// so add a hash value that identifies the current ngram.
@@ -244,13 +244,13 @@ class MergeRight {
++summed;
return;
}
-
+
std::vector<WordIndex> previous(grams->Order() - 1);
const std::size_t size = sizeof(WordIndex) * previous.size();
for (; grams; ++summed) {
memcpy(&previous[0], grams->begin(), size);
const BufferEntry &sums = *static_cast<const BufferEntry*>(summed.Get());
-
+
do {
Payload &pay = grams->Value();
pay.uninterp.prob = discount_.Apply(grams->UnmarkedCount()) / sums.denominator;
@@ -288,7 +288,7 @@ void InitialProbabilities(
gamma_out[i] >> AddRight(discounts[i], second, prune_vocab || prune_thresholds[i] > 0);
primary[i] >> MergeRight(config.interpolate_unigrams, gamma_out[i].Add(), discounts[i]);
-
+
// Don't bother with the OnlyGamma thread for something to discard.
if (i) gamma_out[i] >> OnlyGamma(prune_vocab || prune_thresholds[i] > 0);
}
diff --git a/lm/builder/initial_probabilities.hh b/lm/builder/initial_probabilities.hh
index 57e09cd51..a8ecf4dc2 100644
--- a/lm/builder/initial_probabilities.hh
+++ b/lm/builder/initial_probabilities.hh
@@ -15,17 +15,17 @@ struct InitialProbabilitiesConfig {
// These should be small buffers to keep the adder from getting too far ahead
util::stream::ChainConfig adder_in;
util::stream::ChainConfig adder_out;
- // SRILM doesn't normally interpolate unigrams.
+ // SRILM doesn't normally interpolate unigrams.
bool interpolate_unigrams;
};
/* Compute initial (uninterpolated) probabilities
* primary: the normal chain of n-grams. Incoming is context sorted adjusted
* counts. Outgoing has uninterpolated probabilities for use by Interpolate.
- * second_in: a second copy of the primary input. Discard the output.
+ * second_in: a second copy of the primary input. Discard the output.
* gamma_out: Computed gamma values are output on these chains in suffix order.
* The values are bare floats and should be buffered for interpolation to
- * use.
+ * use.
*/
void InitialProbabilities(
const InitialProbabilitiesConfig &config,
diff --git a/lm/builder/interpolate.cc b/lm/builder/interpolate.cc
index 0f9b98162..5b04cb3ff 100644
--- a/lm/builder/interpolate.cc
+++ b/lm/builder/interpolate.cc
@@ -47,7 +47,7 @@ class OutputQ {
private:
// Product of backoffs in the numerator divided by backoffs in the
- // denominator. Does not include
+ // denominator. Does not include
std::vector<float> q_delta_;
};
@@ -81,7 +81,7 @@ template <class Output> class Callback {
if(prune_vocab_ || prune_thresholds_[i + 1] > 0)
while(backoffs_[i])
++backoffs_[i];
-
+
if (backoffs_[i]) {
std::cerr << "Backoffs do not match for order " << (i + 1) << std::endl;
abort();
@@ -99,7 +99,7 @@ template <class Output> class Callback {
if(prune_vocab_ || prune_thresholds_[order_minus_1 + 1] > 0) {
//Compute hash value for current context
uint64_t current_hash = util::MurmurHashNative(gram.begin(), gram.Order() * sizeof(WordIndex));
-
+
const HashGamma *hashed_backoff = static_cast<const HashGamma*>(backoffs_[order_minus_1].Get());
while(current_hash != hashed_backoff->hash_value && ++backoffs_[order_minus_1])
hashed_backoff = static_cast<const HashGamma*>(backoffs_[order_minus_1].Get());
diff --git a/lm/builder/interpolate.hh b/lm/builder/interpolate.hh
index adfd9198f..207a16dfd 100644
--- a/lm/builder/interpolate.hh
+++ b/lm/builder/interpolate.hh
@@ -8,8 +8,8 @@
#include <stdint.h>
namespace lm { namespace builder {
-
-/* Interpolate step.
+
+/* Interpolate step.
* Input: suffix sorted n-grams with (p_uninterpolated, gamma) from
* InitialProbabilities.
* Output: suffix sorted n-grams with complete probability
diff --git a/lm/builder/joint_order.hh b/lm/builder/joint_order.hh
index 1728706dd..b05ef67fd 100644
--- a/lm/builder/joint_order.hh
+++ b/lm/builder/joint_order.hh
@@ -35,7 +35,7 @@ template <class Callback, class Compare> void JointOrder(const util::stream::Cha
// Does the context match the lower one?
if (!memcmp(streams[static_cast<int>(current) - 1]->begin(), streams[current]->begin() + Compare::kMatchOffset, sizeof(WordIndex) * current)) {
callback.Enter(current, *streams[current]);
- // Transition to looking for extensions.
+ // Transition to looking for extensions.
if (++current < order) continue;
}
#ifdef DEBUG
@@ -46,16 +46,16 @@ template <class Callback, class Compare> void JointOrder(const util::stream::Cha
abort();
}
#endif // DEBUG
- // No extension left.
+ // No extension left.
while(true) {
assert(current > 0);
--current;
callback.Exit(current, *streams[current]);
-
+
if (++streams[current]) break;
-
+
UTIL_THROW_IF(order != current + 1, FormatLoadException, "Detected n-gram without matching suffix");
-
+
order = current;
if (!order) return;
}
diff --git a/lm/builder/lmplz_main.cc b/lm/builder/lmplz_main.cc
index 65ec55729..5c9d86deb 100644
--- a/lm/builder/lmplz_main.cc
+++ b/lm/builder/lmplz_main.cc
@@ -53,7 +53,7 @@ std::vector<uint64_t> ParsePruning(const std::vector<std::string> &param, std::s
// throw if each n-gram order has not threshold specified
UTIL_THROW_IF(prune_thresholds.size() > order, util::Exception, "You specified pruning thresholds for orders 1 through " << prune_thresholds.size() << " but the model only has order " << order);
// threshold for unigram can only be 0 (no pruning)
-
+
// check if threshold are not in decreasing order
uint64_t lower_threshold = 0;
for (std::vector<uint64_t>::iterator it = prune_thresholds.begin(); it != prune_thresholds.end(); ++it) {
@@ -124,7 +124,7 @@ int main(int argc, char *argv[]) {
po::store(po::parse_command_line(argc, argv, options), vm);
if (argc == 1 || vm["help"].as<bool>()) {
- std::cerr <<
+ std::cerr <<
"Builds unpruned language models with modified Kneser-Ney smoothing.\n\n"
"Please cite:\n"
"@inproceedings{Heafield-estimate,\n"
@@ -147,7 +147,7 @@ int main(int argc, char *argv[]) {
std::cerr << "This machine has " << mem << " bytes of memory.\n\n";
} else {
std::cerr << "Unable to determine the amount of memory on this machine.\n\n";
- }
+ }
std::cerr << options << std::endl;
return 1;
}
@@ -191,11 +191,11 @@ int main(int argc, char *argv[]) {
else {
pipeline.prune_vocab = false;
}
-
+
util::NormalizeTempPrefix(pipeline.sort.temp_prefix);
lm::builder::InitialProbabilitiesConfig &initial = pipeline.initial_probs;
- // TODO: evaluate options for these.
+ // TODO: evaluate options for these.
initial.adder_in.total_memory = 32768;
initial.adder_in.block_count = 2;
initial.adder_out.total_memory = 32768;
diff --git a/lm/builder/ngram.hh b/lm/builder/ngram.hh
index 4525b3421..d0033206c 100644
--- a/lm/builder/ngram.hh
+++ b/lm/builder/ngram.hh
@@ -68,26 +68,26 @@ class NGram {
assert(size == TotalSize(ret));
return ret;
}
-
+
// manipulate msb to signal that ngram can be pruned
/*mjd**********************************************************************/
bool IsMarked() const {
return Value().count >> (sizeof(Value().count) * 8 - 1);
}
-
+
void Mark() {
Value().count |= (1ul << (sizeof(Value().count) * 8 - 1));
}
-
+
void Unmark() {
Value().count &= ~(1ul << (sizeof(Value().count) * 8 - 1));
}
-
+
uint64_t UnmarkedCount() const {
return Value().count & ~(1ul << (sizeof(Value().count) * 8 - 1));
}
-
+
uint64_t CutoffCount() const {
return IsMarked() ? 0 : UnmarkedCount();
}
diff --git a/lm/builder/pipeline.cc b/lm/builder/pipeline.cc
index fced0e3bd..1ca2e26f5 100644
--- a/lm/builder/pipeline.cc
+++ b/lm/builder/pipeline.cc
@@ -37,7 +37,7 @@ void PrintStatistics(const std::vector<uint64_t> &counts, const std::vector<uint
class Master {
public:
- explicit Master(PipelineConfig &config)
+ explicit Master(PipelineConfig &config)
: config_(config), chains_(config.order), files_(config.order) {
config_.minimum_block = std::max(NGram::TotalSize(config_.order), config_.minimum_block);
}
@@ -64,7 +64,7 @@ class Master {
CreateChains(config_.TotalMemory() - merge_using, count_bounds);
ngrams.Output(chains_.back(), merge_using);
- // Setup unigram file.
+ // Setup unigram file.
files_.push_back(util::MakeTemp(config_.TempPrefix()));
}
@@ -204,7 +204,7 @@ class Master {
PipelineConfig &config_;
util::stream::Chains chains_;
- // Often only unigrams, but sometimes all orders.
+ // Often only unigrams, but sometimes all orders.
util::FixedArray<util::stream::FileBuffer> files_;
};
@@ -214,7 +214,7 @@ void CountText(int text_file /* input */, int vocab_file /* output */, Master &m
const std::size_t vocab_usage = CorpusCount::VocabUsage(config.vocab_estimate);
UTIL_THROW_IF(config.TotalMemory() < vocab_usage, util::Exception, "Vocab hash size estimate " << vocab_usage << " exceeds total memory " << config.TotalMemory());
- std::size_t memory_for_chain =
+ std::size_t memory_for_chain =
// This much memory to work with after vocab hash table.
static_cast<float>(config.TotalMemory() - vocab_usage) /
// Solve for block size including the dedupe multiplier for one block.
@@ -252,7 +252,7 @@ void InitialProbabilities(const std::vector<uint64_t> &counts, const std::vector
util::stream::Chains gamma_chains(config.order);
InitialProbabilities(config.initial_probs, discounts, master.MutableChains(), second, gamma_chains, prune_thresholds, prune_vocab);
- // Don't care about gamma for 0.
+ // Don't care about gamma for 0.
gamma_chains[0] >> util::stream::kRecycle;
gammas.Init(config.order - 1);
for (std::size_t i = 1; i < config.order; ++i) {
@@ -307,16 +307,16 @@ void Pipeline(PipelineConfig &config, int text_file, Output &output) {
// master's destructor will wait for chains. But they might be deadlocked if
// this thread dies because e.g. it ran out of memory.
try {
- util::scoped_fd vocab_file(config.vocab_file.empty() ?
- util::MakeTemp(config.TempPrefix()) :
+ util::scoped_fd vocab_file(config.vocab_file.empty() ?
+ util::MakeTemp(config.TempPrefix()) :
util::CreateOrThrow(config.vocab_file.c_str()));
output.SetVocabFD(vocab_file.get());
uint64_t token_count;
std::string text_file_name;
-
+
std::vector<bool> prune_words;
CountText(text_file, vocab_file.get(), master, token_count, text_file_name, prune_words);
-
+
std::vector<uint64_t> counts;
std::vector<uint64_t> counts_pruned;
std::vector<Discount> discounts;
diff --git a/lm/builder/pipeline.hh b/lm/builder/pipeline.hh
index 8f4d82103..1987daff1 100644
--- a/lm/builder/pipeline.hh
+++ b/lm/builder/pipeline.hh
@@ -44,7 +44,7 @@ struct PipelineConfig {
// Compute collapsed q values instead of probability and backoff
bool output_q;
-
+
/* Computing the perplexity of LMs with different vocabularies is hard. For
* example, the lowest perplexity is attained by a unigram model that
* predicts p(<unk>) = 1 and has no other vocabulary. Also, linearly
diff --git a/lm/builder/print.cc b/lm/builder/print.cc
index 2c8c7276c..56a3134d8 100644
--- a/lm/builder/print.cc
+++ b/lm/builder/print.cc
@@ -55,7 +55,7 @@ void PrintARPA::Run(const util::stream::ChainPositions &positions) {
if (order != positions.size())
out << '\t' << stream->Value().complete.backoff;
out << '\n';
-
+
}
out << '\n';
}
diff --git a/lm/builder/print.hh b/lm/builder/print.hh
index ad282ea85..093a35697 100644
--- a/lm/builder/print.hh
+++ b/lm/builder/print.hh
@@ -14,7 +14,7 @@
// Warning: print routines read all unigrams before all bigrams before all
// trigrams etc. So if other parts of the chain move jointly, you'll have to
-// buffer.
+// buffer.
namespace lm { namespace builder {
@@ -42,7 +42,7 @@ class VocabReconstitute {
std::vector<const char*> map_;
};
-// Not defined, only specialized.
+// Not defined, only specialized.
template <class T> void PrintPayload(util::FakeOFStream &to, const Payload &payload);
template <> inline void PrintPayload<uint64_t>(util::FakeOFStream &to, const Payload &payload) {
// TODO slow
@@ -55,7 +55,7 @@ template <> inline void PrintPayload<ProbBackoff>(util::FakeOFStream &to, const
to << payload.complete.prob << ' ' << payload.complete.backoff;
}
-// template parameter is the type stored.
+// template parameter is the type stored.
template <class V> class Print {
public:
static void DumpSeparateFiles(const VocabReconstitute &vocab, const std::string &file_base, util::stream::Chains &chains) {
diff --git a/lm/builder/sort.hh b/lm/builder/sort.hh
index 712bb8e35..ed20b4b79 100644
--- a/lm/builder/sort.hh
+++ b/lm/builder/sort.hh
@@ -19,7 +19,7 @@ namespace builder {
*/
template <class Child> class Comparator : public std::binary_function<const void *, const void *, bool> {
public:
-
+
/**
* Constructs a comparator capable of comparing two n-grams.
*
@@ -51,8 +51,8 @@ template <class Child> class Comparator : public std::binary_function<const void
/**
* N-gram comparator that compares n-grams according to their reverse (suffix) order.
*
- * This comparator compares n-grams lexicographically, one word at a time,
- * beginning with the last word of each n-gram and ending with the first word of each n-gram.
+ * This comparator compares n-grams lexicographically, one word at a time,
+ * beginning with the last word of each n-gram and ending with the first word of each n-gram.
*
* Some examples of n-gram comparisons as defined by this comparator:
* - a b c == a b c
@@ -64,8 +64,8 @@ template <class Child> class Comparator : public std::binary_function<const void
*/
class SuffixOrder : public Comparator<SuffixOrder> {
public:
-
- /**
+
+ /**
* Constructs a comparator capable of comparing two n-grams.
*
* @param order Number of words in each n-gram
@@ -73,7 +73,7 @@ class SuffixOrder : public Comparator<SuffixOrder> {
explicit SuffixOrder(std::size_t order) : Comparator<SuffixOrder>(order) {}
/**
- * Compares two n-grams lexicographically, one word at a time,
+ * Compares two n-grams lexicographically, one word at a time,
* beginning with the last word of each n-gram and ending with the first word of each n-gram.
*
* @param lhs A pointer to the n-gram on the left-hand side of the comparison
@@ -90,11 +90,11 @@ class SuffixOrder : public Comparator<SuffixOrder> {
static const unsigned kMatchOffset = 1;
};
-
+
/**
* N-gram comparator that compares n-grams according to the reverse (suffix) order of the n-gram context.
*
- * This comparator compares n-grams lexicographically, one word at a time,
+ * This comparator compares n-grams lexicographically, one word at a time,
* beginning with the penultimate word of each n-gram and ending with the first word of each n-gram;
* finally, this comparator compares the last word of each n-gram.
*
@@ -108,8 +108,8 @@ class SuffixOrder : public Comparator<SuffixOrder> {
*/
class ContextOrder : public Comparator<ContextOrder> {
public:
-
- /**
+
+ /**
* Constructs a comparator capable of comparing two n-grams.
*
* @param order Number of words in each n-gram
@@ -117,7 +117,7 @@ class ContextOrder : public Comparator<ContextOrder> {
explicit ContextOrder(std::size_t order) : Comparator<ContextOrder>(order) {}
/**
- * Compares two n-grams lexicographically, one word at a time,
+ * Compares two n-grams lexicographically, one word at a time,
* beginning with the penultimate word of each n-gram and ending with the first word of each n-gram;
* finally, this comparator compares the last word of each n-gram.
*
@@ -136,7 +136,7 @@ class ContextOrder : public Comparator<ContextOrder> {
/**
* N-gram comparator that compares n-grams according to their natural (prefix) order.
*
- * This comparator compares n-grams lexicographically, one word at a time,
+ * This comparator compares n-grams lexicographically, one word at a time,
* beginning with the first word of each n-gram and ending with the last word of each n-gram.
*
* Some examples of n-gram comparisons as defined by this comparator:
@@ -149,8 +149,8 @@ class ContextOrder : public Comparator<ContextOrder> {
*/
class PrefixOrder : public Comparator<PrefixOrder> {
public:
-
- /**
+
+ /**
* Constructs a comparator capable of comparing two n-grams.
*
* @param order Number of words in each n-gram
@@ -158,7 +158,7 @@ class PrefixOrder : public Comparator<PrefixOrder> {
explicit PrefixOrder(std::size_t order) : Comparator<PrefixOrder>(order) {}
/**
- * Compares two n-grams lexicographically, one word at a time,
+ * Compares two n-grams lexicographically, one word at a time,
* beginning with the first word of each n-gram and ending with the last word of each n-gram.
*
* @param lhs A pointer to the n-gram on the left-hand side of the comparison
@@ -171,7 +171,7 @@ class PrefixOrder : public Comparator<PrefixOrder> {
}
return false;
}
-
+
static const unsigned kMatchOffset = 0;
};
@@ -179,7 +179,7 @@ class PrefixOrder : public Comparator<PrefixOrder> {
struct AddCombiner {
bool operator()(void *first_void, const void *second_void, const SuffixOrder &compare) const {
NGram first(first_void, compare.Order());
- // There isn't a const version of NGram.
+ // There isn't a const version of NGram.
NGram second(const_cast<void*>(second_void), compare.Order());
if (memcmp(first.begin(), second.begin(), sizeof(WordIndex) * compare.Order())) return false;
first.Count() += second.Count();
@@ -204,10 +204,10 @@ template <class Compare> class Sorts : public util::FixedArray<util::stream::Sor
typedef util::FixedArray<S> P;
public:
-
+
/**
* Constructs, but does not initialize.
- *
+ *
* @ref util::FixedArray::Init() "Init" must be called before use.
*
* @see util::FixedArray::Init()
@@ -222,7 +222,7 @@ template <class Compare> class Sorts : public util::FixedArray<util::stream::Sor
*/
explicit Sorts(std::size_t number) : util::FixedArray<util::stream::Sort<Compare> >(number) {}
- /**
+ /**
* Constructs a new @ref util::stream::Sort "Sort" object which is stored in this @ref util::FixedArray "array".
*
* The new @ref util::stream::Sort "Sort" object is constructed using the provided @ref util::stream::SortConfig "SortConfig" and @ref Comparator "ngram comparator";
diff --git a/lm/enumerate_vocab.hh b/lm/enumerate_vocab.hh
index f5ce78985..f4c94cd26 100644
--- a/lm/enumerate_vocab.hh
+++ b/lm/enumerate_vocab.hh
@@ -10,7 +10,7 @@ namespace lm {
* and implement Add. Then put a pointer in Config.enumerate_vocab; it does
* not take ownership. Add is called once per vocab word. index starts at 0
* and increases by 1 each time. This is only used by the Model constructor;
- * the pointer is not retained by the class.
+ * the pointer is not retained by the class.
*/
class EnumerateVocab {
public:
diff --git a/lm/facade.hh b/lm/facade.hh
index 8e12b62ee..325ef159a 100644
--- a/lm/facade.hh
+++ b/lm/facade.hh
@@ -9,8 +9,8 @@
namespace lm {
namespace base {
-// Common model interface that depends on knowing the specific classes.
-// Curiously recurring template pattern.
+// Common model interface that depends on knowing the specific classes.
+// Curiously recurring template pattern.
template <class Child, class StateT, class VocabularyT> class ModelFacade : public Model {
public:
typedef StateT State;
@@ -32,7 +32,7 @@ template <class Child, class StateT, class VocabularyT> class ModelFacade : publ
*reinterpret_cast<State*>(out_state));
}
- // Default Score function calls FullScore. Model can override this.
+ // Default Score function calls FullScore. Model can override this.
float Score(const State &in_state, const WordIndex new_word, State &out_state) const {
return static_cast<const Child*>(this)->FullScore(in_state, new_word, out_state).prob;
}
@@ -53,7 +53,7 @@ template <class Child, class StateT, class VocabularyT> class ModelFacade : publ
virtual ~ModelFacade() {}
- // begin_sentence and null_context can disappear after. vocab should stay.
+ // begin_sentence and null_context can disappear after. vocab should stay.
void Init(const State &begin_sentence, const State &null_context, const Vocabulary &vocab, unsigned char order) {
begin_sentence_ = begin_sentence;
null_context_ = null_context;
diff --git a/lm/filter/count_io.hh b/lm/filter/count_io.hh
index de894baf8..02eb78baa 100644
--- a/lm/filter/count_io.hh
+++ b/lm/filter/count_io.hh
@@ -33,7 +33,7 @@ class CountOutput : boost::noncopyable {
class CountBatch {
public:
- explicit CountBatch(std::streamsize initial_read)
+ explicit CountBatch(std::streamsize initial_read)
: initial_read_(initial_read) {
buffer_.reserve(initial_read);
}
@@ -66,7 +66,7 @@ class CountBatch {
private:
std::streamsize initial_read_;
- // This could have been a std::string but that's less happy with raw writes.
+ // This could have been a std::string but that's less happy with raw writes.
std::vector<char> buffer_;
};
diff --git a/lm/filter/filter_main.cc b/lm/filter/filter_main.cc
index 82fdc1ef7..6e89d1fa3 100644
--- a/lm/filter/filter_main.cc
+++ b/lm/filter/filter_main.cc
@@ -58,7 +58,7 @@ typedef enum {MODE_COPY, MODE_SINGLE, MODE_MULTIPLE, MODE_UNION, MODE_UNSET} Fil
typedef enum {FORMAT_ARPA, FORMAT_COUNT} Format;
struct Config {
- Config() :
+ Config() :
#ifndef NTHREAD
batch_size(25000),
threads(boost::thread::hardware_concurrency()),
diff --git a/lm/filter/format.hh b/lm/filter/format.hh
index 5a2e2db3c..d453f05b8 100644
--- a/lm/filter/format.hh
+++ b/lm/filter/format.hh
@@ -134,12 +134,12 @@ struct CountFormat {
/* For multithreading, the buffer classes hold batches of filter inputs and
* outputs in memory. The strings get reused a lot, so keep them around
- * instead of clearing each time.
+ * instead of clearing each time.
*/
class InputBuffer {
public:
InputBuffer() : actual_(0) {}
-
+
void Reserve(size_t size) { lines_.reserve(size); }
template <class Output> void AddNGram(const StringPiece &ngram, const StringPiece &line, Output &output) {
@@ -179,18 +179,18 @@ class BinaryOutputBuffer {
void Reserve(size_t size) {
lines_.reserve(size);
}
-
+
void AddNGram(const StringPiece &line) {
lines_.push_back(line);
}
-
+
template <class Output> void Flush(Output &output) {
for (std::vector<StringPiece>::const_iterator i = lines_.begin(); i != lines_.end(); ++i) {
output.AddNGram(*i);
}
lines_.clear();
}
-
+
private:
std::vector<StringPiece> lines_;
};
@@ -234,7 +234,7 @@ class MultipleOutputBuffer {
private:
struct Annotated {
- // If this is empty, send to all systems.
+ // If this is empty, send to all systems.
// A filter should never send to all systems and send to a single one.
std::vector<size_t> systems;
StringPiece line;
diff --git a/lm/filter/phrase.cc b/lm/filter/phrase.cc
index 345900ffa..d8260d54e 100644
--- a/lm/filter/phrase.cc
+++ b/lm/filter/phrase.cc
@@ -31,14 +31,14 @@ unsigned int ReadMultiple(std::istream &in, Substrings &out) {
word.clear();
}
if (c == ' ') continue;
- // It's more than just a space. Close out the phrase.
+ // It's more than just a space. Close out the phrase.
if (!phrase.empty()) {
sentence_content = true;
out.AddPhrase(sentence_id, phrase.begin(), phrase.end());
phrase.clear();
}
if (c == '\t' || c == '\v') continue;
- // It's more than a space or tab: a newline.
+ // It's more than a space or tab: a newline.
if (sentence_content) {
++sentence_id;
sentence_content = false;
@@ -53,7 +53,7 @@ typedef unsigned int Sentence;
typedef std::vector<Sentence> Sentences;
} // namespace
-namespace detail {
+namespace detail {
const StringPiece kEndSentence("</s>");
@@ -61,7 +61,7 @@ class Arc {
public:
Arc() {}
- // For arcs from one vertex to another.
+ // For arcs from one vertex to another.
void SetPhrase(detail::Vertex &from, detail::Vertex &to, const Sentences &intersect) {
Set(to, intersect);
from_ = &from;
@@ -69,7 +69,7 @@ class Arc {
/* For arcs from before the n-gram begins to somewhere in the n-gram (right
* aligned). These have no from_ vertex; it implictly matches every
- * sentence. This also handles when the n-gram is a substring of a phrase.
+ * sentence. This also handles when the n-gram is a substring of a phrase.
*/
void SetRight(detail::Vertex &to, const Sentences &complete) {
Set(to, complete);
@@ -87,12 +87,12 @@ class Arc {
/* When this function returns:
* If Empty() then there's nothing left from this intersection.
*
- * If Current() == to then to is part of the intersection.
+ * If Current() == to then to is part of the intersection.
*
* Otherwise, Current() > to. In this case, to is not part of the
* intersection and neither is anything < Current(). To determine if
* any value >= Current() is in the intersection, call LowerBound again
- * with the value.
+ * with the value.
*/
void LowerBound(const Sentence to);
@@ -160,15 +160,15 @@ void Arc::Set(Vertex &to, const Sentences &sentences) {
void Vertex::LowerBound(const Sentence to) {
if (Empty()) return;
- // Union lower bound.
+ // Union lower bound.
while (true) {
Arc *top = incoming_.top();
if (top->Current() > to) {
current_ = top->Current();
return;
}
- // If top->Current() == to, we still need to verify that's an actual
- // element and not just a bound.
+ // If top->Current() == to, we still need to verify that's an actual
+ // element and not just a bound.
incoming_.pop();
top->LowerBound(to);
if (!top->Empty()) {
@@ -213,13 +213,13 @@ void BuildGraph(const Substrings &phrase, const std::vector<Hash> &hashes, detai
}
}
- // Phrases starting at the second or later word in the n-gram.
+ // Phrases starting at the second or later word in the n-gram.
Vertex *vertex_from = vertices;
for (const Hash *word_from = first_word + 1; word_from != &*hashes.end(); ++word_from, ++vertex_from) {
hash = 0;
Vertex *vertex_to = vertex_from + 1;
for (const Hash *word_to = word_from; ; ++word_to, ++vertex_to) {
- // Notice that word_to and vertex_to have the same index.
+ // Notice that word_to and vertex_to have the same index.
hash = util::MurmurHashNative(&hash, sizeof(uint64_t), *word_to);
// Now hash covers [word_from, word_to].
if (word_to == last_word) {
@@ -250,7 +250,7 @@ detail::Vertex &ConditionCommon::MakeGraph() {
vertices_.clear();
vertices_.resize(hashes_.size());
arcs_.clear();
- // One for every substring.
+ // One for every substring.
arcs_.resize(((hashes_.size() + 1) * hashes_.size()) / 2);
BuildGraph(substrings_, hashes_, &*vertices_.begin(), &*arcs_.begin());
return vertices_[hashes_.size() - 1];
diff --git a/lm/filter/phrase.hh b/lm/filter/phrase.hh
index e5898c9ae..5227ab246 100644
--- a/lm/filter/phrase.hh
+++ b/lm/filter/phrase.hh
@@ -27,7 +27,7 @@ class Substrings {
private:
/* This is the value in a hash table where the key is a string. It indicates
* four sets of sentences:
- * substring is sentences with a phrase containing the key as a substring.
+ * substring is sentences with a phrase containing the key as a substring.
* left is sentencess with a phrase that begins with the key (left aligned).
* right is sentences with a phrase that ends with the key (right aligned).
* phrase is sentences where the key is a phrase.
@@ -39,8 +39,8 @@ class Substrings {
/* Most of the CPU is hash table lookups, so let's not complicate it with
* vector equality comparisons. If a collision happens, the SentenceRelation
* structure will contain the union of sentence ids over the colliding strings.
- * In that case, the filter will be slightly more permissive.
- * The key here is the same as boost's hash of std::vector<std::string>.
+ * In that case, the filter will be slightly more permissive.
+ * The key here is the same as boost's hash of std::vector<std::string>.
*/
typedef boost::unordered_map<Hash, SentenceRelation> Table;
@@ -58,9 +58,9 @@ class Substrings {
LM_FILTER_PHRASE_METHOD(Phrase, phrase)
#pragma GCC diagnostic ignored "-Wuninitialized" // end != finish so there's always an initialization
- // sentence_id must be non-decreasing. Iterators are over words in the phrase.
+ // sentence_id must be non-decreasing. Iterators are over words in the phrase.
template <class Iterator> void AddPhrase(unsigned int sentence_id, const Iterator &begin, const Iterator &end) {
- // Iterate over all substrings.
+ // Iterate over all substrings.
for (Iterator start = begin; start != end; ++start) {
Hash hash = 0;
SentenceRelation *relation;
@@ -85,7 +85,7 @@ class Substrings {
};
// Read a file with one sentence per line containing tab-delimited phrases of
-// space-separated words.
+// space-separated words.
unsigned int ReadMultiple(std::istream &in, Substrings &out);
namespace detail {
@@ -94,7 +94,7 @@ extern const StringPiece kEndSentence;
template <class Iterator> void MakeHashes(Iterator i, const Iterator &end, std::vector<Hash> &hashes) {
hashes.clear();
if (i == end) return;
- // TODO: check strict phrase boundaries after <s> and before </s>. For now, just skip tags.
+ // TODO: check strict phrase boundaries after <s> and before </s>. For now, just skip tags.
if ((i->data()[0] == '<') && (i->data()[i->size() - 1] == '>')) {
++i;
}
diff --git a/lm/filter/phrase_table_vocab_main.cc b/lm/filter/phrase_table_vocab_main.cc
index e0f47d894..e8a8d0265 100644
--- a/lm/filter/phrase_table_vocab_main.cc
+++ b/lm/filter/phrase_table_vocab_main.cc
@@ -88,7 +88,7 @@ class TargetWords {
class Input {
public:
- explicit Input(std::size_t max_length)
+ explicit Input(std::size_t max_length)
: max_length_(max_length), sentence_id_(0), empty_() {}
void AddSentence(StringPiece sentence, TargetWords &targets) {
@@ -125,7 +125,7 @@ class Input {
Map map_;
std::size_t sentence_id_;
-
+
// Temporaries in AddSentence.
std::string canonical_;
std::vector<std::size_t> starts_;
diff --git a/lm/filter/thread.hh b/lm/filter/thread.hh
index 6a6523f90..88e069cb1 100644
--- a/lm/filter/thread.hh
+++ b/lm/filter/thread.hh
@@ -13,29 +13,29 @@ namespace lm {
template <class OutputBuffer> class ThreadBatch {
public:
ThreadBatch() {}
-
+
void Reserve(size_t size) {
input_.Reserve(size);
output_.Reserve(size);
}
- // File reading thread.
+ // File reading thread.
InputBuffer &Fill(uint64_t sequence) {
sequence_ = sequence;
// Why wait until now to clear instead of after output? free in the same
- // thread as allocated.
+ // thread as allocated.
input_.Clear();
return input_;
}
- // Filter worker thread.
+ // Filter worker thread.
template <class Filter> void CallFilter(Filter &filter) {
input_.CallFilter(filter, output_);
}
uint64_t Sequence() const { return sequence_; }
- // File writing thread.
+ // File writing thread.
template <class RealOutput> void Flush(RealOutput &output) {
output_.Flush(output);
}
@@ -73,7 +73,7 @@ template <class Batch, class Output> class OutputWorker {
void operator()(Request request) {
assert(request->Sequence() >= base_sequence_);
- // Assemble the output in order.
+ // Assemble the output in order.
uint64_t pos = request->Sequence() - base_sequence_;
if (pos >= ordering_.size()) {
ordering_.resize(pos + 1, NULL);
@@ -102,7 +102,7 @@ template <class Filter, class OutputBuffer, class RealOutput> class Controller :
typedef ThreadBatch<OutputBuffer> Batch;
public:
- Controller(size_t batch_size, size_t queue, size_t workers, const Filter &filter, RealOutput &output)
+ Controller(size_t batch_size, size_t queue, size_t workers, const Filter &filter, RealOutput &output)
: batch_size_(batch_size), queue_size_(queue),
batches_(queue),
to_read_(queue),
diff --git a/lm/filter/vocab.cc b/lm/filter/vocab.cc
index 0a5585580..2aca4fc60 100644
--- a/lm/filter/vocab.cc
+++ b/lm/filter/vocab.cc
@@ -30,7 +30,7 @@ bool IsLineEnd(std::istream &in) {
}// namespace
// Read space separated words in enter separated lines. These lines can be
-// very long, so don't read an entire line at a time.
+// very long, so don't read an entire line at a time.
unsigned int ReadMultiple(std::istream &in, boost::unordered_map<std::string, std::vector<unsigned int> > &out) {
in.exceptions(std::istream::badbit);
unsigned int sentence = 0;
diff --git a/lm/filter/vocab.hh b/lm/filter/vocab.hh
index 2ee6e1f8a..397a93237 100644
--- a/lm/filter/vocab.hh
+++ b/lm/filter/vocab.hh
@@ -26,7 +26,7 @@ unsigned int ReadMultiple(std::istream &in, boost::unordered_map<std::string, st
/* Is this a special tag like <s> or <UNK>? This actually includes anything
* surrounded with < and >, which most tokenizers separate for real words, so
- * this should not catch real words as it looks at a single token.
+ * this should not catch real words as it looks at a single token.
*/
inline bool IsTag(const StringPiece &value) {
// The parser should never give an empty string.
diff --git a/lm/filter/wrapper.hh b/lm/filter/wrapper.hh
index 822c5c27d..227ec8e45 100644
--- a/lm/filter/wrapper.hh
+++ b/lm/filter/wrapper.hh
@@ -13,7 +13,7 @@ namespace lm {
// multiple-output filter so clients code against one interface.
template <class Binary> class BinaryFilter {
public:
- // Binary modes are just references (and a set) and it makes the API cleaner to copy them.
+ // Binary modes are just references (and a set) and it makes the API cleaner to copy them.
explicit BinaryFilter(Binary binary) : binary_(binary) {}
template <class Iterator, class Output> void AddNGram(const Iterator &begin, const Iterator &end, const StringPiece &line, Output &output) {
diff --git a/lm/left.hh b/lm/left.hh
index 36d613697..4d496863c 100644
--- a/lm/left.hh
+++ b/lm/left.hh
@@ -1,22 +1,22 @@
/* Efficient left and right language model state for sentence fragments.
* Intended usage:
- * Store ChartState with every chart entry.
+ * Store ChartState with every chart entry.
* To do a rule application:
- * 1. Make a ChartState object for your new entry.
- * 2. Construct RuleScore.
- * 3. Going from left to right, call Terminal or NonTerminal.
- * For terminals, just pass the vocab id.
+ * 1. Make a ChartState object for your new entry.
+ * 2. Construct RuleScore.
+ * 3. Going from left to right, call Terminal or NonTerminal.
+ * For terminals, just pass the vocab id.
* For non-terminals, pass that non-terminal's ChartState.
* If your decoder expects scores inclusive of subtree scores (i.e. you
* label entries with the highest-scoring path), pass the non-terminal's
- * score as prob.
+ * score as prob.
* If your decoder expects relative scores and will walk the chart later,
- * pass prob = 0.0.
+ * pass prob = 0.0.
* In other words, the only effect of prob is that it gets added to the
- * returned log probability.
- * 4. Call Finish. It returns the log probability.
+ * returned log probability.
+ * 4. Call Finish. It returns the log probability.
*
- * There's a couple more details:
+ * There's a couple more details:
* Do not pass <s> to Terminal as it is formally not a word in the sentence,
* only context. Instead, call BeginSentence. If called, it should be the
* first call after RuleScore is constructed (since <s> is always the
@@ -27,12 +27,12 @@
* Hashing and sorting comparison operators are provided. All state objects
* are POD. If you intend to use memcmp on raw state objects, you must call
* ZeroRemaining first, as the value of array entries beyond length is
- * otherwise undefined.
+ * otherwise undefined.
*
* Usage is of course not limited to chart decoding. Anything that generates
* sentence fragments missing left context could benefit. For example, a
* phrase-based decoder could pre-score phrases, storing ChartState with each
- * phrase, even if hypotheses are generated left-to-right.
+ * phrase, even if hypotheses are generated left-to-right.
*/
#ifndef LM_LEFT_H
@@ -77,7 +77,7 @@ template <class M> class RuleScore {
left_done_ = true;
}
- // Faster version of NonTerminal for the case where the rule begins with a non-terminal.
+ // Faster version of NonTerminal for the case where the rule begins with a non-terminal.
void BeginNonTerminal(const ChartState &in, float prob = 0.0) {
prob_ = prob;
*out_ = in;
@@ -86,7 +86,7 @@ template <class M> class RuleScore {
void NonTerminal(const ChartState &in, float prob = 0.0) {
prob_ += prob;
-
+
if (!in.left.length) {
if (in.left.full) {
for (const float *i = out_->right.backoff; i < out_->right.backoff + out_->right.length; ++i) prob_ += *i;
@@ -131,26 +131,26 @@ template <class M> class RuleScore {
return;
}
- // Right state was minimized, so it's already independent of the new words to the left.
+ // Right state was minimized, so it's already independent of the new words to the left.
if (in.right.length < in.left.length) {
out_->right = in.right;
return;
}
- // Shift exisiting words down.
+ // Shift existing words down.
for (WordIndex *i = out_->right.words + next_use - 1; i >= out_->right.words; --i) {
*(i + in.right.length) = *i;
}
- // Add words from in.right.
+ // Add words from in.right.
std::copy(in.right.words, in.right.words + in.right.length, out_->right.words);
- // Assemble backoff composed on the existing state's backoff followed by the new state's backoff.
+ // Assemble backoff composed on the existing state's backoff followed by the new state's backoff.
std::copy(in.right.backoff, in.right.backoff + in.right.length, out_->right.backoff);
std::copy(back, back + next_use, out_->right.backoff + in.right.length);
out_->right.length = in.right.length + next_use;
}
float Finish() {
- // A N-1-gram might extend left and right but we should still set full to true because it's an N-1-gram.
+ // A N-1-gram might extend left and right but we should still set full to true because it's an N-1-gram.
out_->left.full = left_done_ || (out_->left.length == model_.Order() - 1);
return prob_;
}
@@ -173,17 +173,17 @@ template <class M> class RuleScore {
back_in, // Backoffs to use
in.left.pointers[extend_length - 1], extend_length, // Words to be extended
back_out, // Backoffs for the next score
- next_use)); // Length of n-gram to use in next scoring.
+ next_use)); // Length of n-gram to use in next scoring.
if (next_use != out_->right.length) {
left_done_ = true;
if (!next_use) {
- // Early exit.
+ // Early exit.
out_->right = in.right;
prob_ += model_.UnRest(in.left.pointers + extend_length, in.left.pointers + in.left.length, extend_length + 1);
return true;
}
}
- // Continue scoring.
+ // Continue scoring.
return false;
}
diff --git a/lm/left_test.cc b/lm/left_test.cc
index b45614613..fdb641627 100644
--- a/lm/left_test.cc
+++ b/lm/left_test.cc
@@ -16,7 +16,7 @@ namespace {
#define Term(word) score.Terminal(m.GetVocabulary().Index(word));
#define VCheck(word, value) BOOST_CHECK_EQUAL(m.GetVocabulary().Index(word), value);
-// Apparently some Boost versions use templates and are pretty strict about types matching.
+// Apparently some Boost versions use templates and are pretty strict about types matching.
#define SLOPPY_CHECK_CLOSE(ref, value, tol) BOOST_CHECK_CLOSE(static_cast<double>(ref), static_cast<double>(value), static_cast<double>(tol));
template <class M> void Short(const M &m) {
@@ -175,7 +175,7 @@ template <class M> void LookupVocab(const M &m, const StringPiece &str, std::vec
SLOPPY_CHECK_CLOSE(expect, RightToLeft(m, words, rest), 0.001); \
SLOPPY_CHECK_CLOSE(expect, TreeMiddle(m, words, rest), 0.001); \
-// Build sentences, or parts thereof, from right to left.
+// Build sentences, or parts thereof, from right to left.
template <class M> void GrowBig(const M &m, bool rest = false) {
std::vector<WordIndex> words;
float expect;
diff --git a/lm/lm_exception.hh b/lm/lm_exception.hh
index 8bb610812..85a5738eb 100644
--- a/lm/lm_exception.hh
+++ b/lm/lm_exception.hh
@@ -1,7 +1,7 @@
#ifndef LM_LM_EXCEPTION_H
#define LM_LM_EXCEPTION_H
-// Named to avoid conflict with util/exception.hh.
+// Named to avoid conflict with util/exception.hh.
#include "util/exception.hh"
#include "util/string_piece.hh"
diff --git a/lm/max_order.hh b/lm/max_order.hh
index 5f181f3fc..0ad1379e0 100644
--- a/lm/max_order.hh
+++ b/lm/max_order.hh
@@ -1,7 +1,7 @@
#ifndef LM_MAX_ORDER_H
#define LM_MAX_ORDER_H
/* IF YOUR BUILD SYSTEM PASSES -DKENLM_MAX_ORDER, THEN CHANGE THE BUILD SYSTEM.
- * If not, this is the default maximum order.
+ * If not, this is the default maximum order.
* Having this limit means that State can be
* (kMaxOrder - 1) * sizeof(float) bytes instead of
* sizeof(float*) + (kMaxOrder - 1) * sizeof(float) + malloc overhead
diff --git a/lm/model.hh b/lm/model.hh
index c67ae2eed..b2bbe3999 100644
--- a/lm/model.hh
+++ b/lm/model.hh
@@ -25,7 +25,7 @@ namespace lm {
namespace ngram {
namespace detail {
-// Should return the same results as SRI.
+// Should return the same results as SRI.
// ModelFacade typedefs Vocabulary so we use VocabularyT to avoid naming conflicts.
template <class Search, class VocabularyT> class GenericModel : public base::ModelFacade<GenericModel<Search, VocabularyT>, State, VocabularyT> {
private:
@@ -38,7 +38,7 @@ template <class Search, class VocabularyT> class GenericModel : public base::Mod
/* Get the size of memory that will be mapped given ngram counts. This
* does not include small non-mapped control structures, such as this class
- * itself.
+ * itself.
*/
static uint64_t Size(const std::vector<uint64_t> &counts, const Config &config = Config());
@@ -46,47 +46,47 @@ template <class Search, class VocabularyT> class GenericModel : public base::Mod
* files must have the format expected by this class or you'll get an
* exception. So TrieModel can only load ARPA or binary created by
* TrieModel. To classify binary files, call RecognizeBinary in
- * lm/binary_format.hh.
+ * lm/binary_format.hh.
*/
explicit GenericModel(const char *file, const Config &config = Config());
/* Score p(new_word | in_state) and incorporate new_word into out_state.
* Note that in_state and out_state must be different references:
- * &in_state != &out_state.
+ * &in_state != &out_state.
*/
FullScoreReturn FullScore(const State &in_state, const WordIndex new_word, State &out_state) const;
/* Slower call without in_state. Try to remember state, but sometimes it
- * would cost too much memory or your decoder isn't setup properly.
+ * would cost too much memory or your decoder isn't setup properly.
* To use this function, make an array of WordIndex containing the context
* vocabulary ids in reverse order. Then, pass the bounds of the array:
* [context_rbegin, context_rend). The new_word is not part of the context
- * array unless you intend to repeat words.
+ * array unless you intend to repeat words.
*/
FullScoreReturn FullScoreForgotState(const WordIndex *context_rbegin, const WordIndex *context_rend, const WordIndex new_word, State &out_state) const;
/* Get the state for a context. Don't use this if you can avoid it. Use
* BeginSentenceState or NullContextState and extend from those. If
* you're only going to use this state to call FullScore once, use
- * FullScoreForgotState.
+ * FullScoreForgotState.
* To use this function, make an array of WordIndex containing the context
* vocabulary ids in reverse order. Then, pass the bounds of the array:
- * [context_rbegin, context_rend).
+ * [context_rbegin, context_rend).
*/
void GetState(const WordIndex *context_rbegin, const WordIndex *context_rend, State &out_state) const;
/* More efficient version of FullScore where a partial n-gram has already
- * been scored.
- * NOTE: THE RETURNED .rest AND .prob ARE RELATIVE TO THE .rest RETURNED BEFORE.
+ * been scored.
+ * NOTE: THE RETURNED .rest AND .prob ARE RELATIVE TO THE .rest RETURNED BEFORE.
*/
FullScoreReturn ExtendLeft(
- // Additional context in reverse order. This will update add_rend to
+ // Additional context in reverse order. This will update add_rend to
const WordIndex *add_rbegin, const WordIndex *add_rend,
- // Backoff weights to use.
+ // Backoff weights to use.
const float *backoff_in,
// extend_left returned by a previous query.
uint64_t extend_pointer,
- // Length of n-gram that the pointer corresponds to.
+ // Length of n-gram that the pointer corresponds to.
unsigned char extend_length,
// Where to write additional backoffs for [extend_length + 1, min(Order() - 1, return.ngram_length)]
float *backoff_out,
@@ -95,17 +95,17 @@ template <class Search, class VocabularyT> class GenericModel : public base::Mod
/* Return probabilities minus rest costs for an array of pointers. The
* first length should be the length of the n-gram to which pointers_begin
- * points.
+ * points.
*/
float UnRest(const uint64_t *pointers_begin, const uint64_t *pointers_end, unsigned char first_length) const {
- // Compiler should optimize this if away.
+ // Compiler should optimize this if away.
return Search::kDifferentRest ? InternalUnRest(pointers_begin, pointers_end, first_length) : 0.0;
}
private:
FullScoreReturn ScoreExceptBackoff(const WordIndex *const context_rbegin, const WordIndex *const context_rend, const WordIndex new_word, State &out_state) const;
- // Score bigrams and above. Do not include backoff.
+ // Score bigrams and above. Do not include backoff.
void ResumeScore(const WordIndex *context_rbegin, const WordIndex *const context_rend, unsigned char starting_order_minus_2, typename Search::Node &node, float *backoff_out, unsigned char &next_use, FullScoreReturn &ret) const;
// Appears after Size in the cc file.
@@ -116,7 +116,7 @@ template <class Search, class VocabularyT> class GenericModel : public base::Mod
float InternalUnRest(const uint64_t *pointers_begin, const uint64_t *pointers_end, unsigned char first_length) const;
BinaryFormat backing_;
-
+
VocabularyT vocab_;
Search search_;
@@ -124,8 +124,8 @@ template <class Search, class VocabularyT> class GenericModel : public base::Mod
} // namespace detail
-// Instead of typedef, inherit. This allows the Model etc to be forward declared.
-// Oh the joys of C and C++.
+// Instead of typedef, inherit. This allows the Model etc to be forward declared.
+// Oh the joys of C and C++.
#define LM_COMMA() ,
#define LM_NAME_MODEL(name, from)\
class name : public from {\
@@ -140,7 +140,7 @@ LM_NAME_MODEL(ArrayTrieModel, detail::GenericModel<trie::TrieSearch<DontQuantize
LM_NAME_MODEL(QuantTrieModel, detail::GenericModel<trie::TrieSearch<SeparatelyQuantize LM_COMMA() trie::DontBhiksha> LM_COMMA() SortedVocabulary>);
LM_NAME_MODEL(QuantArrayTrieModel, detail::GenericModel<trie::TrieSearch<SeparatelyQuantize LM_COMMA() trie::ArrayBhiksha> LM_COMMA() SortedVocabulary>);
-// Default implementation. No real reason for it to be the default.
+// Default implementation. No real reason for it to be the default.
typedef ::lm::ngram::ProbingVocabulary Vocabulary;
typedef ProbingModel Model;
diff --git a/lm/model_test.cc b/lm/model_test.cc
index 2e4b14fb4..d408d6fe4 100644
--- a/lm/model_test.cc
+++ b/lm/model_test.cc
@@ -7,7 +7,7 @@
#include <boost/test/unit_test.hpp>
#include <boost/test/floating_point_comparison.hpp>
-// Apparently some Boost versions use templates and are pretty strict about types matching.
+// Apparently some Boost versions use templates and are pretty strict about types matching.
#define SLOPPY_CHECK_CLOSE(ref, value, tol) BOOST_CHECK_CLOSE(static_cast<double>(ref), static_cast<double>(value), static_cast<double>(tol));
namespace lm {
@@ -118,7 +118,7 @@ template <class M> void Blanks(const M &model) {
AppendTest("not_found", 1, -1.995635 - 7.0 - 0.30103, true);
state = model.NullContextState();
- // higher looking is a blank.
+ // higher looking is a blank.
AppendTest("higher", 1, -1.509559, false);
AppendTest("looking", 2, -1.285941 - 0.30103, false);
@@ -150,7 +150,7 @@ template <class M> void Unknowns(const M &model) {
State preserve = state;
AppendTest("not_found2", 2, -15.0, true);
AppendTest("not_found3", 2, -15.0 - 2.0, true);
-
+
state = preserve;
AppendTest("however", 2, -4, true);
AppendTest("not_found3", 3, -6, true);
@@ -167,7 +167,7 @@ template <class M> void MinimalState(const M &model) {
AppendTest("foo", 1, -3.141592, true);
BOOST_CHECK_EQUAL(1, state.length);
AppendTest("bar", 2, -6.0, true);
- // Has to include the backoff weight.
+ // Has to include the backoff weight.
BOOST_CHECK_EQUAL(1, state.length);
AppendTest("bar", 1, -2.718281 + 3.0, true);
BOOST_CHECK_EQUAL(1, state.length);
@@ -263,7 +263,7 @@ template <class M> void Stateless(const M &model) {
// the
AppendTest("the", 1, -4.04005, true);
StatelessTest(5, 5, 1, -4.04005);
- // No context of the.
+ // No context of the.
StatelessTest(5, 0, 1, -1.687872);
// biarritz
StatelessTest(6, 1, 1, -1.9889);
diff --git a/lm/model_type.hh b/lm/model_type.hh
index fbe1117a5..dcdc6ac7c 100644
--- a/lm/model_type.hh
+++ b/lm/model_type.hh
@@ -8,7 +8,7 @@ namespace ngram {
* and I want to preserve existing binary files. */
typedef enum {PROBING=0, REST_PROBING=1, TRIE=2, QUANT_TRIE=3, ARRAY_TRIE=4, QUANT_ARRAY_TRIE=5} ModelType;
-// Historical names.
+// Historical names.
const ModelType HASH_PROBING = PROBING;
const ModelType TRIE_SORTED = TRIE;
const ModelType QUANT_TRIE_SORTED = QUANT_TRIE;
diff --git a/lm/ngram_query.hh b/lm/ngram_query.hh
index 560853749..937fe2421 100644
--- a/lm/ngram_query.hh
+++ b/lm/ngram_query.hh
@@ -22,7 +22,7 @@ struct BasicPrint {
std::cout << "Total: " << total << " OOV: " << oov << '\n';
}
void Summary(double, double, uint64_t, uint64_t) {}
-
+
};
struct FullPrint : public BasicPrint {
@@ -31,7 +31,7 @@ struct FullPrint : public BasicPrint {
}
void Summary(double ppl_including_oov, double ppl_excluding_oov, uint64_t corpus_oov, uint64_t corpus_tokens) {
- std::cout <<
+ std::cout <<
"Perplexity including OOVs:\t" << ppl_including_oov << "\n"
"Perplexity excluding OOVs:\t" << ppl_excluding_oov << "\n"
"OOVs:\t" << corpus_oov << "\n"
diff --git a/lm/partial.hh b/lm/partial.hh
index 3e67d91c5..9e4e3522e 100644
--- a/lm/partial.hh
+++ b/lm/partial.hh
@@ -35,9 +35,9 @@ template <class Model> ExtendReturn ExtendLoop(
unsigned char i = 0;
unsigned char length = pointers_end - pointers;
- // pointers_write is NULL means that the existing left state is full, so we should use completed probabilities.
+ // pointers_write is NULL means that the existing left state is full, so we should use completed probabilities.
if (pointers_write) {
- // Using full context, writing to new left state.
+ // Using full context, writing to new left state.
for (; i < length; ++i) {
FullScoreReturn ret(model.ExtendLeft(
add_rbegin, add_rbegin + value.next_use,
@@ -61,7 +61,7 @@ template <class Model> ExtendReturn ExtendLoop(
}
}
}
- // Using some of the new context.
+ // Using some of the new context.
for (; i < length && value.next_use; ++i) {
FullScoreReturn ret(model.ExtendLeft(
add_rbegin, add_rbegin + value.next_use,
@@ -73,7 +73,7 @@ template <class Model> ExtendReturn ExtendLoop(
value.adjust += ret.prob;
}
float unrest = model.UnRest(pointers + i, pointers_end, i + seen + 1);
- // Using none of the new context.
+ // Using none of the new context.
value.adjust += unrest;
std::copy(backoff_in, backoff_in + value.next_use, backoff_write);
@@ -100,7 +100,7 @@ template <class Model> float RevealBefore(const Model &model, const Right &revea
if (left.full) {
for (unsigned char i = 0; i < value.next_use; ++i) value.adjust += backoff_buffer[i];
} else {
- // If left wasn't full when it came in, put words into right state.
+ // If left wasn't full when it came in, put words into right state.
std::copy(reveal.words + seen, reveal.words + seen + value.next_use, right.words + right.length);
right.length += value.next_use;
left.full = value.make_full || (right.length == model.Order() - 1);
diff --git a/lm/partial_test.cc b/lm/partial_test.cc
index 8d309c85a..adb644fa6 100644
--- a/lm/partial_test.cc
+++ b/lm/partial_test.cc
@@ -123,7 +123,7 @@ BOOST_AUTO_TEST_CASE(EndSentence) {
before.words[1] = loin;
before.backoff[0] = -0.845098;
before.backoff[1] = 0.0;
-
+
before.length = 1;
BOOST_CHECK_CLOSE(-0.0410707, RevealBefore(m, before, 0, true, between.left, between.right), 0.001);
BOOST_CHECK_EQUAL(0, between.left.length);
@@ -159,7 +159,7 @@ void CheckAdjustment(const RestProbingModel &model, float expect, const Right &b
if (before_full) {
got += RevealBefore(model, before, before.length, true, between.left, between.right);
}
- // Sometimes they're zero and BOOST_CHECK_CLOSE fails for this.
+ // Sometimes they're zero and BOOST_CHECK_CLOSE fails for this.
BOOST_CHECK(fabs(expect - got) < 0.001);
}
diff --git a/lm/quantize.cc b/lm/quantize.cc
index 273ea3989..02b5dbc0e 100644
--- a/lm/quantize.cc
+++ b/lm/quantize.cc
@@ -50,12 +50,12 @@ void SeparatelyQuantize::UpdateConfigFromBinary(const BinaryFormat &file, uint64
void SeparatelyQuantize::SetupMemory(void *base, unsigned char order, const Config &config) {
prob_bits_ = config.prob_bits;
backoff_bits_ = config.backoff_bits;
- // We need the reserved values.
+ // We need the reserved values.
if (config.prob_bits == 0) UTIL_THROW(ConfigException, "You can't quantize probability to zero");
if (config.backoff_bits == 0) UTIL_THROW(ConfigException, "You can't quantize backoff to zero");
if (config.prob_bits > 25) UTIL_THROW(ConfigException, "For efficiency reasons, quantizing probability supports at most 25 bits. Currently you have requested " << static_cast<unsigned>(config.prob_bits) << " bits.");
if (config.backoff_bits > 25) UTIL_THROW(ConfigException, "For efficiency reasons, quantizing backoff supports at most 25 bits. Currently you have requested " << static_cast<unsigned>(config.backoff_bits) << " bits.");
- // Reserve 8 byte header for bit counts.
+ // Reserve 8 byte header for bit counts.
actual_base_ = static_cast<uint8_t*>(base);
float *start = reinterpret_cast<float*>(actual_base_ + 8);
for (unsigned char i = 0; i < order - 2; ++i) {
diff --git a/lm/quantize.hh b/lm/quantize.hh
index 84a30872e..8500aceec 100644
--- a/lm/quantize.hh
+++ b/lm/quantize.hh
@@ -85,7 +85,7 @@ class DontQuantize {
void SetupMemory(void * /*start*/, unsigned char /*order*/, const Config & /*config*/) {}
static const bool kTrain = false;
- // These should never be called because kTrain is false.
+ // These should never be called because kTrain is false.
void Train(uint8_t /*order*/, std::vector<float> &/*prob*/, std::vector<float> &/*backoff*/) {}
void TrainProb(uint8_t, std::vector<float> &/*prob*/) {}
@@ -142,7 +142,7 @@ class SeparatelyQuantize {
static uint64_t Size(uint8_t order, const Config &config) {
uint64_t longest_table = (static_cast<uint64_t>(1) << static_cast<uint64_t>(config.prob_bits)) * sizeof(float);
uint64_t middle_table = (static_cast<uint64_t>(1) << static_cast<uint64_t>(config.backoff_bits)) * sizeof(float) + longest_table;
- // unigrams are currently not quantized so no need for a table.
+ // unigrams are currently not quantized so no need for a table.
return (order - 2) * middle_table + longest_table + /* for the bit counts and alignment padding) */ 8;
}
@@ -168,7 +168,7 @@ class SeparatelyQuantize {
float Rest() const { return Prob(); }
void Write(float prob, float backoff) const {
- util::WriteInt57(address_.base, address_.offset, ProbBins().Bits() + BackoffBins().Bits(),
+ util::WriteInt57(address_.base, address_.offset, ProbBins().Bits() + BackoffBins().Bits(),
(ProbBins().EncodeProb(prob) << BackoffBins().Bits()) | BackoffBins().EncodeBackoff(backoff));
}
@@ -183,7 +183,7 @@ class SeparatelyQuantize {
class LongestPointer {
public:
LongestPointer(const SeparatelyQuantize &quant, const util::BitAddress &address) : table_(&quant.LongestTable()), address_(address) {}
-
+
LongestPointer() : address_(NULL, 0) {}
bool Found() const { return address_.base != NULL; }
@@ -206,7 +206,7 @@ class SeparatelyQuantize {
void SetupMemory(void *start, unsigned char order, const Config &config);
static const bool kTrain = true;
- // Assumes 0.0 is removed from backoff.
+ // Assumes 0.0 is removed from backoff.
void Train(uint8_t order, std::vector<float> &prob, std::vector<float> &backoff);
// Train just probabilities (for longest order).
void TrainProb(uint8_t order, std::vector<float> &prob);
diff --git a/lm/return.hh b/lm/return.hh
index 982ffd66a..ee1f25e94 100644
--- a/lm/return.hh
+++ b/lm/return.hh
@@ -9,7 +9,7 @@ struct FullScoreReturn {
// log10 probability
float prob;
- /* The length of n-gram matched. Do not use this for recombination.
+ /* The length of n-gram matched. Do not use this for recombination.
* Consider a model containing only the following n-grams:
* -1 foo
* -3.14 bar
@@ -18,9 +18,9 @@ struct FullScoreReturn {
*
* If you score ``bar'' then ngram_length is 1 and recombination state is the
* empty string because bar has zero backoff and does not extend to the
- * right.
- * If you score ``foo'' then ngram_length is 1 and recombination state is
- * ``foo''.
+ * right.
+ * If you score ``foo'' then ngram_length is 1 and recombination state is
+ * ``foo''.
*
* Ideally, keep output states around and compare them. Failing that,
* get out_state.ValidLength() and use that length for recombination.
@@ -29,7 +29,7 @@ struct FullScoreReturn {
/* Left extension information. If independent_left is set, then prob is
* independent of words to the left (up to additional backoff). Otherwise,
- * extend_left indicates how to efficiently extend further to the left.
+ * extend_left indicates how to efficiently extend further to the left.
*/
bool independent_left;
uint64_t extend_left; // Defined only if independent_left
diff --git a/lm/search_trie.cc b/lm/search_trie.cc
index 5b0f55fc8..a63985af6 100644
--- a/lm/search_trie.cc
+++ b/lm/search_trie.cc
@@ -517,7 +517,7 @@ template <class Quant, class Bhiksha> void BuildTrie(SortedFiles &files, std::ve
{
WriteEntries<Quant, Bhiksha> writer(contexts, quant, unigrams, out.middle_begin_, out.longest_, counts.size(), sri);
RecursiveInsert(counts.size(), counts[0], inputs, config.ProgressMessages(), "Writing trie", writer);
- // Write the last unigram entry, which is the end pointer for the bigrams.
+ // Write the last unigram entry, which is the end pointer for the bigrams.
writer.Unigram(counts[0]);
}
diff --git a/lm/sizes.cc b/lm/sizes.cc
index 55ad586c4..dd831c505 100644
--- a/lm/sizes.cc
+++ b/lm/sizes.cc
@@ -36,7 +36,7 @@ void ShowSizes(const std::vector<uint64_t> &counts, const lm::ngram::Config &con
long int length = std::max<long int>(2, static_cast<long int>(ceil(log10((double) max_length / divide))));
std::cerr << "Memory estimate for binary LM:\ntype ";
- // right align bytes.
+ // right align bytes.
for (long int i = 0; i < length - 2; ++i) std::cerr << ' ';
std::cerr << prefix << "B\n"
diff --git a/lm/state.hh b/lm/state.hh
index d9ba596ad..2195dee73 100644
--- a/lm/state.hh
+++ b/lm/state.hh
@@ -11,7 +11,7 @@ namespace lm {
namespace ngram {
// This is a POD but if you want memcmp to return the same as operator==, call
-// ZeroRemaining first.
+// ZeroRemaining first.
class State {
public:
bool operator==(const State &other) const {
@@ -19,7 +19,7 @@ class State {
return !memcmp(words, other.words, length * sizeof(WordIndex));
}
- // Three way comparison function.
+ // Three way comparison function.
int Compare(const State &other) const {
if (length != other.length) return length < other.length ? -1 : 1;
return memcmp(words, other.words, length * sizeof(WordIndex));
@@ -30,7 +30,7 @@ class State {
return memcmp(words, other.words, length * sizeof(WordIndex)) < 0;
}
- // Call this before using raw memcmp.
+ // Call this before using raw memcmp.
void ZeroRemaining() {
for (unsigned char i = length; i < KENLM_MAX_ORDER - 1; ++i) {
words[i] = 0;
@@ -40,8 +40,8 @@ class State {
unsigned char Length() const { return length; }
- // You shouldn't need to touch anything below this line, but the members are public so FullState will qualify as a POD.
- // This order minimizes total size of the struct if WordIndex is 64 bit, float is 32 bit, and alignment of 64 bit integers is 64 bit.
+ // You shouldn't need to touch anything below this line, but the members are public so FullState will qualify as a POD.
+ // This order minimizes total size of the struct if WordIndex is 64 bit, float is 32 bit, and alignment of 64 bit integers is 64 bit.
WordIndex words[KENLM_MAX_ORDER - 1];
float backoff[KENLM_MAX_ORDER - 1];
unsigned char length;
@@ -55,7 +55,7 @@ inline uint64_t hash_value(const State &state, uint64_t seed = 0) {
struct Left {
bool operator==(const Left &other) const {
- return
+ return
length == other.length &&
(!length || (pointers[length - 1] == other.pointers[length - 1] && full == other.full));
}
diff --git a/lm/trie.cc b/lm/trie.cc
index 93320a332..72ad54484 100644
--- a/lm/trie.cc
+++ b/lm/trie.cc
@@ -14,7 +14,7 @@ namespace {
class KeyAccessor {
public:
- KeyAccessor(const void *base, uint64_t key_mask, uint8_t key_bits, uint8_t total_bits)
+ KeyAccessor(const void *base, uint64_t key_mask, uint8_t key_bits, uint8_t total_bits)
: base_(reinterpret_cast<const uint8_t*>(base)), key_mask_(key_mask), key_bits_(key_bits), total_bits_(total_bits) {}
typedef uint64_t Key;
@@ -38,9 +38,9 @@ bool FindBitPacked(const void *base, uint64_t key_mask, uint8_t key_bits, uint8_
uint64_t BitPacked::BaseSize(uint64_t entries, uint64_t max_vocab, uint8_t remaining_bits) {
uint8_t total_bits = util::RequiredBits(max_vocab) + remaining_bits;
- // Extra entry for next pointer at the end.
+ // Extra entry for next pointer at the end.
// +7 then / 8 to round up bits and convert to bytes
- // +sizeof(uint64_t) so that ReadInt57 etc don't go segfault.
+ // +sizeof(uint64_t) so that ReadInt57 etc don't go segfault.
// Note that this waste is O(order), not O(number of ngrams).
return ((1 + entries) * total_bits + 7) / 8 + sizeof(uint64_t);
}
@@ -100,7 +100,7 @@ template <class Bhiksha> util::BitAddress BitPackedMiddle<Bhiksha>::Find(WordInd
template <class Bhiksha> void BitPackedMiddle<Bhiksha>::FinishedLoading(uint64_t next_end, const Config &config) {
// Write at insert_index. . .
- uint64_t last_next_write = insert_index_ * total_bits_ +
+ uint64_t last_next_write = insert_index_ * total_bits_ +
// at the offset where the next pointers are stored.
(total_bits_ - bhiksha_.InlineBits());
bhiksha_.WriteNext(base_, last_next_write, insert_index_, next_end);
diff --git a/lm/trie.hh b/lm/trie.hh
index cd39298b5..b7f0458bf 100644
--- a/lm/trie.hh
+++ b/lm/trie.hh
@@ -18,7 +18,7 @@ struct NodeRange {
uint64_t begin, end;
};
-// TODO: if the number of unigrams is a concern, also bit pack these records.
+// TODO: if the number of unigrams is a concern, also bit pack these records.
struct UnigramValue {
ProbBackoff weights;
uint64_t next;
@@ -44,24 +44,24 @@ class UnigramPointer {
class Unigram {
public:
Unigram() {}
-
+
void Init(void *start) {
unigram_ = static_cast<UnigramValue*>(start);
}
-
+
static uint64_t Size(uint64_t count) {
- // +1 in case unknown doesn't appear. +1 for the final next.
+ // +1 in case unknown doesn't appear. +1 for the final next.
return (count + 2) * sizeof(UnigramValue);
}
-
+
const ProbBackoff &Lookup(WordIndex index) const { return unigram_[index].weights; }
-
+
ProbBackoff &Unknown() { return unigram_[0].weights; }
UnigramValue *Raw() {
return unigram_;
}
-
+
UnigramPointer Find(WordIndex word, NodeRange &next) const {
UnigramValue *val = unigram_ + word;
next.begin = val->next;
@@ -71,7 +71,7 @@ class Unigram {
private:
UnigramValue *unigram_;
-};
+};
class BitPacked {
public:
@@ -99,7 +99,7 @@ template <class Bhiksha> class BitPackedMiddle : public BitPacked {
public:
static uint64_t Size(uint8_t quant_bits, uint64_t entries, uint64_t max_vocab, uint64_t max_next, const Config &config);
- // next_source need not be initialized.
+ // next_source need not be initialized.
BitPackedMiddle(void *base, uint8_t quant_bits, uint64_t entries, uint64_t max_vocab, uint64_t max_next, const BitPacked &next_source, const Config &config);
util::BitAddress Insert(WordIndex word);
diff --git a/lm/trie_sort.cc b/lm/trie_sort.cc
index c3f468746..33a2f96b5 100644
--- a/lm/trie_sort.cc
+++ b/lm/trie_sort.cc
@@ -27,7 +27,7 @@ namespace {
typedef util::SizedIterator NGramIter;
-// Proxy for an entry except there is some extra cruft between the entries. This is used to sort (n-1)-grams using the same memory as the sorted n-grams.
+// Proxy for an entry except there is some extra cruft between the entries. This is used to sort (n-1)-grams using the same memory as the sorted n-grams.
class PartialViewProxy {
public:
PartialViewProxy() : attention_size_(0), inner_() {}
@@ -64,7 +64,7 @@ class PartialViewProxy {
typedef util::SizedInnerIterator InnerIterator;
InnerIterator &Inner() { return inner_; }
- const InnerIterator &Inner() const { return inner_; }
+ const InnerIterator &Inner() const { return inner_; }
InnerIterator inner_;
};
@@ -78,7 +78,7 @@ FILE *DiskFlush(const void *mem_begin, const void *mem_end, const std::string &t
FILE *WriteContextFile(uint8_t *begin, uint8_t *end, const std::string &temp_prefix, std::size_t entry_size, unsigned char order) {
const size_t context_size = sizeof(WordIndex) * (order - 1);
- // Sort just the contexts using the same memory.
+ // Sort just the contexts using the same memory.
PartialIter context_begin(PartialViewProxy(begin + sizeof(WordIndex), entry_size, context_size));
PartialIter context_end(PartialViewProxy(end + sizeof(WordIndex), entry_size, context_size));
@@ -91,7 +91,7 @@ FILE *WriteContextFile(uint8_t *begin, uint8_t *end, const std::string &temp_pre
util::scoped_FILE out(util::FMakeTemp(temp_prefix));
- // Write out to file and uniqueify at the same time. Could have used unique_copy if there was an appropriate OutputIterator.
+ // Write out to file and uniqueify at the same time. Could have used unique_copy if there was an appropriate OutputIterator.
if (context_begin == context_end) return out.release();
PartialIter i(context_begin);
util::WriteOrThrow(out.get(), i->Data(), context_size);
@@ -118,7 +118,7 @@ struct ThrowCombine {
}
};
-// Useful for context files that just contain records with no value.
+// Useful for context files that just contain records with no value.
struct FirstCombine {
void operator()(std::size_t entry_size, unsigned char /*order*/, const void *first, const void * /*second*/, FILE *out) const {
util::WriteOrThrow(out, first, entry_size);
@@ -172,7 +172,7 @@ void RecordReader::Overwrite(const void *start, std::size_t amount) {
util::WriteOrThrow(file_, start, amount);
long forward = entry_size_ - internal - amount;
#if !defined(_WIN32) && !defined(_WIN64)
- if (forward)
+ if (forward)
#endif
UTIL_THROW_IF(fseek(file_, forward, SEEK_CUR), util::ErrnoException, "Couldn't seek forwards past revision");
}
@@ -191,7 +191,7 @@ SortedFiles::SortedFiles(const Config &config, util::FilePiece &f, std::vector<u
PositiveProbWarn warn(config.positive_log_probability);
unigram_.reset(util::MakeTemp(file_prefix));
{
- // In case <unk> appears.
+ // In case <unk> appears.
size_t size_out = (counts[0] + 1) * sizeof(ProbBackoff);
util::scoped_mmap unigram_mmap(util::MapZeroedWrite(unigram_.get(), size_out), size_out);
Read1Grams(f, counts[0], vocab, reinterpret_cast<ProbBackoff*>(unigram_mmap.get()), warn);
@@ -199,7 +199,7 @@ SortedFiles::SortedFiles(const Config &config, util::FilePiece &f, std::vector<u
if (!vocab.SawUnk()) ++counts[0];
}
- // Only use as much buffer as we need.
+ // Only use as much buffer as we need.
size_t buffer_use = 0;
for (unsigned int order = 2; order < counts.size(); ++order) {
buffer_use = std::max<size_t>(buffer_use, static_cast<size_t>((sizeof(WordIndex) * order + 2 * sizeof(float)) * counts[order - 1]));
@@ -240,7 +240,7 @@ class Closer {
void SortedFiles::ConvertToSorted(util::FilePiece &f, const SortedVocabulary &vocab, const std::vector<uint64_t> &counts, const std::string &file_prefix, unsigned char order, PositiveProbWarn &warn, void *mem, std::size_t mem_size) {
ReadNGramHeader(f, order);
const size_t count = counts[order - 1];
- // Size of weights. Does it include backoff?
+ // Size of weights. Does it include backoff?
const size_t words_size = sizeof(WordIndex) * order;
const size_t weights_size = sizeof(float) + ((order == counts.size()) ? 0 : sizeof(float));
const size_t entry_size = words_size + weights_size;
@@ -264,9 +264,9 @@ void SortedFiles::ConvertToSorted(util::FilePiece &f, const SortedVocabulary &vo
ReadNGram(f, order, vocab, it, *reinterpret_cast<ProbBackoff*>(out + words_size), warn);
}
}
- // Sort full records by full n-gram.
+ // Sort full records by full n-gram.
util::SizedProxy proxy_begin(begin, entry_size), proxy_end(out_end, entry_size);
- // parallel_sort uses too much RAM. TODO: figure out why windows sort doesn't like my proxies.
+ // parallel_sort uses too much RAM. TODO: figure out why windows sort doesn't like my proxies.
#if defined(_WIN32) || defined(_WIN64)
std::stable_sort
#else
@@ -279,7 +279,7 @@ void SortedFiles::ConvertToSorted(util::FilePiece &f, const SortedVocabulary &vo
done += (out_end - begin) / entry_size;
}
- // All individual files created. Merge them.
+ // All individual files created. Merge them.
while (files.size() > 1) {
files.push_back(MergeSortedFiles(files[0], files[1], file_prefix, weights_size, order, ThrowCombine()));
diff --git a/lm/trie_sort.hh b/lm/trie_sort.hh
index e5406d9b6..594efee51 100644
--- a/lm/trie_sort.hh
+++ b/lm/trie_sort.hh
@@ -1,4 +1,4 @@
-// Step of trie builder: create sorted files.
+// Step of trie builder: create sorted files.
#ifndef LM_TRIE_SORT_H
#define LM_TRIE_SORT_H
@@ -101,7 +101,7 @@ class SortedFiles {
private:
void ConvertToSorted(util::FilePiece &f, const SortedVocabulary &vocab, const std::vector<uint64_t> &counts, const std::string &prefix, unsigned char order, PositiveProbWarn &warn, void *mem, std::size_t mem_size);
-
+
util::scoped_fd unigram_;
util::scoped_FILE full_[KENLM_MAX_ORDER - 1], context_[KENLM_MAX_ORDER - 1];
diff --git a/lm/value.hh b/lm/value.hh
index 36e870848..d017d59fc 100644
--- a/lm/value.hh
+++ b/lm/value.hh
@@ -39,7 +39,7 @@ template <class Weights> class GenericProbingProxy {
const Weights *to_;
};
-// Basic proxy for trie unigrams.
+// Basic proxy for trie unigrams.
template <class Weights> class GenericTrieUnigramProxy {
public:
explicit GenericTrieUnigramProxy(const Weights &to) : to_(&to) {}
@@ -113,7 +113,7 @@ struct RestValue {
float Rest() const { return to_->rest; }
};
-// gcc 4.1 doesn't properly back dependent types :-(.
+// gcc 4.1 doesn't properly back dependent types :-(.
#pragma pack(push)
#pragma pack(4)
struct ProbingEntry {
diff --git a/lm/value_build.cc b/lm/value_build.cc
index 3ec3dce2a..ac623a6d9 100644
--- a/lm/value_build.cc
+++ b/lm/value_build.cc
@@ -3,7 +3,7 @@
#include "lm/model.hh"
#include "lm/read_arpa.hh"
-namespace lm {
+namespace lm {
namespace ngram {
template <class Model> LowerRestBuild<Model>::LowerRestBuild(const Config &config, unsigned int order, const typename Model::Vocabulary &vocab) {
@@ -12,8 +12,8 @@ template <class Model> LowerRestBuild<Model>::LowerRestBuild(const Config &confi
for_lower.write_mmap = NULL;
for_lower.rest_lower_files.clear();
- // Unigram models aren't supported, so this is a custom loader.
- // TODO: optimize the unigram loading?
+ // Unigram models aren't supported, so this is a custom loader.
+ // TODO: optimize the unigram loading?
{
util::FilePiece uni(config.rest_lower_files[0].c_str());
std::vector<uint64_t> number;
@@ -44,7 +44,7 @@ template <class Model> LowerRestBuild<Model>::LowerRestBuild(const Config &confi
throw;
}
- // TODO: force/check same vocab.
+ // TODO: force/check same vocab.
}
template <class Model> LowerRestBuild<Model>::~LowerRestBuild() {
diff --git a/lm/value_build.hh b/lm/value_build.hh
index 6fd26ef8f..49989ab42 100644
--- a/lm/value_build.hh
+++ b/lm/value_build.hh
@@ -57,7 +57,7 @@ class MaxRestBuild {
return true;
}
- // Probing does need to go back to unigram.
+ // Probing does need to go back to unigram.
const static bool kMarkEvenLower = true;
};
diff --git a/lm/virtual_interface.hh b/lm/virtual_interface.hh
index e138ac14e..ea491fbf7 100644
--- a/lm/virtual_interface.hh
+++ b/lm/virtual_interface.hh
@@ -15,16 +15,16 @@ template <class T, class U, class V> class ModelFacade;
/* Vocabulary interface. Call Index(string) and get a word index for use in
* calling Model. It provides faster convenience functions for <s>, </s>, and
- * <unk> although you can also find these using Index.
+ * <unk> although you can also find these using Index.
*
* Some models do not load the mapping from index to string. If you need this,
* check if the model Vocabulary class implements such a function and access it
- * directly.
+ * directly.
*
* The Vocabulary object is always owned by the Model and can be retrieved from
* the Model using BaseVocabulary() for this abstract interface or
* GetVocabulary() for the actual implementation (in which case you'll need the
- * actual implementation of the Model too).
+ * actual implementation of the Model too).
*/
class Vocabulary {
public:
@@ -36,7 +36,7 @@ class Vocabulary {
/* Most implementations allow StringPiece lookups and need only override
* Index(StringPiece). SRI requires null termination and overrides all
- * three methods.
+ * three methods.
*/
virtual WordIndex Index(const StringPiece &str) const = 0;
virtual WordIndex Index(const std::string &str) const {
@@ -47,7 +47,7 @@ class Vocabulary {
}
protected:
- // Call SetSpecial afterward.
+ // Call SetSpecial afterward.
Vocabulary() {}
Vocabulary(WordIndex begin_sentence, WordIndex end_sentence, WordIndex not_found) {
@@ -59,13 +59,13 @@ class Vocabulary {
WordIndex begin_sentence_, end_sentence_, not_found_;
private:
- // Disable copy constructors. They're private and undefined.
+ // Disable copy constructors. They're private and undefined.
// Ersatz boost::noncopyable.
Vocabulary(const Vocabulary &);
Vocabulary &operator=(const Vocabulary &);
};
-/* There are two ways to access a Model.
+/* There are two ways to access a Model.
*
*
* OPTION 1: Access the Model directly (e.g. lm::ngram::Model in model.hh).
@@ -90,29 +90,29 @@ class Vocabulary {
* unsigned int Order() const;
*
* NB: In case you're wondering why the model implementation looks like it's
- * missing these methods, see facade.hh.
+ * missing these methods, see facade.hh.
*
* This is the fastest way to use a model and presents a normal State class to
- * be included in a hypothesis state structure.
+ * be included in a hypothesis state structure.
*
*
- * OPTION 2: Use the virtual interface below.
+ * OPTION 2: Use the virtual interface below.
*
- * The virtual interface allow you to decide which Model to use at runtime
+ * The virtual interface allow you to decide which Model to use at runtime
* without templatizing everything on the Model type. However, each Model has
* its own State class, so a single State cannot be efficiently provided (it
* would require using the maximum memory of any Model's State or memory
* allocation with each lookup). This means you become responsible for
- * allocating memory with size StateSize() and passing it to the Score or
- * FullScore functions provided here.
+ * allocating memory with size StateSize() and passing it to the Score or
+ * FullScore functions provided here.
*
* For example, cdec has a std::string containing the entire state of a
* hypothesis. It can reserve StateSize bytes in this string for the model
- * state.
+ * state.
*
* All the State objects are POD, so it's ok to use raw memory for storing
* State.
- * in_state and out_state must not have the same address.
+ * in_state and out_state must not have the same address.
*/
class Model {
public:
@@ -148,7 +148,7 @@ class Model {
unsigned char order_;
- // Disable copy constructors. They're private and undefined.
+ // Disable copy constructors. They're private and undefined.
// Ersatz boost::noncopyable.
Model(const Model &);
Model &operator=(const Model &);
diff --git a/lm/vocab.cc b/lm/vocab.cc
index 4fad78964..f6d834323 100644
--- a/lm/vocab.cc
+++ b/lm/vocab.cc
@@ -20,15 +20,15 @@ namespace ngram {
namespace detail {
uint64_t HashForVocab(const char *str, std::size_t len) {
// This proved faster than Boost's hash in speed trials: total load time Murmur 67090000, Boost 72210000
- // Chose to use 64A instead of native so binary format will be portable across 64 and 32 bit.
+ // Chose to use 64A instead of native so binary format will be portable across 64 and 32 bit.
return util::MurmurHash64A(str, len, 0);
}
} // namespace detail
namespace {
-// Normally static initialization is a bad idea but MurmurHash is pure arithmetic, so this is ok.
+// Normally static initialization is a bad idea but MurmurHash is pure arithmetic, so this is ok.
const uint64_t kUnknownHash = detail::HashForVocab("<unk>", 5);
-// Sadly some LMs have <UNK>.
+// Sadly some LMs have <UNK>.
const uint64_t kUnknownCapHash = detail::HashForVocab("<UNK>", 5);
void ReadWords(int fd, EnumerateVocab *enumerate, WordIndex expected_count, uint64_t offset) {
@@ -38,7 +38,7 @@ void ReadWords(int fd, EnumerateVocab *enumerate, WordIndex expected_count, uint
util::ReadOrThrow(fd, check_unk, 6);
UTIL_THROW_IF(
memcmp(check_unk, "<unk>", 6),
- FormatLoadException,
+ FormatLoadException,
"Vocabulary words are in the wrong place. This could be because the binary file was built with stale gcc and old kenlm. Stale gcc, including the gcc distributed with RedHat and OS X, has a bug that ignores pragma pack for template-dependent types. New kenlm works around this, so you'll save memory but have to rebuild any binary files using the probing data structure.");
if (!enumerate) return;
enumerate->Add(0, "<unk>");
@@ -58,7 +58,7 @@ void ReadWords(int fd, EnumerateVocab *enumerate, WordIndex expected_count, uint
util::ReadOrThrow(fd, &next_char, 1);
buf.push_back(next_char);
}
- // Ok now we have null terminated strings.
+ // Ok now we have null terminated strings.
for (const char *i = buf.data(); i != buf.data() + buf.size();) {
std::size_t length = strlen(i);
enumerate->Add(index++, StringPiece(i, length));
@@ -83,13 +83,13 @@ void WriteWordsWrapper::Add(WordIndex index, const StringPiece &str) {
SortedVocabulary::SortedVocabulary() : begin_(NULL), end_(NULL), enumerate_(NULL) {}
uint64_t SortedVocabulary::Size(uint64_t entries, const Config &/*config*/) {
- // Lead with the number of entries.
+ // Lead with the number of entries.
return sizeof(uint64_t) + sizeof(uint64_t) * entries;
}
void SortedVocabulary::SetupMemory(void *start, std::size_t allocated, std::size_t entries, const Config &config) {
assert(allocated >= Size(entries, config));
- // Leave space for number of entries.
+ // Leave space for number of entries.
begin_ = reinterpret_cast<uint64_t*>(start) + 1;
end_ = begin_;
saw_unk_ = false;
@@ -122,7 +122,7 @@ WordIndex SortedVocabulary::Insert(const StringPiece &str) {
strings_to_enumerate_[end_ - begin_] = StringPiece(static_cast<const char*>(copied), str.size());
}
++end_;
- // This is 1 + the offset where it was inserted to make room for unk.
+ // This is 1 + the offset where it was inserted to make room for unk.
return end_ - begin_;
}
@@ -133,7 +133,7 @@ void SortedVocabulary::FinishedLoading(ProbBackoff *reorder_vocab) {
util::JointSort(begin_, end_, values);
}
for (WordIndex i = 0; i < static_cast<WordIndex>(end_ - begin_); ++i) {
- // <unk> strikes again: +1 here.
+ // <unk> strikes again: +1 here.
enumerate_->Add(i + 1, strings_to_enumerate_[i]);
}
strings_to_enumerate_.clear();
@@ -142,7 +142,7 @@ void SortedVocabulary::FinishedLoading(ProbBackoff *reorder_vocab) {
util::JointSort(begin_, end_, reorder_vocab + 1);
}
SetSpecial(Index("<s>"), Index("</s>"), 0);
- // Save size. Excludes UNK.
+ // Save size. Excludes UNK.
*(reinterpret_cast<uint64_t*>(begin_) - 1) = end_ - begin_;
// Includes UNK.
bound_ = end_ - begin_ + 1;
@@ -161,7 +161,7 @@ const unsigned int kProbingVocabularyVersion = 0;
namespace detail {
struct ProbingVocabularyHeader {
- // Lowest unused vocab id. This is also the number of words, including <unk>.
+ // Lowest unused vocab id. This is also the number of words, including <unk>.
unsigned int version;
WordIndex bound;
};
@@ -198,7 +198,7 @@ void ProbingVocabulary::ConfigureEnumerate(EnumerateVocab *to, std::size_t /*max
WordIndex ProbingVocabulary::Insert(const StringPiece &str) {
uint64_t hashed = detail::HashForVocab(str);
- // Prevent unknown from going into the table.
+ // Prevent unknown from going into the table.
if (hashed == kUnknownHash || hashed == kUnknownCapHash) {
saw_unk_ = true;
return 0;
diff --git a/lm/vocab.hh b/lm/vocab.hh
index d6ae07b83..2659b9ba8 100644
--- a/lm/vocab.hh
+++ b/lm/vocab.hh
@@ -35,7 +35,7 @@ class WriteWordsWrapper : public EnumerateVocab {
WriteWordsWrapper(EnumerateVocab *inner);
~WriteWordsWrapper();
-
+
void Add(WordIndex index, const StringPiece &str);
const std::string &Buffer() const { return buffer_; }
@@ -46,7 +46,7 @@ class WriteWordsWrapper : public EnumerateVocab {
std::string buffer_;
};
-// Vocabulary based on sorted uniform find storing only uint64_t values and using their offsets as indices.
+// Vocabulary based on sorted uniform find storing only uint64_t values and using their offsets as indices.
class SortedVocabulary : public base::Vocabulary {
public:
SortedVocabulary();
@@ -67,7 +67,7 @@ class SortedVocabulary : public base::Vocabulary {
// Size for purposes of file writing
static uint64_t Size(uint64_t entries, const Config &config);
- // Vocab words are [0, Bound()) Only valid after FinishedLoading/LoadedBinary.
+ // Vocab words are [0, Bound()) Only valid after FinishedLoading/LoadedBinary.
WordIndex Bound() const { return bound_; }
// Everything else is for populating. I'm too lazy to hide and friend these, but you'll only get a const reference anyway.
@@ -79,7 +79,7 @@ class SortedVocabulary : public base::Vocabulary {
WordIndex Insert(const StringPiece &str);
- // Reorders reorder_vocab so that the IDs are sorted.
+ // Reorders reorder_vocab so that the IDs are sorted.
void FinishedLoading(ProbBackoff *reorder_vocab);
// Trie stores the correct counts including <unk> in the header. If this was previously sized based on a count exluding <unk>, padding with 8 bytes will make it the correct size based on a count including <unk>.
@@ -98,7 +98,7 @@ class SortedVocabulary : public base::Vocabulary {
EnumerateVocab *enumerate_;
- // Actual strings. Used only when loading from ARPA and enumerate_ != NULL
+ // Actual strings. Used only when loading from ARPA and enumerate_ != NULL
util::Pool string_backing_;
std::vector<StringPiece> strings_to_enumerate_;
@@ -123,7 +123,7 @@ struct ProbingVocabularyEntry {
};
#pragma pack(pop)
-// Vocabulary storing a map from uint64_t to WordIndex.
+// Vocabulary storing a map from uint64_t to WordIndex.
class ProbingVocabulary : public base::Vocabulary {
public:
ProbingVocabulary();
@@ -137,7 +137,7 @@ class ProbingVocabulary : public base::Vocabulary {
// This just unwraps Config to get the probing_multiplier.
static uint64_t Size(uint64_t entries, const Config &config);
- // Vocab words are [0, Bound()).
+ // Vocab words are [0, Bound()).
WordIndex Bound() const { return bound_; }
// Everything else is for populating. I'm too lazy to hide and friend these, but you'll only get a const reference anyway.
diff --git a/lm/weights.hh b/lm/weights.hh
index da1963d83..f14312753 100644
--- a/lm/weights.hh
+++ b/lm/weights.hh
@@ -1,13 +1,13 @@
#ifndef LM_WEIGHTS_H
#define LM_WEIGHTS_H
-// Weights for n-grams. Probability and possibly a backoff.
+// Weights for n-grams. Probability and possibly a backoff.
namespace lm {
struct Prob {
float prob;
};
-// No inheritance so this will be a POD.
+// No inheritance so this will be a POD.
struct ProbBackoff {
float prob;
float backoff;
diff --git a/lm/wrappers/nplm.cc b/lm/wrappers/nplm.cc
index edc7b5b72..9bd7c1ed8 100644
--- a/lm/wrappers/nplm.cc
+++ b/lm/wrappers/nplm.cc
@@ -10,7 +10,7 @@
namespace lm {
namespace np {
-Vocabulary::Vocabulary(const nplm::vocabulary &vocab)
+Vocabulary::Vocabulary(const nplm::vocabulary &vocab)
: base::Vocabulary(vocab.lookup_word("<s>"), vocab.lookup_word("</s>"), vocab.lookup_word("<unk>")),
vocab_(vocab), null_word_(vocab.lookup_word("<null>")) {}
@@ -60,7 +60,7 @@ nplm::neuralLM *LoadNPLM(const std::string &file) {
}
} // namespace
-Model::Model(const std::string &file, std::size_t cache)
+Model::Model(const std::string &file, std::size_t cache)
: base_instance_(LoadNPLM(file)), vocab_(base_instance_->get_vocabulary()), cache_size_(cache) {
UTIL_THROW_IF(base_instance_->get_order() > NPLM_MAX_ORDER, util::Exception, "This NPLM has order " << (unsigned int)base_instance_->get_order() << " but the KenLM wrapper was compiled with " << NPLM_MAX_ORDER << ". Change the defintion of NPLM_MAX_ORDER and recompile.");
// log10 compatible with backoff models.
diff --git a/lm/wrappers/nplm.hh b/lm/wrappers/nplm.hh
index 416281de2..82b38fdd7 100644
--- a/lm/wrappers/nplm.hh
+++ b/lm/wrappers/nplm.hh
@@ -9,7 +9,7 @@
#include <boost/scoped_ptr.hpp>
/* Wrapper to NPLM "by Ashish Vaswani, with contributions from David Chiang
- * and Victoria Fossum."
+ * and Victoria Fossum."
* http://nlg.isi.edu/software/nplm/
*/
diff --git a/mert/ForestRescoreTest.cpp b/mert/ForestRescoreTest.cpp
index 23668ab20..f1a1c8423 100644
--- a/mert/ForestRescoreTest.cpp
+++ b/mert/ForestRescoreTest.cpp
@@ -248,7 +248,7 @@ BOOST_AUTO_TEST_CASE(viterbi_full_hypergraph) {
Vocab vocab;
//References
ReferenceSet references;
- references.AddLine(0,"in addition to EU support for businesses , also the administration of national business support will be concentrated in four Centres for Economic Development , Transport and Environment ( ELY Centres ) , starting from mid @-@ September .",vocab);
+ references.AddLine(0,"in addition to EU support for businesses , also the administration of national business support will be concentrated in four Centres for Economic Development , Transport and Environment ( ELY Centres ) , starting from mid @-@ September .",vocab);
//Load the hypergraph
Graph graph(vocab);
util::scoped_fd fd(util::OpenReadOrThrow("mert/hgtest/0.gz"));
diff --git a/misc/merge-sorted.cc b/misc/merge-sorted.cc
index ae693215b..4550a491d 100644
--- a/misc/merge-sorted.cc
+++ b/misc/merge-sorted.cc
@@ -19,10 +19,10 @@ class Part
string my_lines[2];
size_t ctr;
public:
- string const& line() const
- {
+ string const& line() const
+ {
static string empty_line;
- return f ? my_lines[ctr%2] : empty_line;
+ return f ? my_lines[ctr%2] : empty_line;
}
Part(string _fname) : ctr(0)
@@ -32,7 +32,7 @@ public:
if (!getline(*f, my_lines[0])) f.reset();
}
- bool next()
+ bool next()
{
if (!f) return false;
if (!getline(*f, my_lines[++ctr%2]))
@@ -45,16 +45,16 @@ public:
return true;
}
- bool operator <(Part const& other) const
+ bool operator <(Part const& other) const
{ return line() < other.line(); }
- bool operator <=(Part const& other) const
+ bool operator <=(Part const& other) const
{ return line() <= other.line(); }
- bool operator >(Part const& other) const
+ bool operator >(Part const& other) const
{ return line() > other.line(); }
- bool operator >=(Part const& other) const
+ bool operator >=(Part const& other) const
{ return line() >= other.line(); }
bool go(ostream& out)
@@ -66,20 +66,20 @@ public:
out << fname << "-" << ctr - 1 << "-";
out << my_lines[(ctr - 1)%2] << endl;
}
- do
+ do
{
out << fname << " " << ctr << " ";
out << line() << "\n";
}
while (next() && my_lines[0] == my_lines[1]);
#else
- do { out << line() << "\n"; }
+ do { out << line() << "\n"; }
while (next() && my_lines[0] == my_lines[1]);
out.flush();
#endif
return f != NULL;
}
-
+
};
diff --git a/misc/pmoses/pmoses.cc b/misc/pmoses/pmoses.cc
index 8b8134adc..caf66cee5 100644
--- a/misc/pmoses/pmoses.cc
+++ b/misc/pmoses/pmoses.cc
@@ -33,25 +33,25 @@ using namespace Moses;
//Delete white spaces from the end and the begining of the string
string trim(string str) {
string::iterator it;
-
+
while ((str.length()>0)&&((*(it=str.begin()))==' ')) {
str.erase(it);
}
-
+
while ((str.length()>0)&&((*(it=(str.end()-1)))==' ')) {
str.erase(it);
}
-
+
for(unsigned i=0; i<str.length(); i++) {
if ((str[i]==' ') && ((i+1)<str.length()) && (str[i+1]==' ')) {
str=str.erase(i,1);
i--;
}
}
-
+
return str;
}
-
+
int main (int argc, char *argv[]) {
vector<FactorType> input, output;
@@ -64,12 +64,12 @@ int main (int argc, char *argv[]) {
input.push_back(0);
output.push_back(0);
-
+
+ weight.push_back(0);
weight.push_back(0);
weight.push_back(0);
weight.push_back(0);
weight.push_back(0);
- weight.push_back(0);
if (argc<3) {
cerr<<"Error: Wrong number of parameters."<<endl;
@@ -86,19 +86,19 @@ int main (int argc, char *argv[]) {
}
cerr<<"numScoreComponent: "<<numScoreComponent<<endl;
- cerr<<"numInputScores: "<<numInputScores<<endl;
+ cerr<<"numInputScores: "<<numInputScores<<endl;
PhraseDictionaryTreeAdaptor *pd=new PhraseDictionaryTreeAdaptor(numScoreComponent, numInputScores);
-
+
cerr<<"Table limit: "<<tableLimit<<endl;
cerr<<"WeightWordPenalty: "<<weightWP<<endl;
cerr<<"Source phrase: ___"<<source_str<<"___"<<endl;
-
+
if (!pd->Load(input, output, filePath, weight, tableLimit, lmList, weightWP)) {
delete pd;
return false;
}
-
+
cerr<<"-------------------------------------------------"<<endl;
FactorDirection direction;
Phrase phrase(direction);
@@ -106,15 +106,15 @@ int main (int argc, char *argv[]) {
phrase.CreateFromString(input, source_str, "|");
TargetPhraseCollection *tpc = (TargetPhraseCollection*) pd->GetTargetPhraseCollection(phrase);
- if (tpc == NULL)
+ if (tpc == NULL)
cerr<<"Not found."<<endl;
- else {
+ else {
TargetPhraseCollection::iterator iterTargetPhrase;
for (iterTargetPhrase = tpc->begin(); iterTargetPhrase != tpc->end(); ++iterTargetPhrase) {
//cerr<<(*(*iterTargetPhrase))<<endl;
-
+
stringstream strs;
- strs<<static_cast<const Phrase&>(*(*iterTargetPhrase));
+ strs<<static_cast<const Phrase&>(*(*iterTargetPhrase));
cerr<<source_str<<" => ___"<<trim(strs.str())<<"___ ";
ScoreComponentCollection scc = (*iterTargetPhrase)->GetScoreBreakdown();
cerr<<"Scores: ";
@@ -123,6 +123,6 @@ int main (int argc, char *argv[]) {
}
cerr<<endl;
}
- }
- cerr<<"-------------------------------------------------"<<endl;
+ }
+ cerr<<"-------------------------------------------------"<<endl;
}
diff --git a/moses/BaseManager.cpp b/moses/BaseManager.cpp
index 3d020da9c..a89bb848a 100644
--- a/moses/BaseManager.cpp
+++ b/moses/BaseManager.cpp
@@ -11,12 +11,12 @@ using namespace std;
namespace Moses
{
-BaseManager::BaseManager(ttasksptr const& ttask)
+BaseManager::BaseManager(ttasksptr const& ttask)
: m_ttask(ttask), m_source(*(ttask->GetSource().get()))
{ }
-const InputType&
-BaseManager::GetSource() const
+const InputType&
+BaseManager::GetSource() const
{ return m_source; }
diff --git a/moses/BaseManager.h b/moses/BaseManager.h
index 4e403238a..d7a25e7fd 100644
--- a/moses/BaseManager.h
+++ b/moses/BaseManager.h
@@ -17,10 +17,10 @@ class BaseManager
protected:
// const InputType &m_source; /**< source sentence to be translated */
ttaskwptr m_ttask;
- InputType const& m_source;
+ InputType const& m_source;
+
+ BaseManager(ttasksptr const& ttask);
- BaseManager(ttasksptr const& ttask);
-
// output
typedef std::vector<std::pair<Moses::Word, Moses::WordsRange> > ApplicationContext;
typedef std::set< std::pair<size_t, size_t> > Alignments;
diff --git a/moses/ChartCellCollection.h b/moses/ChartCellCollection.h
index 5fdc98f58..5945ce12a 100644
--- a/moses/ChartCellCollection.h
+++ b/moses/ChartCellCollection.h
@@ -35,7 +35,7 @@ class ChartParser;
class ChartCellCollectionBase
{
public:
- template <class Factory> ChartCellCollectionBase(const InputType &input,
+ template <class Factory> ChartCellCollectionBase(const InputType &input,
const Factory &factory,
const ChartParser &parser)
:m_cells(input.GetSize()) {
diff --git a/moses/ChartManager.cpp b/moses/ChartManager.cpp
index 4856b3656..a0b39167a 100644
--- a/moses/ChartManager.cpp
+++ b/moses/ChartManager.cpp
@@ -67,7 +67,7 @@ ChartManager::~ChartManager()
//! decode the sentence. This contains the main laps. Basically, the CKY++ algorithm
void ChartManager::Decode()
{
-
+
VERBOSE(1,"Translating: " << m_source << endl);
ResetSentenceStats(m_source);
diff --git a/moses/ChartParser.cpp b/moses/ChartParser.cpp
index b0d2a3333..66e22a055 100644
--- a/moses/ChartParser.cpp
+++ b/moses/ChartParser.cpp
@@ -37,7 +37,7 @@ namespace Moses
{
ChartParserUnknown
-::ChartParserUnknown(ttasksptr const& ttask)
+::ChartParserUnknown(ttasksptr const& ttask)
: m_ttask(ttask)
{ }
@@ -141,7 +141,7 @@ void ChartParserUnknown::Process(const Word &sourceWord, const WordsRange &range
}
ChartParser
-::ChartParser(ttasksptr const& ttask, ChartCellCollectionBase &cells)
+::ChartParser(ttasksptr const& ttask, ChartCellCollectionBase &cells)
: m_ttask(ttask)
, m_unknown(ttask)
, m_decodeGraphList(StaticData::Instance().GetDecodeGraphs())
diff --git a/moses/ConfusionNet.h b/moses/ConfusionNet.h
index d680f2474..e274f2491 100644
--- a/moses/ConfusionNet.h
+++ b/moses/ConfusionNet.h
@@ -73,7 +73,7 @@ public:
std::string GetStringRep(const std::vector<FactorType> factorsToPrint) const; //TODO not defined
const Word& GetWord(size_t pos) const;
- TranslationOptionCollection*
+ TranslationOptionCollection*
CreateTranslationOptionCollection(ttasksptr const& ttask) const;
const NonTerminalSet &GetLabelSet(size_t /*startPos*/, size_t /*endPos*/) const {
diff --git a/moses/ContextScope.h b/moses/ContextScope.h
index 8f8c398a0..ed9f854ff 100644
--- a/moses/ContextScope.h
+++ b/moses/ContextScope.h
@@ -2,9 +2,9 @@
// A class to store "local" information (such as task-specific caches).
// The idea is for each translation task to have a scope, which stores
// shared pointers to task-specific objects such as caches and priors.
-// Since these objects are referenced via shared pointers, sopes can
+// Since these objects are referenced via shared pointers, sopes can
// share information.
-#pragma once
+#pragma once
#ifdef WITH_THREADS
#include <boost/thread/shared_mutex.hpp>
@@ -50,16 +50,16 @@ namespace Moses
template<typename T>
boost::shared_ptr<void> const&
- set(void const* const key, boost::shared_ptr<T> const& val)
- {
+ set(void const* const key, boost::shared_ptr<T> const& val)
+ {
boost::unique_lock<boost::shared_mutex> lock(m_lock);
return (m_scratchpad[key] = val);
}
template<typename T>
boost::shared_ptr<T> const
- get(void const* key, bool CreateNewIfNecessary=false)
- {
+ get(void const* key, bool CreateNewIfNecessary=false)
+ {
using boost::shared_mutex;
using boost::upgrade_lock;
// T const* key = reinterpret_cast<T const*>(xkey);
@@ -68,7 +68,7 @@ namespace Moses
boost::shared_ptr< T > ret;
if (m != m_scratchpad.end())
{
- if (m->second == NULL && CreateNewIfNecessary)
+ if (m->second == NULL && CreateNewIfNecessary)
{
boost::upgrade_to_unique_lock<shared_mutex> xlock(lock);
m->second.reset(new T);
@@ -85,7 +85,7 @@ namespace Moses
ContextScope() { }
- ContextScope(ContextScope const& other)
+ ContextScope(ContextScope const& other)
{
boost::unique_lock<boost::shared_mutex> lock1(this->m_lock);
boost::unique_lock<boost::shared_mutex> lock2(other.m_lock);
diff --git a/moses/DecodeStepTranslation.cpp b/moses/DecodeStepTranslation.cpp
index 3692a68ea..7ea26f8a5 100644
--- a/moses/DecodeStepTranslation.cpp
+++ b/moses/DecodeStepTranslation.cpp
@@ -215,15 +215,15 @@ const InputPath &DecodeStepTranslation::GetInputPathLEGACY(
UTIL_THROW(util::Exception, "Input path not found");
}
-void
+void
DecodeStepTranslation::
ProcessLEGACY(TranslationOption const& in,
- DecodeStep const& decodeStep,
- PartialTranslOptColl &out,
- TranslationOptionCollection *toc,
+ DecodeStep const& decodeStep,
+ PartialTranslOptColl &out,
+ TranslationOptionCollection *toc,
bool adhereTableLimit) const
{
- if (in.GetTargetPhrase().GetSize() == 0)
+ if (in.GetTargetPhrase().GetSize() == 0)
{
// word deletion
out.Add(new TranslationOption(in));
@@ -240,35 +240,35 @@ ProcessLEGACY(TranslationOption const& in,
TargetPhraseCollectionWithSourcePhrase const* phraseColl;
phraseColl = pdict->GetTargetPhraseCollectionLEGACY(toc->GetSource(),srcRange);
-
- if (phraseColl != NULL)
+
+ if (phraseColl != NULL)
{
TargetPhraseCollection::const_iterator iterTargetPhrase, iterEnd;
iterEnd = ((adhereTableLimit && tableLimit && phraseColl->GetSize() >= tableLimit)
- ? phraseColl->begin() + tableLimit : phraseColl->end());
-
- for (iterTargetPhrase = phraseColl->begin();
- iterTargetPhrase != iterEnd;
- ++iterTargetPhrase)
+ ? phraseColl->begin() + tableLimit : phraseColl->end());
+
+ for (iterTargetPhrase = phraseColl->begin();
+ iterTargetPhrase != iterEnd;
+ ++iterTargetPhrase)
{
TargetPhrase const& targetPhrase = **iterTargetPhrase;
if (targetPhrase.GetSize() != currSize ||
(IsFilteringStep() && !in.IsCompatible(targetPhrase, m_conflictFactors)))
continue;
-
+
TargetPhrase outPhrase(inPhrase);
outPhrase.Merge(targetPhrase, m_newOutputFactors);
outPhrase.EvaluateInIsolation(inputPath.GetPhrase(), m_featuresToApply); // need to do this as all non-transcores would be screwed up
-
+
TranslationOption *newTransOpt = new TranslationOption(srcRange, outPhrase);
assert(newTransOpt != NULL);
newTransOpt->SetInputPath(inputPath);
-
+
out.Add(newTransOpt);
-
+
}
- }
+ }
}
}
diff --git a/moses/ExportInterface.cpp b/moses/ExportInterface.cpp
index b1ec0ac05..27f757b5c 100644
--- a/moses/ExportInterface.cpp
+++ b/moses/ExportInterface.cpp
@@ -119,10 +119,10 @@ string SimpleTranslationInterface::translate(const string &inputString)
FeatureFunction::CallChangeSource(&*source);
// set up task of translating one sentence
- boost::shared_ptr<TranslationTask> task
+ boost::shared_ptr<TranslationTask> task
= TranslationTask::create(source, ioWrapper);
task->Run();
-
+
string output = outputStream.str();
//now trim the end whitespace
const string whitespace = " \t\f\v\n\r";
@@ -152,28 +152,28 @@ run_as_server()
string logfile; params.SetParameter(logfile, "server-log", string(""));
size_t num_threads; params.SetParameter(num_threads, "threads", size_t(10));
if (isSerial) VERBOSE(1,"Running server in serial mode." << endl);
-
+
xmlrpc_c::registry myRegistry;
-
+
xmlrpc_c::methodPtr const translator(new MosesServer::Translator(num_threads));
xmlrpc_c::methodPtr const updater(new MosesServer::Updater);
xmlrpc_c::methodPtr const optimizer(new MosesServer::Optimizer);
-
+
myRegistry.addMethod("translate", translator);
myRegistry.addMethod("updater", updater);
myRegistry.addMethod("optimize", optimizer);
-
+
xmlrpc_c::serverAbyss myAbyssServer(myRegistry, port, logfile);
-
+
XVERBOSE(1,"Listening on port " << port << endl);
- if (isSerial) { while(1) myAbyssServer.runOnce(); }
+ if (isSerial) { while(1) myAbyssServer.runOnce(); }
else myAbyssServer.run();
std::cerr << "xmlrpc_c::serverAbyss.run() returned but should not." << std::endl;
- // #pragma message("BUILDING MOSES WITH SERVER SUPPORT")
+ // #pragma message("BUILDING MOSES WITH SERVER SUPPORT")
#else
- // #pragma message("BUILDING MOSES WITHOUT SERVER SUPPORT")
- std::cerr << "Moses was compiled without server support." << endl;
+ // #pragma message("BUILDING MOSES WITHOUT SERVER SUPPORT")
+ std::cerr << "Moses was compiled without server support." << endl;
#endif
return 1;
@@ -189,15 +189,15 @@ batch_run()
util::rand_init();
IFVERBOSE(1) PrintUserTime("Created input-output object");
-
+
// set up read/writing class:
- boost::shared_ptr<IOWrapper> ioWrapper(new IOWrapper);
+ boost::shared_ptr<IOWrapper> ioWrapper(new IOWrapper);
UTIL_THROW_IF2(ioWrapper == NULL, "Error; Failed to create IO object"
<< " [" << HERE << "]");
-
+
// check on weights
const ScoreComponentCollection& weights = staticData.GetAllWeights();
- IFVERBOSE(2)
+ IFVERBOSE(2)
{
TRACE_ERR("The global weight vector looks like this: ");
TRACE_ERR(weights);
@@ -217,15 +217,15 @@ batch_run()
while ((source = ioWrapper->ReadInput()) != NULL)
{
IFVERBOSE(1) ResetUserTime();
-
+
FeatureFunction::CallChangeSource(source.get());
-
+
// set up task of translating one sentence
boost::shared_ptr<TranslationTask>
task = TranslationTask::create(source, ioWrapper);
task->SetContextString(context_string);
- // Allow for (sentence-)context-specific processing prior to
+ // Allow for (sentence-)context-specific processing prior to
// decoding. This can be used, for example, for context-sensitive
// phrase lookup.
FeatureFunction::SetupAll(*task);
@@ -236,7 +236,7 @@ batch_run()
// simulated post-editing requires threads (within the dynamic phrase tables)
// but runs all sentences serially, to allow updating of the bitext.
bool spe = params.isParamSpecified("spe-src");
- if (spe)
+ if (spe)
{
// simulated post-editing: always run single-threaded!
task->Run();
@@ -247,7 +247,7 @@ batch_run()
<< "missing update data for simulated post-editing.");
UTIL_THROW_IF2(!getline(*ioWrapper->spe_aln,aln), "[" << HERE << "] "
<< "missing update data for simulated post-editing.");
- BOOST_FOREACH (PhraseDictionary* pd, PhraseDictionary::GetColl())
+ BOOST_FOREACH (PhraseDictionary* pd, PhraseDictionary::GetColl())
{
Mmsapt* sapt = dynamic_cast<Mmsapt*>(pd);
if (sapt) sapt->add(src,trg,aln);
@@ -255,7 +255,7 @@ batch_run()
VERBOSE(1,"[" << HERE << " added trg] " << trg << endl);
VERBOSE(1,"[" << HERE << " added aln] " << aln << endl);
}
- }
+ }
else pool.Submit(task);
#else
pool.Submit(task);
@@ -265,7 +265,7 @@ batch_run()
task->Run();
#endif
}
-
+
// we are done, finishing up
#ifdef WITH_THREADS
pool.Stop(true); //flush remaining jobs
@@ -274,7 +274,7 @@ batch_run()
FeatureFunction::Destroy();
IFVERBOSE(1) util::PrintUsage(std::cerr);
-
+
#ifndef EXIT_RETURN
//This avoids that destructors are called (it can take a long time)
exit(EXIT_SUCCESS);
@@ -293,9 +293,9 @@ int decoder_main(int argc, char** argv)
#ifdef HAVE_PROTOBUF
GOOGLE_PROTOBUF_VERIFY_VERSION;
#endif
-
+
// echo command line, if verbose
- IFVERBOSE(1)
+ IFVERBOSE(1)
{
TRACE_ERR("command: ");
for(int i=0; i<argc; ++i) TRACE_ERR(argv[i]<<" ");
@@ -308,29 +308,29 @@ int decoder_main(int argc, char** argv)
// load all the settings into the Parameter class
// (stores them as strings, or array of strings)
- if (!params.LoadParam(argc,argv))
+ if (!params.LoadParam(argc,argv))
exit(1);
// initialize all "global" variables, which are stored in StaticData
// note: this also loads models such as the language model, etc.
- if (!StaticData::LoadDataStatic(&params, argv[0]))
+ if (!StaticData::LoadDataStatic(&params, argv[0]))
exit(1);
-
+
// setting "-show-weights" -> just dump out weights and exit
- if (params.isParamSpecified("show-weights"))
+ if (params.isParamSpecified("show-weights"))
{
ShowWeights();
exit(0);
}
-
+
if (params.GetParam("server"))
return run_as_server();
else
return batch_run();
-
- }
+
+ }
#ifdef NDEBUG
- catch (const std::exception &e)
+ catch (const std::exception &e)
{
std::cerr << "Exception: " << e.what() << std::endl;
return EXIT_FAILURE;
diff --git a/moses/FF/Factory.cpp b/moses/FF/Factory.cpp
index 14ef4f90f..81c6bdeb9 100644
--- a/moses/FF/Factory.cpp
+++ b/moses/FF/Factory.cpp
@@ -147,8 +147,8 @@ protected:
FeatureFactory() {}
};
-template <class F>
-void
+template <class F>
+void
FeatureFactory
::DefaultSetup(F *feature)
{
@@ -156,7 +156,7 @@ FeatureFactory
const string &featureName = feature->GetScoreProducerDescription();
std::vector<float> weights = static_data.GetParameter()->GetWeights(featureName);
-
+
if (feature->GetNumScoreComponents())
{
if (weights.size() == 0)
@@ -177,19 +177,19 @@ FeatureFactory
}
UTIL_THROW_IF2(weights.size() != feature->GetNumScoreComponents(),
"FATAL ERROR: Mismatch in number of features and number "
- << "of weights for Feature Function " << featureName
- << " (features: " << feature->GetNumScoreComponents()
+ << "of weights for Feature Function " << featureName
+ << " (features: " << feature->GetNumScoreComponents()
<< " vs. weights: " << weights.size() << ")");
static_data.SetWeights(feature, weights);
}
- else if (feature->IsTuneable())
+ else if (feature->IsTuneable())
static_data.SetWeights(feature, weights);
}
namespace
{
-template <class F>
+template <class F>
class DefaultFeatureFactory : public FeatureFactory
{
public:
diff --git a/moses/FF/FeatureFunction.cpp b/moses/FF/FeatureFunction.cpp
index fcce15c4d..298a9e65c 100644
--- a/moses/FF/FeatureFunction.cpp
+++ b/moses/FF/FeatureFunction.cpp
@@ -38,10 +38,10 @@ void FeatureFunction::Destroy()
RemoveAllInColl(s_staticColl);
}
-// The original declaration as
+// The original declaration as
// void FeatureFunction::CallChangeSource(InputType *&input)
-// had me a bit perplexed. Would you really want to allow
-// any feature function to replace the InputType behind the
+// had me a bit perplexed. Would you really want to allow
+// any feature function to replace the InputType behind the
// back of the others? And change what the vector is pointing to?
void FeatureFunction::CallChangeSource(InputType * const&input)
@@ -190,17 +190,17 @@ void FeatureFunction::SetTuneableComponents(const std::string& value)
}
}
-void
+void
FeatureFunction
::InitializeForInput(ttasksptr const& ttask)
{ InitializeForInput(*(ttask->GetSource().get())); }
-void
+void
FeatureFunction
-::CleanUpAfterSentenceProcessing(ttasksptr const& ttask)
+::CleanUpAfterSentenceProcessing(ttasksptr const& ttask)
{ CleanUpAfterSentenceProcessing(*(ttask->GetSource().get())); }
-size_t
+size_t
FeatureFunction
::GetIndex() const
{ return m_index; }
@@ -208,9 +208,9 @@ FeatureFunction
/// set index
// @return index of the next FF
-size_t
+size_t
FeatureFunction
-::SetIndex(size_t const idx)
+::SetIndex(size_t const idx)
{
m_index = idx;
return this->GetNumScoreComponents() + idx;
diff --git a/moses/FF/FeatureFunction.h b/moses/FF/FeatureFunction.h
index 082542554..a8f189f0b 100644
--- a/moses/FF/FeatureFunction.h
+++ b/moses/FF/FeatureFunction.h
@@ -27,7 +27,7 @@ class FactorMask;
class InputPath;
class StackVec;
class DistortionScoreProducer;
-class TranslationTask;
+class TranslationTask;
/** base class for all feature functions.
*/
@@ -128,12 +128,12 @@ protected:
public:
//! Called before search and collecting of translation options
- virtual void
+ virtual void
InitializeForInput(ttasksptr const& ttask);
// clean up temporary memory, called after processing each sentence
- virtual void
- CleanUpAfterSentenceProcessing(ttasksptr const& ttask);
+ virtual void
+ CleanUpAfterSentenceProcessing(ttasksptr const& ttask);
const std::string &
GetArgLine() const { return m_argLine; }
@@ -151,11 +151,11 @@ public:
// may have more factors than actually need, but not guaranteed.
// For SCFG decoding, the source contains non-terminals, NOT the raw
// source from the input sentence
- virtual void
+ virtual void
EvaluateInIsolation(const Phrase &source, const TargetPhrase &targetPhrase,
ScoreComponentCollection& scoreBreakdown,
ScoreComponentCollection& estimatedFutureScore) const = 0;
-
+
// override this method if you want to change the input before decoding
virtual void ChangeSource(InputType * const&input) const { }
diff --git a/moses/FF/LexicalReordering/LexicalReordering.cpp b/moses/FF/LexicalReordering/LexicalReordering.cpp
index d9b1843e9..c67a16076 100644
--- a/moses/FF/LexicalReordering/LexicalReordering.cpp
+++ b/moses/FF/LexicalReordering/LexicalReordering.cpp
@@ -65,8 +65,8 @@ LexicalReordering(const std::string &line)
}
// sanity check: number of default scores
- size_t numScores
- = m_numScoreComponents
+ size_t numScores
+ = m_numScoreComponents
= m_numTuneableComponents
= m_configuration->GetNumScoreComponents();
UTIL_THROW_IF2(m_haveDefaultScores && m_defaultScores.size() != numScores,
@@ -137,7 +137,7 @@ void
LexicalReordering::
SetCache(TranslationOption& to) const
{
- if (to.GetLexReorderingScores(this)) return;
+ if (to.GetLexReorderingScores(this)) return;
// Scores were were set already (e.g., by sampling phrase table)
Phrase const& sphrase = to.GetInputPath().GetPhrase();
@@ -157,7 +157,7 @@ void
LexicalReordering::
SetCache(TranslationOptionList& tol) const
{
- BOOST_FOREACH(TranslationOption* to, tol)
+ BOOST_FOREACH(TranslationOption* to, tol)
this->SetCache(*to);
}
diff --git a/moses/FF/LexicalReordering/LexicalReorderingState.cpp b/moses/FF/LexicalReordering/LexicalReorderingState.cpp
index f21c45455..48fd577f1 100644
--- a/moses/FF/LexicalReordering/LexicalReorderingState.cpp
+++ b/moses/FF/LexicalReordering/LexicalReorderingState.cpp
@@ -101,7 +101,7 @@ GetOrientation(int const reoDistance) const
// this one is for HierarchicalReorderingBackwardState
return ((m_modelType == LeftRight)
? (reoDistance >= 1) ? R : L
- : (reoDistance == 1) ? M
+ : (reoDistance == 1) ? M
: (m_modelType == Monotonic) ? NM
: (reoDistance == -1) ? S
: (m_modelType == MSD) ? D
diff --git a/moses/FF/LexicalReordering/LexicalReorderingState.h b/moses/FF/LexicalReordering/LexicalReorderingState.h
index 48bf4698a..1e488fc41 100644
--- a/moses/FF/LexicalReordering/LexicalReorderingState.h
+++ b/moses/FF/LexicalReordering/LexicalReorderingState.h
@@ -44,7 +44,7 @@ public:
static const ReorderingType L = 1; // left
static const ReorderingType MAX = 3; // largest possible
#else
- enum ReorderingType
+ enum ReorderingType
{
M = 0, // monotonic
NM = 1, // non-monotonic
diff --git a/moses/FF/LexicalReordering/SparseReordering.cpp b/moses/FF/LexicalReordering/SparseReordering.cpp
index 27e090ccd..5397dcb10 100644
--- a/moses/FF/LexicalReordering/SparseReordering.cpp
+++ b/moses/FF/LexicalReordering/SparseReordering.cpp
@@ -113,10 +113,10 @@ void SparseReordering::PreCalculateFeatureNames(size_t index, const string& id,
for (size_t position = SparseReorderingFeatureKey::First;
position <= SparseReorderingFeatureKey::Last; ++position) {
for (int reoType = 0; reoType <= LRModel::MAX; ++reoType) {
- SparseReorderingFeatureKey
- key(index, static_cast<SparseReorderingFeatureKey::Type>(type),
- factor, isCluster,
- static_cast<SparseReorderingFeatureKey::Position>(position),
+ SparseReorderingFeatureKey
+ key(index, static_cast<SparseReorderingFeatureKey::Type>(type),
+ factor, isCluster,
+ static_cast<SparseReorderingFeatureKey::Position>(position),
side, static_cast<LRModel::ReorderingType>(reoType));
m_featureMap.insert(pair<SparseReorderingFeatureKey, FName>(key,m_producer->GetFeatureName(key.Name(id))));
}
diff --git a/moses/FF/LexicalReordering/SparseReordering.h b/moses/FF/LexicalReordering/SparseReordering.h
index 958ce998b..ada17d1b2 100644
--- a/moses/FF/LexicalReordering/SparseReordering.h
+++ b/moses/FF/LexicalReordering/SparseReordering.h
@@ -115,7 +115,7 @@ private:
typedef boost::unordered_map<std::string, float> WeightMap;
WeightMap m_weightMap;
bool m_useWeightMap;
- std::vector<FName> m_featureMap2;
+ std::vector<FName> m_featureMap2;
void ReadWordList(const std::string& filename, const std::string& id,
SparseReorderingFeatureKey::Side side, std::vector<WordList>* pWordLists);
diff --git a/moses/FF/Model1Feature.cpp b/moses/FF/Model1Feature.cpp
index 38883c12e..6f6552461 100644
--- a/moses/FF/Model1Feature.cpp
+++ b/moses/FF/Model1Feature.cpp
@@ -19,7 +19,7 @@ Model1Vocabulary::Model1Vocabulary()
Store(m_NULL,0);
}
-bool Model1Vocabulary::Store(const Factor* word, const unsigned id)
+bool Model1Vocabulary::Store(const Factor* word, const unsigned id)
{
boost::unordered_map<const Factor*, unsigned>::iterator iter = m_lookup.find( word );
if ( iter != m_lookup.end() ) {
@@ -33,7 +33,7 @@ bool Model1Vocabulary::Store(const Factor* word, const unsigned id)
return true;
}
-unsigned Model1Vocabulary::StoreIfNew(const Factor* word)
+unsigned Model1Vocabulary::StoreIfNew(const Factor* word)
{
boost::unordered_map<const Factor*, unsigned>::iterator iter = m_lookup.find( word );
@@ -47,7 +47,7 @@ unsigned Model1Vocabulary::StoreIfNew(const Factor* word)
return id;
}
-unsigned Model1Vocabulary::GetWordID(const Factor* word) const
+unsigned Model1Vocabulary::GetWordID(const Factor* word) const
{
boost::unordered_map<const Factor*, unsigned>::const_iterator iter = m_lookup.find( word );
if ( iter == m_lookup.end() ) {
@@ -56,7 +56,7 @@ unsigned Model1Vocabulary::GetWordID(const Factor* word) const
return iter->second;
}
-const Factor* Model1Vocabulary::GetWord(unsigned id) const
+const Factor* Model1Vocabulary::GetWord(unsigned id) const
{
if (id >= m_vocab.size()) {
return NULL;
@@ -64,7 +64,7 @@ const Factor* Model1Vocabulary::GetWord(unsigned id) const
return m_vocab[ id ];
}
-void Model1Vocabulary::Load(const std::string& fileName)
+void Model1Vocabulary::Load(const std::string& fileName)
{
InputFileStream inFile(fileName);
FactorCollection &factorCollection = FactorCollection::Instance();
@@ -84,7 +84,7 @@ void Model1Vocabulary::Load(const std::string& fileName)
UTIL_THROW_IF2(!stored, "Line " << i << " in " << fileName << " overwrites existing vocabulary entry.");
}
}
- while ( getline(inFile, line) )
+ while ( getline(inFile, line) )
{
++i;
std::vector<std::string> tokens = Tokenize(line);
@@ -104,7 +104,7 @@ void Model1LexicalTable::Load(const std::string &fileName, const Model1Vocabular
std::string line;
unsigned i = 0;
- while ( getline(inFile, line) )
+ while ( getline(inFile, line) )
{
++i;
std::vector<std::string> tokens = Tokenize(line);
@@ -126,8 +126,8 @@ void Model1LexicalTable::Load(const std::string &fileName, const Model1Vocabular
float Model1LexicalTable::GetProbability(const Factor* wordS, const Factor* wordT) const
{
float prob = m_floor;
-
- boost::unordered_map< const Factor*, boost::unordered_map< const Factor*, float > >::const_iterator iter1 = m_ltable.find( wordS );
+
+ boost::unordered_map< const Factor*, boost::unordered_map< const Factor*, float > >::const_iterator iter1 = m_ltable.find( wordS );
if ( iter1 != m_ltable.end() ) {
boost::unordered_map< const Factor*, float >::const_iterator iter2 = iter1->second.find( wordT );
@@ -193,10 +193,10 @@ void Model1Feature::EvaluateWithSourceContext(const InputType &input
float score = 0.0;
float norm = TransformScore(1+sentence.GetSize());
- for (size_t posT=0; posT<targetPhrase.GetSize(); ++posT)
+ for (size_t posT=0; posT<targetPhrase.GetSize(); ++posT)
{
const Word &wordT = targetPhrase.GetWord(posT);
- if ( !wordT.IsNonTerminal() )
+ if ( !wordT.IsNonTerminal() )
{
float thisWordProb = m_model1.GetProbability(m_emptyWord,wordT[0]); // probability conditioned on empty word
@@ -231,7 +231,7 @@ void Model1Feature::EvaluateWithSourceContext(const InputType &input
float thisWordScore = TransformScore(thisWordProb) - norm;
FEATUREVERBOSE(3, "score( " << wordT << " ) = " << thisWordScore << std::endl);
{
- #ifdef WITH_THREADS
+ #ifdef WITH_THREADS
// need to update cache; write lock
boost::unique_lock<boost::shared_mutex> lock(m_accessLock);
#endif
@@ -240,14 +240,14 @@ void Model1Feature::EvaluateWithSourceContext(const InputType &input
score += thisWordScore;
}
}
- }
+ }
scoreBreakdown.PlusEquals(this, score);
}
-
-void Model1Feature::CleanUpAfterSentenceProcessing(const InputType& source)
+
+void Model1Feature::CleanUpAfterSentenceProcessing(const InputType& source)
{
- #ifdef WITH_THREADS
+ #ifdef WITH_THREADS
// need to update cache; write lock
boost::unique_lock<boost::shared_mutex> lock(m_accessLock);
#endif
diff --git a/moses/FF/Model1Feature.h b/moses/FF/Model1Feature.h
index d526d165a..9c380e3ae 100644
--- a/moses/FF/Model1Feature.h
+++ b/moses/FF/Model1Feature.h
@@ -37,7 +37,7 @@ protected:
class Model1LexicalTable
{
public:
- Model1LexicalTable(float floor=1e-7) : m_floor(floor)
+ Model1LexicalTable(float floor=1e-7) : m_floor(floor)
{}
void Load(const std::string& fileName, const Model1Vocabulary& vcbS, const Model1Vocabulary& vcbT);
@@ -100,7 +100,7 @@ private:
const Factor* m_emptyWord;
void Load();
-
+
// cache
mutable boost::unordered_map<const InputType*, boost::unordered_map<const Factor*, float> > m_cache;
#ifdef WITH_THREADS
diff --git a/moses/FF/PhraseOrientationFeature.cpp b/moses/FF/PhraseOrientationFeature.cpp
index 528896f71..2a59340ea 100644
--- a/moses/FF/PhraseOrientationFeature.cpp
+++ b/moses/FF/PhraseOrientationFeature.cpp
@@ -197,7 +197,7 @@ void PhraseOrientationFeature::EvaluateInIsolation(const Phrase &source,
FEATUREVERBOSE(4, "lastNonTerminalPreviousSourceSpanIsAligned== " << reoClassData->lastNonTerminalPreviousSourceSpanIsAligned << std::endl);
FEATUREVERBOSE(4, "lastNonTerminalFollowingSourceSpanIsAligned== " << reoClassData->lastNonTerminalFollowingSourceSpanIsAligned << std::endl;);
- if (reoClassData->lastNonTerminalPreviousSourceSpanIsAligned &&
+ if (reoClassData->lastNonTerminalPreviousSourceSpanIsAligned &&
reoClassData->lastNonTerminalFollowingSourceSpanIsAligned) {
// discontinuous
r2lOrientation = Moses::GHKM::PhraseOrientation::REO_CLASS_DLEFT;
@@ -467,7 +467,7 @@ FFState* PhraseOrientationFeature::EvaluateWhenApplied(
if ( (nNT == currTarPhr.GetAlignNonTerm().GetSize()-1) && reoClassData->lastNonTerminalIsBoundary ) {
// delay right-to-left scoring
-
+
FEATUREVERBOSE(3, "Delaying right-to-left scoring" << std::endl);
std::bitset<3> possibleFutureOrientationsR2L(0x7);
diff --git a/moses/FF/RulePairUnlexicalizedSource.cpp b/moses/FF/RulePairUnlexicalizedSource.cpp
index c31978423..148d54052 100644
--- a/moses/FF/RulePairUnlexicalizedSource.cpp
+++ b/moses/FF/RulePairUnlexicalizedSource.cpp
@@ -10,7 +10,7 @@ using namespace std;
namespace Moses
{
-
+
RulePairUnlexicalizedSource::RulePairUnlexicalizedSource(const std::string &line)
: StatelessFeatureFunction(0, line)
, m_glueRules(false)
@@ -51,7 +51,7 @@ void RulePairUnlexicalizedSource::EvaluateInIsolation(const Phrase &source
return;
}
- for (size_t posS=0; posS<source.GetSize(); ++posS)
+ for (size_t posS=0; posS<source.GetSize(); ++posS)
{
const Word &wordS = source.GetWord(posS);
if ( !wordS.IsNonTerminal() ) {
@@ -61,7 +61,7 @@ void RulePairUnlexicalizedSource::EvaluateInIsolation(const Phrase &source
ostringstream namestr;
- for (size_t posT=0; posT<targetPhrase.GetSize(); ++posT)
+ for (size_t posT=0; posT<targetPhrase.GetSize(); ++posT)
{
const Word &wordT = targetPhrase.GetWord(posT);
const Factor* factorT = wordT[0];
@@ -78,7 +78,7 @@ void RulePairUnlexicalizedSource::EvaluateInIsolation(const Phrase &source
namestr << targetPhraseLHS->GetString() << "|";
for (AlignmentInfo::const_iterator it=targetPhrase.GetAlignNonTerm().begin();
- it!=targetPhrase.GetAlignNonTerm().end(); ++it)
+ it!=targetPhrase.GetAlignNonTerm().end(); ++it)
{
namestr << "|" << it->first << "-" << it->second;
}
diff --git a/moses/FF/RuleScope.cpp b/moses/FF/RuleScope.cpp
index bc1cb3ebd..08987537d 100644
--- a/moses/FF/RuleScope.cpp
+++ b/moses/FF/RuleScope.cpp
@@ -70,11 +70,11 @@ void RuleScope::EvaluateInIsolation(const Phrase &source
estimatedFutureScore.PlusEquals(this, scores);
}
else {
- scoreBreakdown.PlusEquals(this, scores);
+ scoreBreakdown.PlusEquals(this, scores);
}
}
else if (m_futureCostOnly) {
- estimatedFutureScore.PlusEquals(this, score);
+ estimatedFutureScore.PlusEquals(this, score);
}
else {
scoreBreakdown.PlusEquals(this, score);
diff --git a/moses/FF/SoftSourceSyntacticConstraintsFeature.cpp b/moses/FF/SoftSourceSyntacticConstraintsFeature.cpp
index d57c42d99..f788f8e53 100644
--- a/moses/FF/SoftSourceSyntacticConstraintsFeature.cpp
+++ b/moses/FF/SoftSourceSyntacticConstraintsFeature.cpp
@@ -556,8 +556,8 @@ void SoftSourceSyntacticConstraintsFeature::EvaluateWithSourceContext(const Inpu
for (boost::unordered_set<size_t>::const_iterator treeInputLabelsLHSIt = treeInputLabelsLHS.begin();
treeInputLabelsLHSIt != treeInputLabelsLHS.end(); ++treeInputLabelsLHSIt) {
- scoreBreakdown.PlusEquals(this,
- "LHSPAIR_" + targetLHS->GetString().as_string() + "_" + m_sourceLabelsByIndex[*treeInputLabelsLHSIt],
+ scoreBreakdown.PlusEquals(this,
+ "LHSPAIR_" + targetLHS->GetString().as_string() + "_" + m_sourceLabelsByIndex[*treeInputLabelsLHSIt],
(float)1/treeInputLabelsLHS.size());
if (!m_targetSourceLHSJointCountFile.empty()) {
@@ -567,8 +567,8 @@ void SoftSourceSyntacticConstraintsFeature::EvaluateWithSourceContext(const Inpu
}
}
if ( treeInputLabelsLHS.size() == 0 ) {
- scoreBreakdown.PlusEquals(this,
- "LHSPAIR_" + targetLHS->GetString().as_string() + "_" + outputDefaultNonTerminal[0]->GetString().as_string(),
+ scoreBreakdown.PlusEquals(this,
+ "LHSPAIR_" + targetLHS->GetString().as_string() + "_" + outputDefaultNonTerminal[0]->GetString().as_string(),
1);
if (!m_targetSourceLHSJointCountFile.empty()) {
t2sLabelsScore = TransformScore(m_floor);
diff --git a/moses/FF/VW/VW.h b/moses/FF/VW/VW.h
index dd9d0b858..c94791c32 100644
--- a/moses/FF/VW/VW.h
+++ b/moses/FF/VW/VW.h
@@ -165,7 +165,7 @@ public:
const std::vector<VWFeatureBase*>& sourceFeatures =
VWFeatureBase::GetSourceFeatures(GetScoreProducerDescription());
- const std::vector<VWFeatureBase*>& targetFeatures =
+ const std::vector<VWFeatureBase*>& targetFeatures =
VWFeatureBase::GetTargetFeatures(GetScoreProducerDescription());
const WordsRange &sourceRange = translationOptionList.Get(0)->GetSourceWordsRange();
@@ -229,7 +229,7 @@ public:
//
// predict using a trained classifier, use this in decoding (=at test time)
//
-
+
std::vector<float> losses(translationOptionList.size());
// extract source side features
@@ -296,7 +296,7 @@ public:
// classifier (squared/logistic/hinge/...), hence the name "loss"
if (value == "logistic") {
m_normalizer = (Discriminative::Normalizer *) new Discriminative::LogisticLossNormalizer();
- } else if (value == "squared") {
+ } else if (value == "squared") {
m_normalizer = (Discriminative::Normalizer *) new Discriminative::SquaredLossNormalizer();
} else {
UTIL_THROW2("Unknown loss type:" << value);
@@ -317,7 +317,7 @@ public:
const TabbedSentence& tabbedSentence = static_cast<const TabbedSentence&>(source);
UTIL_THROW_IF2(tabbedSentence.GetColumns().size() < 2,
"TabbedSentence must contain target<tab>alignment");
-
+
// target sentence represented as a phrase
Phrase *target = new Phrase();
target->CreateFromString(
@@ -431,7 +431,7 @@ private:
const TargetPhrase &targetPhrase = topt->GetTargetPhrase();
// extract raw counts from phrase-table property
- const CountsPhraseProperty *property =
+ const CountsPhraseProperty *property =
static_cast<const CountsPhraseProperty *>(targetPhrase.GetProperty("Counts"));
if (! property) {
diff --git a/moses/Hypothesis.cpp b/moses/Hypothesis.cpp
index 79a469523..b792d11f8 100644
--- a/moses/Hypothesis.cpp
+++ b/moses/Hypothesis.cpp
@@ -86,7 +86,7 @@ namespace Moses
, m_sourceInput(prevHypo.m_sourceInput)
, m_currSourceWordsRange(transOpt.GetSourceWordsRange())
, m_currTargetWordsRange(prevHypo.m_currTargetWordsRange.GetEndPos() + 1,
- prevHypo.m_currTargetWordsRange.GetEndPos()
+ prevHypo.m_currTargetWordsRange.GetEndPos()
+ transOpt.GetTargetPhrase().GetSize())
, m_wordDeleted(false)
, m_totalScore(0.0f)
@@ -127,7 +127,7 @@ namespace Moses
}
}
- void
+ void
Hypothesis::
AddArc(Hypothesis *loserHypo)
{
@@ -156,7 +156,7 @@ namespace Moses
/***
* return the subclass of Hypothesis most appropriate to the given translation option
*/
- Hypothesis*
+ Hypothesis*
Hypothesis::
CreateNext(const TranslationOption &transOpt) const
{
@@ -166,7 +166,7 @@ namespace Moses
/***
* return the subclass of Hypothesis most appropriate to the given translation option
*/
- Hypothesis*
+ Hypothesis*
Hypothesis::
Create(const Hypothesis &prevHypo, const TranslationOption &transOpt)
{
@@ -182,9 +182,9 @@ namespace Moses
* return the subclass of Hypothesis most appropriate to the given target phrase
*/
- Hypothesis*
+ Hypothesis*
Hypothesis::
- Create(Manager& manager, InputType const& m_source,
+ Create(Manager& manager, InputType const& m_source,
const TranslationOption &initialTransOpt)
{
#ifdef USE_HYPO_POOL
@@ -200,7 +200,7 @@ namespace Moses
keep an ordered list of hypotheses. This makes recombination
much quicker.
*/
- int
+ int
Hypothesis::
RecombineCompare(const Hypothesis &compare) const
{
@@ -223,22 +223,22 @@ namespace Moses
return 0;
}
- void
+ void
Hypothesis::
EvaluateWhenApplied(StatefulFeatureFunction const& sfff,
int state_idx)
{
const StaticData &staticData = StaticData::Instance();
- if (! staticData.IsFeatureFunctionIgnored( sfff ))
+ if (! staticData.IsFeatureFunctionIgnored( sfff ))
{
- m_ffStates[state_idx]
+ m_ffStates[state_idx]
= sfff.EvaluateWhenApplied
(*this, m_prevHypo ? m_prevHypo->m_ffStates[state_idx] : NULL,
&m_currScoreBreakdown);
}
}
- void
+ void
Hypothesis::
EvaluateWhenApplied(const StatelessFeatureFunction& slff)
{
@@ -251,7 +251,7 @@ namespace Moses
/***
* calculate the logarithm of our total translation score (sum up components)
*/
- void
+ void
Hypothesis::
EvaluateWhenApplied(const SquareMatrix &futureScore)
{
@@ -309,7 +309,7 @@ namespace Moses
/**
* print hypothesis information for pharaoh-style logging
*/
- void
+ void
Hypothesis::
PrintHypothesis() const
{
@@ -346,7 +346,7 @@ namespace Moses
//PrintLMScores();
}
- void
+ void
Hypothesis::
CleanupArcList()
{
@@ -361,27 +361,27 @@ namespace Moses
*/
const StaticData &staticData = StaticData::Instance();
size_t nBestSize = staticData.GetNBestSize();
- bool distinctNBest = (staticData.GetDistinctNBest() ||
- staticData.GetLatticeSamplesSize() ||
- staticData.UseMBR() ||
- staticData.GetOutputSearchGraph() ||
- staticData.GetOutputSearchGraphSLF() ||
- staticData.GetOutputSearchGraphHypergraph() ||
+ bool distinctNBest = (staticData.GetDistinctNBest() ||
+ staticData.GetLatticeSamplesSize() ||
+ staticData.UseMBR() ||
+ staticData.GetOutputSearchGraph() ||
+ staticData.GetOutputSearchGraphSLF() ||
+ staticData.GetOutputSearchGraphHypergraph() ||
staticData.UseLatticeMBR());
- if (!distinctNBest && m_arcList->size() > nBestSize * 5)
+ if (!distinctNBest && m_arcList->size() > nBestSize * 5)
{
// prune arc list only if there too many arcs
NTH_ELEMENT4(m_arcList->begin(), m_arcList->begin() + nBestSize - 1,
m_arcList->end(), CompareHypothesisTotalScore());
-
+
// delete bad ones
ArcList::iterator iter;
- for (iter = m_arcList->begin() + nBestSize; iter != m_arcList->end() ; ++iter)
+ for (iter = m_arcList->begin() + nBestSize; iter != m_arcList->end() ; ++iter)
FREEHYPO(*iter);
m_arcList->erase(m_arcList->begin() + nBestSize, m_arcList->end());
}
-
+
// set all arc's main hypo variable to this hypo
ArcList::iterator iter = m_arcList->begin();
for (; iter != m_arcList->end() ; ++iter) {
@@ -395,15 +395,15 @@ namespace Moses
GetCurrTargetPhrase() const
{ return m_transOpt.GetTargetPhrase(); }
- void
+ void
Hypothesis::
GetOutputPhrase(Phrase &out) const
{
- if (m_prevHypo != NULL)
+ if (m_prevHypo != NULL)
m_prevHypo->GetOutputPhrase(out);
out.Append(GetCurrTargetPhrase());
}
-
+
TO_STRING_BODY(Hypothesis)
// friend
@@ -424,37 +424,37 @@ namespace Moses
}
- std::string
+ std::string
Hypothesis::
GetSourcePhraseStringRep(const vector<FactorType> factorsToPrint) const
{ return m_transOpt.GetInputPath().GetPhrase().GetStringRep(factorsToPrint); }
- std::string
+ std::string
Hypothesis::
GetTargetPhraseStringRep(const vector<FactorType> factorsToPrint) const
{ return (m_prevHypo ? GetCurrTargetPhrase().GetStringRep(factorsToPrint) : ""); }
- std::string
+ std::string
Hypothesis::
GetSourcePhraseStringRep() const
{
vector<FactorType> allFactors(MAX_NUM_FACTORS);
- for(size_t i=0; i < MAX_NUM_FACTORS; i++)
+ for(size_t i=0; i < MAX_NUM_FACTORS; i++)
allFactors[i] = i;
return GetSourcePhraseStringRep(allFactors);
}
- std::string
+ std::string
Hypothesis::
GetTargetPhraseStringRep() const
{
vector<FactorType> allFactors(MAX_NUM_FACTORS);
- for(size_t i=0; i < MAX_NUM_FACTORS; i++)
+ for(size_t i=0; i < MAX_NUM_FACTORS; i++)
allFactors[i] = i;
return GetTargetPhraseStringRep(allFactors);
}
- void
+ void
Hypothesis::
OutputAlignment(std::ostream &out) const
{
@@ -464,32 +464,32 @@ namespace Moses
edges.push_back(currentHypo);
currentHypo = currentHypo->GetPrevHypo();
}
-
+
OutputAlignment(out, edges);
-
+
}
-
- void
+
+ void
Hypothesis::
OutputAlignment(ostream &out, const vector<const Hypothesis *> &edges)
{
size_t targetOffset = 0;
-
+
for (int currEdge = (int)edges.size() - 1 ; currEdge >= 0 ; currEdge--) {
const Hypothesis &edge = *edges[currEdge];
const TargetPhrase &tp = edge.GetCurrTargetPhrase();
size_t sourceOffset = edge.GetCurrSourceWordsRange().GetStartPos();
-
+
OutputAlignment(out, tp.GetAlignTerm(), sourceOffset, targetOffset);
-
+
targetOffset += tp.GetSize();
}
// Used by --print-alignment-info, so no endl
}
- void
+ void
Hypothesis::
- OutputAlignment(ostream &out, const AlignmentInfo &ai,
+ OutputAlignment(ostream &out, const AlignmentInfo &ai,
size_t sourceOffset, size_t targetOffset)
{
typedef std::vector< const std::pair<size_t,size_t>* > AlignVec;
@@ -500,20 +500,20 @@ namespace Moses
const std::pair<size_t,size_t> &alignment = **it;
out << alignment.first + sourceOffset << "-" << alignment.second + targetOffset << " ";
}
-
+
}
- void
+ void
Hypothesis::
OutputInput(std::vector<const Phrase*>& map, const Hypothesis* hypo)
{
if (!hypo->GetPrevHypo()) return;
OutputInput(map, hypo->GetPrevHypo());
- map[hypo->GetCurrSourceWordsRange().GetStartPos()]
+ map[hypo->GetCurrSourceWordsRange().GetStartPos()]
= &hypo->GetTranslationOption().GetInputPath().GetPhrase();
}
- void
+ void
Hypothesis::
OutputInput(std::ostream& os) const
{
@@ -523,13 +523,13 @@ namespace Moses
for (size_t i=0; i<len; ++i)
if (inp_phrases[i]) os << *inp_phrases[i];
}
-
- void
+
+ void
Hypothesis::
OutputBestSurface(std::ostream &out, const std::vector<FactorType> &outputFactorOrder,
char reportSegmentation, bool reportAllFactors) const
{
- if (m_prevHypo)
+ if (m_prevHypo)
{ // recursively retrace this best path through the lattice, starting from the end of the hypothesis sentence
m_prevHypo->OutputBestSurface(out, outputFactorOrder, reportSegmentation, reportAllFactors);
}
@@ -540,9 +540,9 @@ namespace Moses
/***
* print surface factor only for the given phrase
*/
- void
+ void
Hypothesis::
- OutputSurface(std::ostream &out, const Hypothesis &edge,
+ OutputSurface(std::ostream &out, const Hypothesis &edge,
const std::vector<FactorType> &outputFactorOrder,
char reportSegmentation, bool reportAllFactors) const
{
@@ -616,15 +616,15 @@ namespace Moses
}
}
- std::map<size_t, const Factor*>
+ std::map<size_t, const Factor*>
Hypothesis::
GetPlaceholders(const Hypothesis &hypo, FactorType placeholderFactor) const
{
const InputPath &inputPath = hypo.GetTranslationOption().GetInputPath();
const Phrase &inputPhrase = inputPath.GetPhrase();
-
+
std::map<size_t, const Factor*> ret;
-
+
for (size_t sourcePos = 0; sourcePos < inputPhrase.GetSize(); ++sourcePos) {
const Factor *factor = inputPhrase.GetFactor(sourcePos, placeholderFactor);
if (factor) {
@@ -634,7 +634,7 @@ namespace Moses
ret[*targetPos.begin()] = factor;
}
}
-
+
return ret;
}
@@ -646,8 +646,8 @@ namespace Moses
using namespace std;
WordsRange const& src = this->GetCurrSourceWordsRange();
WordsRange const& trg = this->GetCurrTargetWordsRange();
-
- vector<pair<size_t,size_t> const* > a
+
+ vector<pair<size_t,size_t> const* > a
= this->GetCurrTargetPhrase().GetAlignTerm().GetSortedAlignments();
typedef pair<size_t,size_t> item;
map<string, xmlrpc_c::value> M;
@@ -659,7 +659,7 @@ namespace Moses
}
}
- void
+ void
Hypothesis::
OutputWordAlignment(vector<xmlrpc_c::value>& out) const
{
@@ -671,7 +671,7 @@ namespace Moses
}
#endif
-
-
+
+
}
diff --git a/moses/Hypothesis.h b/moses/Hypothesis.h
index ddd0d9af3..0ce75b83c 100644
--- a/moses/Hypothesis.h
+++ b/moses/Hypothesis.h
@@ -41,7 +41,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#ifdef HAVE_XMLRPC_C
#include <xmlrpc-c/base.hpp>
-#endif
+#endif
namespace Moses
{
@@ -291,10 +291,10 @@ public:
#ifdef HAVE_XMLRPC_C
void OutputWordAlignment(std::vector<xmlrpc_c::value>& out) const;
void OutputLocalWordAlignment(std::vector<xmlrpc_c::value>& dest) const;
-#endif
+#endif
+
-
};
std::ostream& operator<<(std::ostream& out, const Hypothesis& hypothesis);
@@ -313,7 +313,7 @@ struct CompareHypothesisTotalScore {
ObjectPool<Hypothesis> &pool = Hypothesis::GetObjectPool(); \
pool.freeObject(hypo); \
} \
-
+
#else
#define FREEHYPO(hypo) delete hypo
#endif
diff --git a/moses/IOWrapper.cpp b/moses/IOWrapper.cpp
index 92994e234..57717e880 100644
--- a/moses/IOWrapper.cpp
+++ b/moses/IOWrapper.cpp
@@ -87,7 +87,7 @@ IOWrapper::IOWrapper()
{
const StaticData &staticData = StaticData::Instance();
- m_inputType = staticData.GetInputType();
+ m_inputType = staticData.GetInputType();
m_currentLine = staticData.GetStartTranslationId();
m_inputFactorOrder = &staticData.GetInputFactorOrder();
@@ -269,7 +269,7 @@ IOWrapper::ReadInput()
#endif
if (source->Read(*m_inputStream, *m_inputFactorOrder))
source->SetTranslationId(m_currentLine++);
- else
+ else
source.reset();
return source;
}
diff --git a/moses/Incremental.cpp b/moses/Incremental.cpp
index 85eec5cfc..5e6139d3b 100644
--- a/moses/Incremental.cpp
+++ b/moses/Incremental.cpp
@@ -203,11 +203,11 @@ struct ChartCellBaseFactory {
} // namespace
-Manager::Manager(ttasksptr const& ttask)
+Manager::Manager(ttasksptr const& ttask)
: BaseManager(ttask)
, cells_(m_source, ChartCellBaseFactory(), parser_)
, parser_(ttask, cells_)
- , n_best_(search::NBestConfig(StaticData::Instance().GetNBestSize()))
+ , n_best_(search::NBestConfig(StaticData::Instance().GetNBestSize()))
{ }
Manager::~Manager()
diff --git a/moses/InputType.cpp b/moses/InputType.cpp
index a06c106bd..d01fdd46e 100644
--- a/moses/InputType.cpp
+++ b/moses/InputType.cpp
@@ -29,7 +29,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
namespace Moses
{
-InputType::InputType(long translationId)
+InputType::InputType(long translationId)
: m_translationId(translationId)
{
m_frontSpanCoveredLength = 0;
diff --git a/moses/InputType.h b/moses/InputType.h
index bb872249d..24c7ef4fb 100644
--- a/moses/InputType.h
+++ b/moses/InputType.h
@@ -180,7 +180,7 @@ public:
virtual void Print(std::ostream&) const =0;
//! create trans options specific to this InputType
- virtual TranslationOptionCollection*
+ virtual TranslationOptionCollection*
CreateTranslationOptionCollection(ttasksptr const& ttask) const=0;
//! return substring. Only valid for Sentence class. TODO - get rid of this fn
diff --git a/moses/LM/RDLM.cpp b/moses/LM/RDLM.cpp
index 68e77d980..179b67095 100644
--- a/moses/LM/RDLM.cpp
+++ b/moses/LM/RDLM.cpp
@@ -50,7 +50,7 @@ void RDLM::Load() {
UTIL_THROW_IF2(size_head != lm_head_base_instance_->get_order(),
"Error: order of head LM (" << lm_head_base_instance_->get_order() << ") does not match context size specified (left_context=" << m_context_left << " , right_context=" << m_context_right << " , up_context=" << m_context_up << " for a total order of " << size_head);
- UTIL_THROW_IF2(size_label != lm_label_base_instance_->get_order(),
+ UTIL_THROW_IF2(size_label != lm_label_base_instance_->get_order(),
"Error: order of label LM (" << lm_label_base_instance_->get_order() << ") does not match context size specified (left_context=" << m_context_left << " , right_context=" << m_context_right << " , up_context=" << m_context_up << " for a total order of " << size_label);
//get int value of commonly used tokens
@@ -96,10 +96,10 @@ void RDLM::Load() {
// TreePointer mytree3 (new InternalTree("[ADJA europäische]"));
// TreePointer mytree4 (new InternalTree("[pred [det [ART die]] [attr [adv [adv [PTKNEG nicht]] [ADV fast]] [ADJA neue]] [attr [ADJA]] [NN Zeit]]]"));
// TreePointer mytree2 (new InternalTree("[vroot [subj [PPER ich]] [VAFIN bin] [pred]]"));
-//
+//
// std::vector<int> ancestor_heads;
// std::vector<int> ancestor_labels;
-//
+//
// size_t boundary_hash(0);
// boost::array<float, 4> score;
// score.fill(0);
@@ -108,48 +108,48 @@ void RDLM::Load() {
// TreePointerMap back_pointers = AssociateLeafNTs(mytree3.get(), previous_trees);
// Score(mytree3.get(), back_pointers, score, ancestor_heads, ancestor_labels, boundary_hash);
// std::cerr << "head LM: " << score[0] << "label LM: " << score[2] << " approx: " << score[1] << " - " << score[3] << std::endl;
-//
+//
// previous_trees.push_back(mytree3);
// back_pointers = AssociateLeafNTs(mytree4.get(), previous_trees);
// std::cerr << "scoring: " << mytree4->GetString() << std::endl;
// Score(mytree4.get(), back_pointers, score, ancestor_heads, ancestor_labels, boundary_hash);
// std::cerr << "head LM: " << score[0] << "label LM: " << score[2] << " approx: " << score[1] << " - " << score[3] << std::endl;
-//
+//
// mytree4->Combine(previous_trees);
// previous_trees.clear();
// previous_trees.push_back(mytree4);
// back_pointers = AssociateLeafNTs(mytree2.get(), previous_trees);
// std::cerr << "scoring: " << mytree2->GetString() << std::endl;
-//
+//
// score[1] = 0;
// score[3] = 0;
// Score(mytree2.get(), back_pointers, score, ancestor_heads, ancestor_labels, boundary_hash);
// std::cerr << "head LM: " << score[0] << "label LM: " << score[2] << " approx: " << score[1] << " - " << score[3] << std::endl;
-//
+//
// score[0] = 0;
// score[1] = 0;
// score[2] = 0;
// score[3] = 0;
// std::cerr << "scoring: " << mytree->GetString() << std::endl;
-//
+//
// Score(mytree.get(), back_pointers, score, ancestor_heads, ancestor_labels, boundary_hash);
// std::cerr << "head LM: " << score[0] << "label LM: " << score[2] << " approx: " << score[1] << " - " << score[3] << std::endl;
-//
+//
// }
// UTIL_THROW2("Finished");
-//
+//
// }
-//
+//
// {
// std::cerr << "BINARIZED\n\n";
// TreePointer mytree (new InternalTree("[vroot [subj [PPER ich]] [^vroot [VAFIN bin] [pred [det [ART die]] [^pred [attr [adv [adv [PTKNEG nicht]] [ADV fast]] [ADJA neue]] [^pred [attr [ADJA europäische]] [NN Zeit]]]]]]"));
// TreePointer mytree3 (new InternalTree("[ADJA europäische]"));
// TreePointer mytree4 (new InternalTree("[^pred [attr [adv [adv [PTKNEG nicht]] [ADV fast]] [ADJA neue]] [^pred [attr [ADJA]] [NN Zeit]]]"));
// TreePointer mytree2 (new InternalTree("[vroot [subj [PPER ich]] [^vroot [VAFIN bin] [pred [det [ART die]] [^pred]]]]"));
-//
+//
// std::vector<int> ancestor_heads;
// std::vector<int> ancestor_labels;
-//
+//
// size_t boundary_hash(0);
// boost::array<float, 4> score;
// score.fill(0);
@@ -158,33 +158,33 @@ void RDLM::Load() {
// TreePointerMap back_pointers = AssociateLeafNTs(mytree3.get(), previous_trees);
// Score(mytree3.get(), back_pointers, score, ancestor_heads, ancestor_labels, boundary_hash);
// std::cerr << "head LM: " << score[0] << " label LM: " << score[2] << " approx: " << score[1] << " - " << score[3] << std::endl;
-//
+//
// previous_trees.push_back(mytree3);
// back_pointers = AssociateLeafNTs(mytree4.get(), previous_trees);
// std::cerr << "scoring: " << mytree4->GetString() << std::endl;
// Score(mytree4.get(), back_pointers, score, ancestor_heads, ancestor_labels, boundary_hash);
// std::cerr << "head LM: " << score[0] << " label LM: " << score[2] << " approx: " << score[1] << " - " << score[3] << std::endl;
-//
+//
// mytree4->Combine(previous_trees);
// previous_trees.clear();
// previous_trees.push_back(mytree4);
// back_pointers = AssociateLeafNTs(mytree2.get(), previous_trees);
// std::cerr << "scoring: " << mytree2->GetString() << std::endl;
-//
+//
// score[1] = 0;
// score[3] = 0;
// Score(mytree2.get(), back_pointers, score, ancestor_heads, ancestor_labels, boundary_hash);
// std::cerr << "head LM: " << score[0] << " label LM: " << score[2] << " approx: " << score[1] << " - " << score[3] << std::endl;
-//
+//
// score[0] = 0;
// score[1] = 0;
// score[2] = 0;
// score[3] = 0;
// std::cerr << "scoring: " << mytree->GetString() << std::endl;
-//
+//
// Score(mytree.get(), back_pointers, score, ancestor_heads, ancestor_labels, boundary_hash);
// std::cerr << "head LM: " << score[0] << " label LM: " << score[2] << " approx: " << score[1] << " - " << score[3] << std::endl;
-//
+//
// }
// UTIL_THROW2("Finished");
diff --git a/moses/Manager.cpp b/moses/Manager.cpp
index a936fa7c7..8daaa6c8e 100644
--- a/moses/Manager.cpp
+++ b/moses/Manager.cpp
@@ -72,7 +72,7 @@ Manager::Manager(ttasksptr const& ttask)
const StaticData &staticData = StaticData::Instance();
SearchAlgorithm searchAlgorithm = staticData.GetSearchAlgorithm();
- m_search = Search::CreateSearch(*this, *source, searchAlgorithm,
+ m_search = Search::CreateSearch(*this, *source, searchAlgorithm,
*m_transOptColl);
StaticData::Instance().InitializeForInput(ttask);
@@ -85,8 +85,8 @@ Manager::~Manager()
StaticData::Instance().CleanUpAfterSentenceProcessing(m_ttask.lock());
}
-const InputType&
-Manager::GetSource() const
+const InputType&
+Manager::GetSource() const
{ return m_source ; }
/**
@@ -129,7 +129,7 @@ void Manager::Decode()
Timer searchTime;
searchTime.start();
m_search->Decode();
- VERBOSE(1, "Line " << m_source.GetTranslationId()
+ VERBOSE(1, "Line " << m_source.GetTranslationId()
<< ": Search took " << searchTime << " seconds" << endl);
IFVERBOSE(2) {
GetSentenceStats().StopTimeTotal();
diff --git a/moses/MockHypothesis.cpp b/moses/MockHypothesis.cpp
index 515f995cd..a087400be 100644
--- a/moses/MockHypothesis.cpp
+++ b/moses/MockHypothesis.cpp
@@ -32,10 +32,10 @@ namespace MosesTest
MockHypothesisGuard
::MockHypothesisGuard
-( const string& sourceSentence,
+( const string& sourceSentence,
const vector<Alignment>& alignments,
const vector<string>& targetSegments)
- : m_initialTransOpt(), m_wp("WordPenalty"),
+ : m_initialTransOpt(), m_wp("WordPenalty"),
m_uwp("UnknownWordPenalty"), m_dist("Distortion")
{
BOOST_CHECK_EQUAL(alignments.size(), targetSegments.size());
diff --git a/moses/Parameter.cpp b/moses/Parameter.cpp
index 3c21a6725..5b5d76828 100644
--- a/moses/Parameter.cpp
+++ b/moses/Parameter.cpp
@@ -47,7 +47,7 @@ Parameter::Parameter()
{
///////////////////////////////////////////////////////////////////////////////////////
// general options
- po::options_description main_opts("Main Options");
+ po::options_description main_opts("Main Options");
AddParam(main_opts,"config", "f", "location of the configuration file");
AddParam(main_opts,"input-file", "i", "location of the input file to be translated");
@@ -57,7 +57,7 @@ Parameter::Parameter()
///////////////////////////////////////////////////////////////////////////////////////
// factorization options
- po::options_description factor_opts("General Factorization Options");
+ po::options_description factor_opts("General Factorization Options");
AddParam(factor_opts,"factor-delimiter", "fd", "specify a different factor delimiter than the default");
// one should be able to specify different factor delimiters for intput and output
AddParam(factor_opts,"mapping", "description of decoding steps"); // whatever that means ...
@@ -65,7 +65,7 @@ Parameter::Parameter()
///////////////////////////////////////////////////////////////////////////////////////
// general search options
- po::options_description search_opts("Search Options");
+ po::options_description search_opts("Search Options");
string desc = "Which search algorithm to use.\n";
desc += "0=normal stack (default)\n";
desc += "1=cube pruning\n";
@@ -120,7 +120,7 @@ Parameter::Parameter()
///////////////////////////////////////////////////////////////////////////////////////
// minimum bayes risk decoding
po::options_description mbr_opts("Minimum Bayes Risk (MBR), Lattice MBR, and Consensus decoding");
-
+
AddParam(mbr_opts,"minimum-bayes-risk", "mbr", "use miminum Bayes risk to determine best translation");
AddParam(mbr_opts,"mbr-size", "number of translation candidates considered in MBR decoding (default 200)");
AddParam(mbr_opts,"mbr-scale", "scaling factor to convert log linear score probability in MBR decoding (default 1.0)");
@@ -138,7 +138,7 @@ Parameter::Parameter()
///////////////////////////////////////////////////////////////////////////////////////
// OOV handling options
- po::options_description oov_opts("OOV Handling Options");
+ po::options_description oov_opts("OOV Handling Options");
AddParam(oov_opts,"drop-unknown", "du", "drop unknown words instead of copying them");
AddParam(oov_opts,"mark-unknown", "mu", "mark unknown words in output");
AddParam(oov_opts,"lmodel-oov-feature", "add language model oov feature, one per model");
@@ -146,7 +146,7 @@ Parameter::Parameter()
///////////////////////////////////////////////////////////////////////////////////////
// input options
- po::options_description input_opts("Input Format Options");
+ po::options_description input_opts("Input Format Options");
AddParam(input_opts,"input-factors", "list of factors in the input");
AddParam(input_opts,"inputtype", "text (0), confusion network (1), word lattice (2), tree (3) (default = 0)");
AddParam(input_opts,"xml-input", "xi", "allows markup of input with desired translations and probabilities. values can be 'pass-through' (default), 'inclusive', 'exclusive', 'constraint', 'ignore'");
@@ -156,7 +156,7 @@ Parameter::Parameter()
///////////////////////////////////////////////////////////////////////////////////////
// output options
- po::options_description output_opts("Output Options");
+ po::options_description output_opts("Output Options");
AddParam(output_opts,"report-all-factors", "report all factors in output, not just first");
AddParam(output_opts,"output-factors", "list if factors in the output");
AddParam(output_opts,"print-id", "prefix translations with id. Default if false");
@@ -192,7 +192,7 @@ Parameter::Parameter()
///////////////////////////////////////////////////////////////////////////////////////
// nbest-options
- po::options_description nbest_opts("N-best Options");
+ po::options_description nbest_opts("N-best Options");
AddParam(nbest_opts,"n-best-list", "file and size of n-best-list to be generated; specify - as the file in order to write to STDOUT");
// AddParam(nbest_opts,"n-best-list-file", "file of n-best-list to be generated; specify - as the file in order to write to STDOUT");
// AddParam(nbest_opts,"n-best-list-size", "size of n-best-list to be generated; specify - as the file in order to write to STDOUT");
@@ -202,22 +202,22 @@ Parameter::Parameter()
AddParam(nbest_opts,"report-all-factors-in-n-best", "Report all factors in n-best-lists. Default is false");
AddParam(nbest_opts,"lattice-samples", "generate samples from lattice, in same format as nbest list. Uses the file and size arguments, as in n-best-list");
AddParam(nbest_opts,"include-segmentation-in-n-best", "include phrasal segmentation in the n-best list. default is false");
- AddParam(nbest_opts,"print-alignment-info-in-n-best",
+ AddParam(nbest_opts,"print-alignment-info-in-n-best",
"Include word-to-word alignment in the n-best list. Word-to-word alignments are taken from the phrase table if any. Default is false");
///////////////////////////////////////////////////////////////////////////////////////
// server options
- po::options_description server_opts("Moses Server Options");
+ po::options_description server_opts("Moses Server Options");
AddParam(server_opts,"server", "Run moses as a translation server.");
AddParam(server_opts,"server-port", "Port for moses server");
AddParam(server_opts,"server-log", "Log destination for moses server");
AddParam(server_opts,"serial", "Run server in serial mode, processing only one request at a time.");
- po::options_description irstlm_opts("IRSTLM Options");
- AddParam(irstlm_opts,"clean-lm-cache",
+ po::options_description irstlm_opts("IRSTLM Options");
+ AddParam(irstlm_opts,"clean-lm-cache",
"clean language model caches after N translations (default N=1)");
- po::options_description chart_opts("Chart Decoding Options");
+ po::options_description chart_opts("Chart Decoding Options");
AddParam(chart_opts,"max-chart-span", "maximum num. of source word chart rules can consume (default 10)");
AddParam(chart_opts,"non-terminals", "list of non-term symbols, space separated");
AddParam(chart_opts,"rule-limit", "a little like table limit. But for chart decoding rules. Default is DEFAULT_MAX_TRANS_OPT_SIZE");
@@ -246,7 +246,7 @@ Parameter::Parameter()
po::options_description cpt_opts("Options when using compact phrase and reordering tables.");
AddParam(cpt_opts,"minphr-memory", "Load phrase table in minphr format into memory");
AddParam(cpt_opts,"minlexr-memory", "Load lexical reordering table in minlexr format into memory");
-
+
po::options_description spe_opts("Simulated Post-editing Options");
AddParam(spe_opts,"spe-src", "Simulated post-editing. Source filename");
AddParam(spe_opts,"spe-trg", "Simulated post-editing. Target filename");
@@ -254,7 +254,7 @@ Parameter::Parameter()
///////////////////////////////////////////////////////////////////////////////////////
// DEPRECATED options
- po::options_description deprec_opts("Deprecated Options");
+ po::options_description deprec_opts("Deprecated Options");
AddParam(deprec_opts,"link-param-count", "DEPRECATED. DO NOT USE. Number of parameters on word links when using confusion networks or lattices (default = 1)");
AddParam(deprec_opts,"weight-slm", "slm", "DEPRECATED. DO NOT USE. weight(s) for syntactic language model");
AddParam(deprec_opts,"weight-bl", "bl", "DEPRECATED. DO NOT USE. weight for bleu score feature");
@@ -296,7 +296,7 @@ Parameter::Parameter()
AddParam(deprec_opts,"source-word-deletion-feature", "DEPRECATED. DO NOT USE. Count feature for each unaligned source word");
AddParam(deprec_opts,"word-translation-feature", "DEPRECATED. DO NOT USE. Count feature for word translation according to word alignment");
- po::options_description zombie_opts("Zombie Options");
+ po::options_description zombie_opts("Zombie Options");
AddParam(zombie_opts,"distortion-file", "source factors (0 if table independent of source), target factors, location of the factorized/lexicalized reordering tables");
@@ -343,10 +343,10 @@ const PARAM_VEC *Parameter::GetParam(const std::string &paramName) const
}
/** initialize a parameter, sub of constructor */
-void
+void
Parameter::
AddParam(po::options_description& optgroup,
- string const& paramName,
+ string const& paramName,
string const& description)
{
m_valid[paramName] = true;
@@ -355,11 +355,11 @@ AddParam(po::options_description& optgroup,
}
/** initialize a parameter (including abbreviation), sub of constructor */
-void
+void
Parameter::
AddParam(po::options_description& optgroup,
- string const& paramName,
- string const& abbrevName,
+ string const& paramName,
+ string const& abbrevName,
string const& description)
{
m_valid[paramName] = true;
@@ -377,14 +377,14 @@ AddParam(po::options_description& optgroup,
}
/** print descriptions of all parameters */
-void
+void
Parameter::
Explain()
{
cerr << "Usage:" << endl;
cerr << m_options << endl;
- // for(PARAM_STRING::const_iterator iterParam = m_description.begin();
- // iterParam != m_description.end(); iterParam++)
+ // for(PARAM_STRING::const_iterator iterParam = m_description.begin();
+ // iterParam != m_description.end(); iterParam++)
// {
// const string paramName = iterParam->first;
// const string paramDescription = iterParam->second;
@@ -399,7 +399,7 @@ Explain()
/** check whether an item on the command line is a switch or a value
* \param token token on the command line to checked **/
-bool
+bool
Parameter::
isOption(const char* token)
{
@@ -413,7 +413,7 @@ isOption(const char* token)
}
/** load all parameters from the configuration file and the command line switches */
-bool
+bool
Parameter::
LoadParam(const string &filePath)
{
@@ -422,11 +422,11 @@ LoadParam(const string &filePath)
}
/** load all parameters from the configuration file and the command line switches */
-bool
+bool
Parameter::
LoadParam(int argc, char* xargv[])
{
- // legacy parameter handling: all parameters are expected
+ // legacy parameter handling: all parameters are expected
// to start with a single dash
char* argv[argc+1];
for (int i = 0; i < argc; ++i)
@@ -435,7 +435,7 @@ LoadParam(int argc, char* xargv[])
if (strlen(argv[i]) > 2 && argv[i][0] == '-' && argv[i][1] == '-')
++argv[i];
}
-
+
// config file (-f) arg mandatory
string configPath;
if ( (configPath = FindParam("-f", argc, argv)) == ""
@@ -534,7 +534,7 @@ LoadParam(int argc, char* xargv[])
return Validate() && noErrorFlag;
}
-void
+void
Parameter::
AddFeaturesCmd()
{
@@ -550,7 +550,7 @@ AddFeaturesCmd()
}
}
-std::vector<float>
+std::vector<float>
Parameter::
GetWeights(const std::string &name)
{
@@ -564,7 +564,7 @@ GetWeights(const std::string &name)
return ret;
}
-void
+void
Parameter::
SetWeight(const std::string &name, size_t ind, float weight)
{
@@ -607,7 +607,7 @@ AddWeight(const std::string &name, size_t ind,
SetWeight(name, ind, weights);
}
-void
+void
Parameter::
ConvertWeightArgsSingleWeight(const string &oldWeightName, const string &newWeightName)
{
@@ -625,7 +625,7 @@ ConvertWeightArgsSingleWeight(const string &oldWeightName, const string &newWeig
}
}
-void
+void
Parameter::
ConvertWeightArgsPhraseModel(const string &oldWeightName)
{
@@ -798,7 +798,7 @@ ConvertWeightArgsPhraseModel(const string &oldWeightName)
}
-void
+void
Parameter::
AddFeature(const std::string &line)
{
@@ -806,7 +806,7 @@ AddFeature(const std::string &line)
features.push_back(line);
}
-void
+void
Parameter::
ConvertWeightArgsDistortion()
{
@@ -873,7 +873,7 @@ ConvertWeightArgsDistortion()
}
-void
+void
Parameter::
ConvertWeightArgsLM()
{
@@ -967,7 +967,7 @@ ConvertWeightArgsLM()
m_setting.erase(oldFeatureName);
}
-void
+void
Parameter::
ConvertWeightArgsGeneration(const std::string &oldWeightName, const std::string &newWeightName)
{
@@ -1011,7 +1011,7 @@ ConvertWeightArgsGeneration(const std::string &oldWeightName, const std::string
m_setting.erase(oldFeatureName);
}
-void
+void
Parameter::
ConvertWeightArgsWordPenalty()
{
@@ -1046,7 +1046,7 @@ ConvertWeightArgsWordPenalty()
}
-void
+void
Parameter::
ConvertPhrasePenalty()
{
@@ -1063,7 +1063,7 @@ ConvertPhrasePenalty()
}
}
-void
+void
Parameter::
ConvertWeightArgs()
{
@@ -1107,7 +1107,7 @@ ConvertWeightArgs()
}
-void
+void
Parameter::
CreateWeightsMap()
{
@@ -1115,7 +1115,7 @@ CreateWeightsMap()
CreateWeightsMap(m_setting["weight"]);
}
-void
+void
Parameter::
CreateWeightsMap(const PARAM_VEC &vec)
{
@@ -1137,7 +1137,7 @@ CreateWeightsMap(const PARAM_VEC &vec)
}
}
-void
+void
Parameter::
WeightOverwrite()
{
@@ -1196,7 +1196,7 @@ WeightOverwrite()
}
/** check that parameter settings make sense */
-bool
+bool
Parameter::
Validate()
{
@@ -1257,9 +1257,9 @@ Validate()
}
/** check whether a file exists */
-bool
+bool
Parameter::
-FilesExist(const string &paramName, int fieldNo,
+FilesExist(const string &paramName, int fieldNo,
std::vector<std::string> const& extensions)
{
typedef std::vector<std::string> StringVec;
@@ -1303,7 +1303,7 @@ FilesExist(const string &paramName, int fieldNo,
/** look for a switch in arg, update parameter */
// TODO arg parsing like this does not belong in the library, it belongs
// in moses-cmd
-string
+string
Parameter::
FindParam(const string &paramSwitch, int argc, char* argv[])
{
@@ -1325,7 +1325,7 @@ FindParam(const string &paramSwitch, int argc, char* argv[])
* \param paramName full name of parameter
* \param argc number of arguments on command line
* \param argv values of paramters on command line */
-void
+void
Parameter::
OverwriteParam(const string &paramSwitch, const string &paramName, int argc, char* argv[])
{
@@ -1353,7 +1353,7 @@ OverwriteParam(const string &paramSwitch, const string &paramName, int argc, cha
/** read parameters from a configuration file */
-bool
+bool
Parameter::
ReadConfigFile(const string &filePath )
{
@@ -1423,7 +1423,7 @@ std::ostream& operator<<(std::ostream &os, const Credit &credit)
return os;
}
-void
+void
Parameter::
PrintCredit()
{
@@ -1513,7 +1513,7 @@ PrintCredit()
/** update parameter settings with command line switches
* \param paramName full name of parameter
* \param values inew values for paramName */
-void
+void
Parameter::
OverwriteParam(const string &paramName, PARAM_VEC values)
{
@@ -1538,14 +1538,14 @@ OverwriteParam(const string &paramName, PARAM_VEC values)
VERBOSE(2, std::endl);
}
-void
+void
Parameter::
PrintFF() const
{
StaticData::Instance().GetFeatureRegistry().PrintFF();
}
-std::set<std::string>
+std::set<std::string>
Parameter::
GetWeightNames() const
{
@@ -1558,7 +1558,7 @@ GetWeightNames() const
return ret;
}
-void
+void
Parameter::
Save(const std::string path)
{
@@ -1586,9 +1586,9 @@ Save(const std::string path)
}
template<>
-void
+void
Parameter::
-SetParameter<bool>(bool &parameter, std::string const& parameterName,
+SetParameter<bool>(bool &parameter, std::string const& parameterName,
bool const& defaultValue) const
{
const PARAM_VEC *params = GetParam(parameterName);
diff --git a/moses/Parameter.h b/moses/Parameter.h
index dd967e925..90b18c427 100644
--- a/moses/Parameter.h
+++ b/moses/Parameter.h
@@ -51,8 +51,8 @@ protected:
PARAM_STRING m_abbreviation;
PARAM_STRING m_description;
PARAM_STRING m_fullname;
- // std::map<char,std::set<std::string> > m_confusable;
- // stores long parameter names that start with a letter that is also a short option.
+ // std::map<char,std::set<std::string> > m_confusable;
+ // stores long parameter names that start with a letter that is also a short option.
options_description m_options;
std::map<std::string, std::vector<float> > m_weights;
@@ -64,30 +64,30 @@ protected:
bool isOption(const char* token);
bool Validate();
- void
+ void
AddParam(options_description& optgroup,
value_semantic const* optvalue,
- std::string const& paramName,
+ std::string const& paramName,
std::string const& description);
- void
+ void
AddParam(options_description& optgroup,
- std::string const &paramName,
+ std::string const &paramName,
std::string const &description);
- void
+ void
AddParam(options_description& optgroup,
value_semantic const* optvalue,
std::string const& paramName,
- std::string const& abbrevName,
+ std::string const& abbrevName,
std::string const& description);
- void
+ void
AddParam(options_description& optgroup,
std::string const& paramName,
- std::string const& abbrevName,
+ std::string const& abbrevName,
std::string const& description);
-
+
void PrintCredit();
void PrintFF() const;
diff --git a/moses/ScoreComponentCollection.cpp b/moses/ScoreComponentCollection.cpp
index 9397cba2b..d07fb5f00 100644
--- a/moses/ScoreComponentCollection.cpp
+++ b/moses/ScoreComponentCollection.cpp
@@ -66,7 +66,7 @@ RegisterScoreProducer(FeatureFunction* scoreProducer)
s_denseVectorSize = scoreProducer->SetIndex(s_denseVectorSize);
VERBOSE(1, "FeatureFunction: "
<< scoreProducer->GetScoreProducerDescription()
- << " start: " << start
+ << " start: " << start
<< " end: " << (s_denseVectorSize-1) << endl);
}
@@ -193,19 +193,19 @@ void ScoreComponentCollection::Save(ostream& out, bool multiline) const
linesep = " ";
}
- std::vector<FeatureFunction*> const& all_ff
+ std::vector<FeatureFunction*> const& all_ff
= FeatureFunction::GetFeatureFunctions();
BOOST_FOREACH(FeatureFunction const* ff, all_ff)
{
string name = ff->GetScoreProducerDescription();
size_t i = ff->GetIndex();
- if (ff->GetNumScoreComponents() == 1)
+ if (ff->GetNumScoreComponents() == 1)
out << name << sep << m_scores[i] << linesep;
- else
+ else
{
size_t stop = i + ff->GetNumScoreComponents();
boost::format fmt("%s_%d");
- for (size_t k = 1; i < stop; ++i, ++k)
+ for (size_t k = 1; i < stop; ++i, ++k)
out << fmt % name % k << sep << m_scores[i] << linesep;
}
}
diff --git a/moses/ScoreComponentCollection.h b/moses/ScoreComponentCollection.h
index c75169a87..0dbdb366c 100644
--- a/moses/ScoreComponentCollection.h
+++ b/moses/ScoreComponentCollection.h
@@ -229,8 +229,8 @@ public:
//! Add scores from a single ScoreProducer only
//! The length of scores must be equal to the number of score components
//! produced by sp
- void
- PlusEquals(const FeatureFunction* sp,
+ void
+ PlusEquals(const FeatureFunction* sp,
const ScoreComponentCollection& scores) {
size_t i = sp->GetIndex();
size_t stop = i + sp->GetNumScoreComponents();
@@ -287,7 +287,7 @@ public:
//! to add the score from a single ScoreProducer that produces
//! a single value
void Assign(const FeatureFunction* sp, float score) {
-
+
UTIL_THROW_IF2(sp->GetNumScoreComponents() != 1,
"Feature function must must only contain 1 score");
m_scores[sp->GetIndex()] = score;
diff --git a/moses/Sentence.cpp b/moses/Sentence.cpp
index 0c81510b3..cf866f933 100644
--- a/moses/Sentence.cpp
+++ b/moses/Sentence.cpp
@@ -43,7 +43,7 @@ Sentence::
Sentence() : Phrase(0) , InputType()
{
const StaticData& SD = StaticData::Instance();
- if (SD.IsSyntax())
+ if (SD.IsSyntax())
m_defaultLabelSet.insert(SD.GetInputDefaultNonTerminal());
}
@@ -60,7 +60,7 @@ aux_init_partial_translation(string& line)
string sourceCompletedStr;
int loc1 = line.find( "|||", 0 );
int loc2 = line.find( "|||", loc1 + 3 );
- if (loc1 > -1 && loc2 > -1)
+ if (loc1 > -1 && loc2 > -1)
{
m_initialTargetPhrase = Trim(line.substr(0, loc1));
string scov = Trim(line.substr(loc1 + 3, loc2 - loc1 - 3));
@@ -68,14 +68,14 @@ aux_init_partial_translation(string& line)
m_sourceCompleted.resize(scov.size());
int contiguous = 1;
- for (size_t i = 0; i < scov.size(); ++i)
+ for (size_t i = 0; i < scov.size(); ++i)
{
- if (sourceCompletedStr.at(i) == '1')
+ if (sourceCompletedStr.at(i) == '1')
{
m_sourceCompleted[i] = true;
if (contiguous) m_frontSpanCoveredLength++;
- }
- else
+ }
+ else
{
m_sourceCompleted[i] = false;
contiguous = 0;
@@ -92,48 +92,48 @@ aux_interpret_sgml_markup(string& line)
typedef std::map<std::string, std::string> metamap;
metamap meta = ProcessAndStripSGML(line);
metamap::const_iterator i;
- if ((i = meta.find("id")) != meta.end())
+ if ((i = meta.find("id")) != meta.end())
this->SetTranslationId(atol(i->second.c_str()));
- if ((i = meta.find("docid")) != meta.end())
+ if ((i = meta.find("docid")) != meta.end())
{
this->SetDocumentId(atol(i->second.c_str()));
this->SetUseTopicId(false);
this->SetUseTopicIdAndProb(false);
}
- if ((i = meta.find("topic")) != meta.end())
+ if ((i = meta.find("topic")) != meta.end())
{
vector<string> topic_params;
boost::split(topic_params, i->second, boost::is_any_of("\t "));
- if (topic_params.size() == 1)
+ if (topic_params.size() == 1)
{
this->SetTopicId(atol(topic_params[0].c_str()));
this->SetUseTopicId(true);
this->SetUseTopicIdAndProb(false);
- }
- else
+ }
+ else
{
this->SetTopicIdAndProb(topic_params);
this->SetUseTopicId(false);
this->SetUseTopicIdAndProb(true);
}
}
- if ((i = meta.find("weight-setting")) != meta.end())
+ if ((i = meta.find("weight-setting")) != meta.end())
{
this->SetWeightSetting(i->second);
this->SetSpecifiesWeightSetting(true);
- StaticData::Instance().SetWeightSetting(i->second);
+ StaticData::Instance().SetWeightSetting(i->second);
// oh this is so horrible! Why does this have to be propagated globally?
// --- UG
- }
+ }
else this->SetSpecifiesWeightSetting(false);
}
-void
+void
Sentence::
aux_interpret_dlt(string& line) // whatever DLT means ... --- UG
{
using namespace std;
- typedef map<string, string> str2str_map;
+ typedef map<string, string> str2str_map;
vector<str2str_map> meta = ProcessAndStripDLT(line);
BOOST_FOREACH(str2str_map const& M, meta)
{
@@ -148,7 +148,7 @@ aux_interpret_dlt(string& line) // whatever DLT means ... --- UG
cbtm = PhraseDictionaryDynamicCacheBased::InstanceNonConst(id);
if (cbtm) cbtm->ExecuteDlt(M);
}
- if (i->second == "cblm")
+ if (i->second == "cblm")
{
DynamicCacheBasedLanguageModel* cblm;
cblm = DynamicCacheBasedLanguageModel::InstanceNonConst(id);
@@ -167,11 +167,11 @@ aux_interpret_xml(std::string& line, std::vector<size_t> & xmlWalls,
const StaticData &SD = StaticData::Instance();
using namespace std;
- if (SD.GetXmlInputType() != XmlPassThrough)
+ if (SD.GetXmlInputType() != XmlPassThrough)
{
int offset = SD.IsSyntax() ? 1 : 0;
- bool OK = ProcessAndStripXMLTags(line, m_xmlOptions,
- m_reorderingConstraint,
+ bool OK = ProcessAndStripXMLTags(line, m_xmlOptions,
+ m_reorderingConstraint,
xmlWalls, placeholders, offset,
SD.GetXmlBrackets().first,
SD.GetXmlBrackets().second);
@@ -179,7 +179,7 @@ aux_interpret_xml(std::string& line, std::vector<size_t> & xmlWalls,
}
}
-void
+void
Sentence::
init(string line, std::vector<FactorType> const& factorOrder)
{
@@ -189,15 +189,15 @@ init(string line, std::vector<FactorType> const& factorOrder)
m_frontSpanCoveredLength = 0;
m_sourceCompleted.resize(0);
- if (SD.ContinuePartialTranslation())
+ if (SD.ContinuePartialTranslation())
aux_init_partial_translation(line);
line = Trim(line);
aux_interpret_sgml_markup(line); // for "<seg id=..." markup
aux_interpret_dlt(line); // some poorly documented cache-based stuff
-
+
// if sentences is specified as "<passthrough tag1=""/>"
- if (SD.IsPassthroughEnabled() || SD.IsPassthroughInNBestEnabled())
+ if (SD.IsPassthroughEnabled() || SD.IsPassthroughInNBestEnabled())
{
string pthru = PassthroughSGML(line,"passthrough");
this->SetPassthroughInformation(pthru);
@@ -212,19 +212,19 @@ init(string line, std::vector<FactorType> const& factorOrder)
ProcessPlaceholders(placeholders);
if (SD.IsSyntax()) InitStartEndWord();
-
+
// now that we have final word positions in phrase (from
// CreateFromString), we can make input phrase objects to go with
// our XmlOptions and create TranslationOptions
// only fill the vector if we are parsing XML
- if (SD.GetXmlInputType() != XmlPassThrough)
+ if (SD.GetXmlInputType() != XmlPassThrough)
{
m_xmlCoverageMap.assign(GetSize(), false);
BOOST_FOREACH(XmlOption* o, m_xmlOptions)
{
WordsRange const& r = o->range;
- for(size_t j = r.GetStartPos(); j <= r.GetEndPos(); ++j)
+ for(size_t j = r.GetStartPos(); j <= r.GetEndPos(); ++j)
m_xmlCoverageMap[j]=true;
}
}
@@ -233,7 +233,7 @@ init(string line, std::vector<FactorType> const& factorOrder)
m_reorderingConstraint.InitializeWalls(GetSize());
// set reordering walls, if "-monotone-at-punction" is set
- if (SD.UseReorderingConstraint() && GetSize())
+ if (SD.UseReorderingConstraint() && GetSize())
{
WordsRange r(0, GetSize()-1);
m_reorderingConstraint.SetMonotoneAtPunctuation(GetSubString(r));
@@ -244,10 +244,10 @@ init(string line, std::vector<FactorType> const& factorOrder)
if(xmlWalls[i] < GetSize()) // no buggy walls, please
m_reorderingConstraint.SetWall(xmlWalls[i], true);
m_reorderingConstraint.FinalizeWalls();
-
+
}
-int
+int
Sentence::
Read(std::istream& in,const std::vector<FactorType>& factorOrder)
{
@@ -258,7 +258,7 @@ Read(std::istream& in,const std::vector<FactorType>& factorOrder)
return 1;
}
-void
+void
Sentence::
ProcessPlaceholders(const std::vector< std::pair<size_t, std::string> > &placeholders)
{
@@ -282,8 +282,8 @@ CreateTranslationOptionCollection(ttasksptr const& ttask) const
{
size_t maxNoTransOptPerCoverage = StaticData::Instance().GetMaxNoTransOptPerCoverage();
float transOptThreshold = StaticData::Instance().GetTranslationOptionThreshold();
- TranslationOptionCollection *rv
- = new TranslationOptionCollectionText(ttask, *this, maxNoTransOptPerCoverage,
+ TranslationOptionCollection *rv
+ = new TranslationOptionCollectionText(ttask, *this, maxNoTransOptPerCoverage,
transOptThreshold);
assert(rv);
return rv;
@@ -385,8 +385,8 @@ CreateFromString(vector<FactorType> const& FOrder, string const& phraseString)
}
Sentence::
-Sentence(size_t const transId, string const& stext,
- vector<FactorType> const* IFO)
+Sentence(size_t const transId, string const& stext,
+ vector<FactorType> const* IFO)
: InputType(transId)
{
if (IFO) init(stext, *IFO);
diff --git a/moses/Sentence.h b/moses/Sentence.h
index 958b3ffc4..8a870f76b 100644
--- a/moses/Sentence.h
+++ b/moses/Sentence.h
@@ -63,7 +63,7 @@ namespace Moses
public:
Sentence();
- Sentence(size_t const transId, std::string const& stext,
+ Sentence(size_t const transId, std::string const& stext,
std::vector<FactorType> const* IFO = NULL);
// Sentence(size_t const transId, std::string const& stext);
~Sentence();
@@ -98,19 +98,19 @@ namespace Moses
virtual int Read(std::istream& in,const std::vector<FactorType>& factorOrder);
void Print(std::ostream& out) const;
- TranslationOptionCollection*
+ TranslationOptionCollection*
CreateTranslationOptionCollection(ttasksptr const& ttask) const;
- virtual void
- CreateFromString(std::vector<FactorType> const &factorOrder,
- std::string const& phraseString);
+ virtual void
+ CreateFromString(std::vector<FactorType> const &factorOrder,
+ std::string const& phraseString);
const NonTerminalSet&
- GetLabelSet(size_t /*startPos*/, size_t /*endPos*/) const
+ GetLabelSet(size_t /*startPos*/, size_t /*endPos*/) const
{ return m_defaultLabelSet; }
- void
+ void
init(std::string line, std::vector<FactorType> const& factorOrder);
private:
@@ -120,13 +120,13 @@ namespace Moses
// void aux_interpret_xml (std::string& line, std::vector<size_t> & xmlWalls,
// std::vector<std::pair<size_t, std::string> >& placeholders);
- void
+ void
aux_interpret_sgml_markup(std::string& line);
- void
+ void
aux_interpret_dlt(std::string& line);
- void
+ void
aux_interpret_xml
(std::string& line, std::vector<size_t> & xmlWalls,
std::vector<std::pair<size_t, std::string> >& placeholders);
@@ -135,7 +135,7 @@ namespace Moses
aux_init_partial_translation(std::string& line);
};
-
+
}
diff --git a/moses/StaticData.cpp b/moses/StaticData.cpp
index bf6f5137f..420ad7a20 100644
--- a/moses/StaticData.cpp
+++ b/moses/StaticData.cpp
@@ -99,7 +99,7 @@ bool StaticData::LoadDataStatic(Parameter *parameter, const std::string &execPat
return s_instance.LoadData(parameter);
}
-void
+void
StaticData
::initialize_features()
{
@@ -117,7 +117,7 @@ StaticData
vector<string> toks = Tokenize(line);
string &feature = toks[0];
- std::map<std::string, std::string>::const_iterator iter
+ std::map<std::string, std::string>::const_iterator iter
= featureNameOverride.find(feature);
if (iter == featureNameOverride.end()) {
// feature name not override
@@ -136,7 +136,7 @@ StaticData
}
-void
+void
StaticData
::ini_input_options()
{
@@ -145,7 +145,7 @@ StaticData
// input type has to be specified BEFORE loading the phrase tables!
m_parameter->SetParameter(m_inputType, "inputtype", SentenceInput);
- m_parameter->SetParameter(m_continuePartialTranslation,
+ m_parameter->SetParameter(m_continuePartialTranslation,
"continue-partial-translation", false );
std::string s_it = "text input";
@@ -177,12 +177,12 @@ StaticData
<< m_xmlBrackets.first << " and " << m_xmlBrackets.second << endl);
}
- m_parameter->SetParameter(m_defaultNonTermOnlyForEmptyRange,
+ m_parameter->SetParameter(m_defaultNonTermOnlyForEmptyRange,
"default-non-term-for-empty-range-only", false );
-
+
}
-bool
+bool
StaticData
::ini_output_options()
{
@@ -346,25 +346,25 @@ StaticData
m_parameter->SetParameter<size_t>(m_nBestFactor, "n-best-factor", 20);
- m_parameter->SetParameter(m_PrintAlignmentInfoNbest,
+ m_parameter->SetParameter(m_PrintAlignmentInfoNbest,
"print-alignment-info-in-n-best", false );
// include feature names in the n-best list
m_parameter->SetParameter(m_labeledNBestList, "labeled-n-best-list", true );
-
+
// include word alignment in the n-best list
- m_parameter->SetParameter(m_nBestIncludesSegmentation,
+ m_parameter->SetParameter(m_nBestIncludesSegmentation,
"include-segmentation-in-n-best", false );
-
+
// print all factors of output translations
- m_parameter->SetParameter(m_reportAllFactorsNBest,
+ m_parameter->SetParameter(m_reportAllFactorsNBest,
"report-all-factors-in-n-best", false );
-
+
m_parameter->SetParameter(m_printNBestTrees, "n-best-trees", false );
return true;
}
-void
+void
StaticData
::ini_compact_table_options()
{
@@ -381,7 +381,7 @@ StaticData
}
// threads, timeouts, etc.
-bool
+bool
StaticData
::ini_performance_options()
{
@@ -411,7 +411,7 @@ StaticData
}
#ifndef WITH_THREADS
if (m_threadCount > 1) {
- std::cerr << "Error: Thread count of " << params->at(0)
+ std::cerr << "Error: Thread count of " << params->at(0)
<< " but moses not built with thread support";
return false;
}
@@ -425,15 +425,15 @@ void
StaticData
::ini_cube_pruning_options()
{
- m_parameter->SetParameter(m_cubePruningPopLimit, "cube-pruning-pop-limit",
+ m_parameter->SetParameter(m_cubePruningPopLimit, "cube-pruning-pop-limit",
DEFAULT_CUBE_PRUNING_POP_LIMIT);
- m_parameter->SetParameter(m_cubePruningDiversity, "cube-pruning-diversity",
+ m_parameter->SetParameter(m_cubePruningDiversity, "cube-pruning-diversity",
DEFAULT_CUBE_PRUNING_DIVERSITY);
- m_parameter->SetParameter(m_cubePruningLazyScoring, "cube-pruning-lazy-scoring",
+ m_parameter->SetParameter(m_cubePruningLazyScoring, "cube-pruning-lazy-scoring",
false);
}
-void
+void
StaticData
::ini_factor_maps()
{
@@ -646,7 +646,7 @@ bool StaticData::LoadData(Parameter *parameter)
ini_mira_options();
// S2T decoder
- m_parameter->SetParameter(m_s2tParsingAlgorithm, "s2t-parsing-algorithm",
+ m_parameter->SetParameter(m_s2tParsingAlgorithm, "s2t-parsing-algorithm",
RecursiveCYKPlus);
@@ -657,14 +657,14 @@ bool StaticData::LoadData(Parameter *parameter)
// FEATURE FUNCTION INITIALIZATION HAPPENS HERE ===============================
initialize_features();
- if (m_parameter->GetParam("show-weights") == NULL)
+ if (m_parameter->GetParam("show-weights") == NULL)
LoadFeatureFunctions();
LoadDecodeGraphs();
// sanity check that there are no weights without an associated FF
if (!CheckWeights()) return false;
-
+
//Load extra feature weights
string weightFile;
m_parameter->SetParameter<string>(weightFile, "weight-file", "");
@@ -680,19 +680,19 @@ bool StaticData::LoadData(Parameter *parameter)
//Load sparse features from config (overrules weight file)
LoadSparseWeightsFromConfig();
- // load alternate weight settings
+ // load alternate weight settings
+ //
+ // When and where are these used??? [UG]
//
- // When and where are these used??? [UG]
- //
// Update: Just checked the manual. The config file is NOT the right
// place to do this. [UG]
- //
- // <TODO>
+ //
+ // <TODO>
// * Eliminate alternate-weight-setting. Alternate weight settings should
- // be provided with the input, not in the config file.
+ // be provided with the input, not in the config file.
// </TODO>
params = m_parameter->GetParam("alternate-weight-setting");
- if (params && params->size() && !LoadAlternateWeightSettings())
+ if (params && params->size() && !LoadAlternateWeightSettings())
return false;
return true;
@@ -1011,11 +1011,11 @@ float StaticData::GetWeightWordPenalty() const
return weightWP;
}
-void
+void
StaticData
::InitializeForInput(ttasksptr const& ttask) const
{
- const std::vector<FeatureFunction*> &producers
+ const std::vector<FeatureFunction*> &producers
= FeatureFunction::GetFeatureFunctions();
for(size_t i=0; i<producers.size(); ++i) {
FeatureFunction &ff = *producers[i];
@@ -1029,11 +1029,11 @@ StaticData
}
}
-void
+void
StaticData
::CleanUpAfterSentenceProcessing(ttasksptr const& ttask) const
{
- const std::vector<FeatureFunction*> &producers
+ const std::vector<FeatureFunction*> &producers
= FeatureFunction::GetFeatureFunctions();
for(size_t i=0; i<producers.size(); ++i) {
FeatureFunction &ff = *producers[i];
@@ -1118,7 +1118,7 @@ bool StaticData::CheckWeights() const
}
return false;
}
-
+
return true;
}
@@ -1268,7 +1268,7 @@ void StaticData::NoCache()
}
}
-std::map<std::string, std::string>
+std::map<std::string, std::string>
StaticData
::OverrideFeatureNames()
{
diff --git a/moses/StaticData.h b/moses/StaticData.h
index baaeebafa..438ac0633 100644
--- a/moses/StaticData.h
+++ b/moses/StaticData.h
@@ -476,17 +476,17 @@ public:
// m_searchAlgorithm == SyntaxF2S;
// }
- bool IsSyntax(SearchAlgorithm algo = DefaultSearchAlgorithm) const
+ bool IsSyntax(SearchAlgorithm algo = DefaultSearchAlgorithm) const
{
- if (algo == DefaultSearchAlgorithm)
+ if (algo == DefaultSearchAlgorithm)
algo = m_searchAlgorithm;
return (algo == CYKPlus || algo == ChartIncremental ||
algo == SyntaxS2T || algo == SyntaxT2S ||
algo == SyntaxF2S || algo == SyntaxT2S_SCFG);
}
-
- const ScoreComponentCollection&
- GetAllWeights() const
+
+ const ScoreComponentCollection&
+ GetAllWeights() const
{ return m_allWeights; }
void SetAllWeights(const ScoreComponentCollection& weights) {
diff --git a/moses/Syntax/F2S/Manager.h b/moses/Syntax/F2S/Manager.h
index 44128ad65..1dcab4f5e 100644
--- a/moses/Syntax/F2S/Manager.h
+++ b/moses/Syntax/F2S/Manager.h
@@ -38,7 +38,7 @@ public:
const SHyperedge *GetBestSHyperedge() const;
typedef std::vector<boost::shared_ptr<KBestExtractor::Derivation> > kBestList_t;
- void ExtractKBest(std::size_t k, kBestList_t& kBestList,
+ void ExtractKBest(std::size_t k, kBestList_t& kBestList,
bool onlyDistinct=false) const;
void OutputDetailedTranslationReport(OutputCollector *collector) const;
diff --git a/moses/TargetPhrase.cpp b/moses/TargetPhrase.cpp
index d6abd16ff..5a26e44cc 100644
--- a/moses/TargetPhrase.cpp
+++ b/moses/TargetPhrase.cpp
@@ -223,8 +223,8 @@ void TargetPhrase::SetSparseScore(const FeatureFunction* translationScoreProduce
m_scoreBreakdown.Assign(translationScoreProducer, sparseString.as_string());
}
-boost::shared_ptr<Scores>
-mergescores(boost::shared_ptr<Scores> const& a,
+boost::shared_ptr<Scores>
+mergescores(boost::shared_ptr<Scores> const& a,
boost::shared_ptr<Scores> const& b)
{
boost::shared_ptr<Scores> ret;
@@ -243,7 +243,7 @@ mergescores(boost::shared_ptr<Scores> const& a,
return ret;
}
-void
+void
TargetPhrase::
Merge(const TargetPhrase &copy, const std::vector<FactorType>& factorVec)
{
@@ -256,7 +256,7 @@ Merge(const TargetPhrase &copy, const std::vector<FactorType>& factorVec)
BOOST_FOREACH(item const& s, copy.m_cached_scores)
{
pair<iter,bool> foo = m_cached_scores.insert(s);
- if (foo.second == false)
+ if (foo.second == false)
foo.first->second = mergescores(foo.first->second, s.second);
}
}
@@ -278,8 +278,8 @@ GetExtraScores(FeatureFunction const* ff) const
void
TargetPhrase::
-SetExtraScores(FeatureFunction const* ff,
- boost::shared_ptr<Scores> const& s)
+SetExtraScores(FeatureFunction const* ff,
+ boost::shared_ptr<Scores> const& s)
{ m_cached_scores[ff] = s; }
diff --git a/moses/TargetPhrase.h b/moses/TargetPhrase.h
index 8230373db..1f5960121 100644
--- a/moses/TargetPhrase.h
+++ b/moses/TargetPhrase.h
@@ -52,16 +52,16 @@ class PhraseDictionary;
class TargetPhrase: public Phrase
{
public:
- typedef std::map<FeatureFunction const*, boost::shared_ptr<Scores> >
+ typedef std::map<FeatureFunction const*, boost::shared_ptr<Scores> >
ScoreCache_t;
ScoreCache_t const& GetExtraScores() const;
Scores const* GetExtraScores(FeatureFunction const* ff) const;
- void SetExtraScores(FeatureFunction const* ff,
+ void SetExtraScores(FeatureFunction const* ff,
boost::shared_ptr<Scores> const& scores);
-
+
private:
- ScoreCache_t m_cached_scores;
-
+ ScoreCache_t m_cached_scores;
+
private:
friend std::ostream& operator<<(std::ostream&, const TargetPhrase&);
friend void swap(TargetPhrase &first, TargetPhrase &second);
@@ -198,7 +198,7 @@ public:
return found->second;
}
-
+
// To be set by the FF that needs it, by default the rule source = NULL
// make a copy of the source side of the rule
diff --git a/moses/ThreadPool.h b/moses/ThreadPool.h
index 024d1c54d..b7d459bb2 100644
--- a/moses/ThreadPool.h
+++ b/moses/ThreadPool.h
@@ -27,7 +27,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include <vector>
#include <boost/shared_ptr.hpp>
-
+
#ifdef WITH_THREADS
#include <boost/bind.hpp>
#include <boost/thread.hpp>
diff --git a/moses/TrainingTask.h b/moses/TrainingTask.h
index d75a43045..6166b4d42 100644
--- a/moses/TrainingTask.h
+++ b/moses/TrainingTask.h
@@ -5,7 +5,7 @@
#include "moses/ThreadPool.h"
#include "moses/TranslationOptionCollection.h"
#include "moses/IOWrapper.h"
-#include "moses/TranslationTask.h"
+#include "moses/TranslationTask.h"
namespace Moses
{
@@ -17,17 +17,17 @@ class TrainingTask : public Moses::TranslationTask
{
protected:
- TrainingTask(boost::shared_ptr<Moses::InputType> const source,
+ TrainingTask(boost::shared_ptr<Moses::InputType> const source,
boost::shared_ptr<Moses::IOWrapper> const ioWrapper)
- : TranslationTask(source, ioWrapper)
+ : TranslationTask(source, ioWrapper)
{ }
-
+
public:
// factory function
- static boost::shared_ptr<TrainingTask>
+ static boost::shared_ptr<TrainingTask>
create(boost::shared_ptr<InputType> const& source)
- {
+ {
boost::shared_ptr<IOWrapper> nix;
boost::shared_ptr<TrainingTask> ret(new TrainingTask(source, nix));
ret->m_self = ret;
@@ -35,8 +35,8 @@ public:
}
// factory function
- static boost::shared_ptr<TrainingTask>
- create(boost::shared_ptr<InputType> const& source,
+ static boost::shared_ptr<TrainingTask>
+ create(boost::shared_ptr<InputType> const& source,
boost::shared_ptr<IOWrapper> const& ioWrapper)
{
boost::shared_ptr<TrainingTask> ret(new TrainingTask(source, ioWrapper));
@@ -44,7 +44,7 @@ public:
return ret;
}
- ~TrainingTask()
+ ~TrainingTask()
{ }
void Run() {
@@ -52,7 +52,7 @@ public:
std::cerr << *m_source << std::endl;
- TranslationOptionCollection *transOptColl
+ TranslationOptionCollection *transOptColl
= m_source->CreateTranslationOptionCollection(this->self());
transOptColl->CreateTranslationOptions();
delete transOptColl;
diff --git a/moses/TranslationModel/CompactPT/BlockHashIndex.h b/moses/TranslationModel/CompactPT/BlockHashIndex.h
index b3f5e6f4b..130dd89fc 100644
--- a/moses/TranslationModel/CompactPT/BlockHashIndex.h
+++ b/moses/TranslationModel/CompactPT/BlockHashIndex.h
@@ -161,8 +161,8 @@ public:
}
#ifdef WITH_THREADS
-
- boost::shared_ptr<HashTask<Keys> >
+
+ boost::shared_ptr<HashTask<Keys> >
ht(new HashTask<Keys>(current, *this, keys));
m_threadPool.Submit(ht);
#else
diff --git a/moses/TranslationModel/CompactPT/MurmurHash3.cpp b/moses/TranslationModel/CompactPT/MurmurHash3.cpp
index fb6946bbc..dfde88708 100644
--- a/moses/TranslationModel/CompactPT/MurmurHash3.cpp
+++ b/moses/TranslationModel/CompactPT/MurmurHash3.cpp
@@ -1,425 +1,425 @@
-//-----------------------------------------------------------------------------
-// MurmurHash3 was written by Austin Appleby, and is placed in the public
-// domain. The author hereby disclaims copyright to this source code.
-
-// Note - The x86 and x64 versions do _not_ produce the same results, as the
-// algorithms are optimized for their respective platforms. You can still
-// compile and run any of them on any platform, but your performance with the
-// non-native version will be less than optimal.
-
-#include "MurmurHash3.h"
-
-//-----------------------------------------------------------------------------
-// Platform-specific functions and macros
-
-// Microsoft Visual Studio
-
-#if defined(_MSC_VER)
-
-#define FORCE_INLINE __forceinline
-
-#include <cstdlib>
-
-#define ROTL32(x,y) _rotl(x,y)
-#define ROTL64(x,y) _rotl64(x,y)
-
-#define BIG_CONSTANT(x) (x)
-
-// Other compilers
-
-#else // defined(_MSC_VER)
-
-#define FORCE_INLINE inline __attribute__((always_inline))
-
-inline uint32_t rotl32 ( uint32_t x, int8_t r )
-{
- return (x << r) | (x >> (32 - r));
-}
-
-inline uint64_t rotl64 ( uint64_t x, int8_t r )
-{
- return (x << r) | (x >> (64 - r));
-}
-
-#define ROTL32(x,y) rotl32(x,y)
-#define ROTL64(x,y) rotl64(x,y)
-
-#define BIG_CONSTANT(x) (x##LLU)
-
-#endif // !defined(_MSC_VER)
-
-//-----------------------------------------------------------------------------
-// Block read - if your platform needs to do endian-swapping or can only
-// handle aligned reads, do the conversion here
-
-FORCE_INLINE uint32_t getblock ( const uint32_t * p, int i )
-{
- return p[i];
-}
-
-FORCE_INLINE uint64_t getblock ( const uint64_t * p, int i )
-{
- return p[i];
-}
-
-//-----------------------------------------------------------------------------
-// Finalization mix - force all bits of a hash block to avalanche
-
-FORCE_INLINE uint32_t fmix ( uint32_t h )
-{
- h ^= h >> 16;
- h *= 0x85ebca6b;
- h ^= h >> 13;
- h *= 0xc2b2ae35;
- h ^= h >> 16;
-
- return h;
-}
-
-//----------
-
-FORCE_INLINE uint64_t fmix ( uint64_t k )
-{
- k ^= k >> 33;
- k *= BIG_CONSTANT(0xff51afd7ed558ccd);
- k ^= k >> 33;
- k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53);
- k ^= k >> 33;
-
- return k;
-}
-
-//-----------------------------------------------------------------------------
-
-void MurmurHash3_x86_32 ( const void * key, int len,
- uint32_t seed, void * out )
-{
- const uint8_t * data = (const uint8_t*)key;
- const int nblocks = len / 4;
-
- uint32_t h1 = seed;
-
- uint32_t c1 = 0xcc9e2d51;
- uint32_t c2 = 0x1b873593;
-
- //----------
- // body
-
- const uint32_t * blocks = (const uint32_t *)(data + nblocks*4);
-
- for(int i = -nblocks; i; i++) {
- uint32_t k1 = getblock(blocks,i);
-
- k1 *= c1;
- k1 = ROTL32(k1,15);
- k1 *= c2;
-
- h1 ^= k1;
- h1 = ROTL32(h1,13);
- h1 = h1*5+0xe6546b64;
- }
-
- //----------
- // tail
-
- const uint8_t * tail = (const uint8_t*)(data + nblocks*4);
-
- uint32_t k1 = 0;
-
- switch(len & 3) {
- case 3:
- k1 ^= tail[2] << 16;
- case 2:
- k1 ^= tail[1] << 8;
- case 1:
- k1 ^= tail[0];
- k1 *= c1;
- k1 = ROTL32(k1,15);
- k1 *= c2;
- h1 ^= k1;
- };
-
- //----------
- // finalization
-
- h1 ^= len;
-
- h1 = fmix(h1);
-
- *(uint32_t*)out = h1;
-}
-
-//-----------------------------------------------------------------------------
-
-void MurmurHash3_x86_128 ( const void * key, const int len,
- uint32_t seed, void * out )
-{
- const uint8_t * data = (const uint8_t*)key;
- const int nblocks = len / 16;
-
- uint32_t h1 = seed;
- uint32_t h2 = seed;
- uint32_t h3 = seed;
- uint32_t h4 = seed;
-
- uint32_t c1 = 0x239b961b;
- uint32_t c2 = 0xab0e9789;
- uint32_t c3 = 0x38b34ae5;
- uint32_t c4 = 0xa1e38b93;
-
- //----------
- // body
-
- const uint32_t * blocks = (const uint32_t *)(data + nblocks*16);
-
- for(int i = -nblocks; i; i++) {
- uint32_t k1 = getblock(blocks,i*4+0);
- uint32_t k2 = getblock(blocks,i*4+1);
- uint32_t k3 = getblock(blocks,i*4+2);
- uint32_t k4 = getblock(blocks,i*4+3);
-
- k1 *= c1;
- k1 = ROTL32(k1,15);
- k1 *= c2;
- h1 ^= k1;
-
- h1 = ROTL32(h1,19);
- h1 += h2;
- h1 = h1*5+0x561ccd1b;
-
- k2 *= c2;
- k2 = ROTL32(k2,16);
- k2 *= c3;
- h2 ^= k2;
-
- h2 = ROTL32(h2,17);
- h2 += h3;
- h2 = h2*5+0x0bcaa747;
-
- k3 *= c3;
- k3 = ROTL32(k3,17);
- k3 *= c4;
- h3 ^= k3;
-
- h3 = ROTL32(h3,15);
- h3 += h4;
- h3 = h3*5+0x96cd1c35;
-
- k4 *= c4;
- k4 = ROTL32(k4,18);
- k4 *= c1;
- h4 ^= k4;
-
- h4 = ROTL32(h4,13);
- h4 += h1;
- h4 = h4*5+0x32ac3b17;
- }
-
- //----------
- // tail
-
- const uint8_t * tail = (const uint8_t*)(data + nblocks*16);
-
- uint32_t k1 = 0;
- uint32_t k2 = 0;
- uint32_t k3 = 0;
- uint32_t k4 = 0;
-
- switch(len & 15) {
- case 15:
- k4 ^= tail[14] << 16;
- case 14:
- k4 ^= tail[13] << 8;
- case 13:
- k4 ^= tail[12] << 0;
- k4 *= c4;
- k4 = ROTL32(k4,18);
- k4 *= c1;
- h4 ^= k4;
-
- case 12:
- k3 ^= tail[11] << 24;
- case 11:
- k3 ^= tail[10] << 16;
- case 10:
- k3 ^= tail[ 9] << 8;
- case 9:
- k3 ^= tail[ 8] << 0;
- k3 *= c3;
- k3 = ROTL32(k3,17);
- k3 *= c4;
- h3 ^= k3;
-
- case 8:
- k2 ^= tail[ 7] << 24;
- case 7:
- k2 ^= tail[ 6] << 16;
- case 6:
- k2 ^= tail[ 5] << 8;
- case 5:
- k2 ^= tail[ 4] << 0;
- k2 *= c2;
- k2 = ROTL32(k2,16);
- k2 *= c3;
- h2 ^= k2;
-
- case 4:
- k1 ^= tail[ 3] << 24;
- case 3:
- k1 ^= tail[ 2] << 16;
- case 2:
- k1 ^= tail[ 1] << 8;
- case 1:
- k1 ^= tail[ 0] << 0;
- k1 *= c1;
- k1 = ROTL32(k1,15);
- k1 *= c2;
- h1 ^= k1;
- };
-
- //----------
- // finalization
-
- h1 ^= len;
- h2 ^= len;
- h3 ^= len;
- h4 ^= len;
-
- h1 += h2;
- h1 += h3;
- h1 += h4;
- h2 += h1;
- h3 += h1;
- h4 += h1;
-
- h1 = fmix(h1);
- h2 = fmix(h2);
- h3 = fmix(h3);
- h4 = fmix(h4);
-
- h1 += h2;
- h1 += h3;
- h1 += h4;
- h2 += h1;
- h3 += h1;
- h4 += h1;
-
- ((uint32_t*)out)[0] = h1;
- ((uint32_t*)out)[1] = h2;
- ((uint32_t*)out)[2] = h3;
- ((uint32_t*)out)[3] = h4;
-}
-
-//-----------------------------------------------------------------------------
-
-void MurmurHash3_x64_128 ( const void * key, const int len,
- const uint32_t seed, void * out )
-{
- const uint8_t * data = (const uint8_t*)key;
- const int nblocks = len / 16;
-
- uint64_t h1 = seed;
- uint64_t h2 = seed;
-
- uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5);
- uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f);
-
- //----------
- // body
-
- const uint64_t * blocks = (const uint64_t *)(data);
-
- for(int i = 0; i < nblocks; i++) {
- uint64_t k1 = getblock(blocks,i*2+0);
- uint64_t k2 = getblock(blocks,i*2+1);
-
- k1 *= c1;
- k1 = ROTL64(k1,31);
- k1 *= c2;
- h1 ^= k1;
-
- h1 = ROTL64(h1,27);
- h1 += h2;
- h1 = h1*5+0x52dce729;
-
- k2 *= c2;
- k2 = ROTL64(k2,33);
- k2 *= c1;
- h2 ^= k2;
-
- h2 = ROTL64(h2,31);
- h2 += h1;
- h2 = h2*5+0x38495ab5;
- }
-
- //----------
- // tail
-
- const uint8_t * tail = (const uint8_t*)(data + nblocks*16);
-
- uint64_t k1 = 0;
- uint64_t k2 = 0;
-
- switch(len & 15) {
- case 15:
- k2 ^= uint64_t(tail[14]) << 48;
- case 14:
- k2 ^= uint64_t(tail[13]) << 40;
- case 13:
- k2 ^= uint64_t(tail[12]) << 32;
- case 12:
- k2 ^= uint64_t(tail[11]) << 24;
- case 11:
- k2 ^= uint64_t(tail[10]) << 16;
- case 10:
- k2 ^= uint64_t(tail[ 9]) << 8;
- case 9:
- k2 ^= uint64_t(tail[ 8]) << 0;
- k2 *= c2;
- k2 = ROTL64(k2,33);
- k2 *= c1;
- h2 ^= k2;
-
- case 8:
- k1 ^= uint64_t(tail[ 7]) << 56;
- case 7:
- k1 ^= uint64_t(tail[ 6]) << 48;
- case 6:
- k1 ^= uint64_t(tail[ 5]) << 40;
- case 5:
- k1 ^= uint64_t(tail[ 4]) << 32;
- case 4:
- k1 ^= uint64_t(tail[ 3]) << 24;
- case 3:
- k1 ^= uint64_t(tail[ 2]) << 16;
- case 2:
- k1 ^= uint64_t(tail[ 1]) << 8;
- case 1:
- k1 ^= uint64_t(tail[ 0]) << 0;
- k1 *= c1;
- k1 = ROTL64(k1,31);
- k1 *= c2;
- h1 ^= k1;
- };
-
- //----------
- // finalization
-
- h1 ^= len;
- h2 ^= len;
-
- h1 += h2;
- h2 += h1;
-
- h1 = fmix(h1);
- h2 = fmix(h2);
-
- h1 += h2;
- h2 += h1;
-
- ((uint64_t*)out)[0] = h1;
- ((uint64_t*)out)[1] = h2;
-}
-
-//-----------------------------------------------------------------------------
-
+//-----------------------------------------------------------------------------
+// MurmurHash3 was written by Austin Appleby, and is placed in the public
+// domain. The author hereby disclaims copyright to this source code.
+
+// Note - The x86 and x64 versions do _not_ produce the same results, as the
+// algorithms are optimized for their respective platforms. You can still
+// compile and run any of them on any platform, but your performance with the
+// non-native version will be less than optimal.
+
+#include "MurmurHash3.h"
+
+//-----------------------------------------------------------------------------
+// Platform-specific functions and macros
+
+// Microsoft Visual Studio
+
+#if defined(_MSC_VER)
+
+#define FORCE_INLINE __forceinline
+
+#include <cstdlib>
+
+#define ROTL32(x,y) _rotl(x,y)
+#define ROTL64(x,y) _rotl64(x,y)
+
+#define BIG_CONSTANT(x) (x)
+
+// Other compilers
+
+#else // defined(_MSC_VER)
+
+#define FORCE_INLINE inline __attribute__((always_inline))
+
+inline uint32_t rotl32 ( uint32_t x, int8_t r )
+{
+ return (x << r) | (x >> (32 - r));
+}
+
+inline uint64_t rotl64 ( uint64_t x, int8_t r )
+{
+ return (x << r) | (x >> (64 - r));
+}
+
+#define ROTL32(x,y) rotl32(x,y)
+#define ROTL64(x,y) rotl64(x,y)
+
+#define BIG_CONSTANT(x) (x##LLU)
+
+#endif // !defined(_MSC_VER)
+
+//-----------------------------------------------------------------------------
+// Block read - if your platform needs to do endian-swapping or can only
+// handle aligned reads, do the conversion here
+
+FORCE_INLINE uint32_t getblock ( const uint32_t * p, int i )
+{
+ return p[i];
+}
+
+FORCE_INLINE uint64_t getblock ( const uint64_t * p, int i )
+{
+ return p[i];
+}
+
+//-----------------------------------------------------------------------------
+// Finalization mix - force all bits of a hash block to avalanche
+
+FORCE_INLINE uint32_t fmix ( uint32_t h )
+{
+ h ^= h >> 16;
+ h *= 0x85ebca6b;
+ h ^= h >> 13;
+ h *= 0xc2b2ae35;
+ h ^= h >> 16;
+
+ return h;
+}
+
+//----------
+
+FORCE_INLINE uint64_t fmix ( uint64_t k )
+{
+ k ^= k >> 33;
+ k *= BIG_CONSTANT(0xff51afd7ed558ccd);
+ k ^= k >> 33;
+ k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53);
+ k ^= k >> 33;
+
+ return k;
+}
+
+//-----------------------------------------------------------------------------
+
+void MurmurHash3_x86_32 ( const void * key, int len,
+ uint32_t seed, void * out )
+{
+ const uint8_t * data = (const uint8_t*)key;
+ const int nblocks = len / 4;
+
+ uint32_t h1 = seed;
+
+ uint32_t c1 = 0xcc9e2d51;
+ uint32_t c2 = 0x1b873593;
+
+ //----------
+ // body
+
+ const uint32_t * blocks = (const uint32_t *)(data + nblocks*4);
+
+ for(int i = -nblocks; i; i++) {
+ uint32_t k1 = getblock(blocks,i);
+
+ k1 *= c1;
+ k1 = ROTL32(k1,15);
+ k1 *= c2;
+
+ h1 ^= k1;
+ h1 = ROTL32(h1,13);
+ h1 = h1*5+0xe6546b64;
+ }
+
+ //----------
+ // tail
+
+ const uint8_t * tail = (const uint8_t*)(data + nblocks*4);
+
+ uint32_t k1 = 0;
+
+ switch(len & 3) {
+ case 3:
+ k1 ^= tail[2] << 16;
+ case 2:
+ k1 ^= tail[1] << 8;
+ case 1:
+ k1 ^= tail[0];
+ k1 *= c1;
+ k1 = ROTL32(k1,15);
+ k1 *= c2;
+ h1 ^= k1;
+ };
+
+ //----------
+ // finalization
+
+ h1 ^= len;
+
+ h1 = fmix(h1);
+
+ *(uint32_t*)out = h1;
+}
+
+//-----------------------------------------------------------------------------
+
+void MurmurHash3_x86_128 ( const void * key, const int len,
+ uint32_t seed, void * out )
+{
+ const uint8_t * data = (const uint8_t*)key;
+ const int nblocks = len / 16;
+
+ uint32_t h1 = seed;
+ uint32_t h2 = seed;
+ uint32_t h3 = seed;
+ uint32_t h4 = seed;
+
+ uint32_t c1 = 0x239b961b;
+ uint32_t c2 = 0xab0e9789;
+ uint32_t c3 = 0x38b34ae5;
+ uint32_t c4 = 0xa1e38b93;
+
+ //----------
+ // body
+
+ const uint32_t * blocks = (const uint32_t *)(data + nblocks*16);
+
+ for(int i = -nblocks; i; i++) {
+ uint32_t k1 = getblock(blocks,i*4+0);
+ uint32_t k2 = getblock(blocks,i*4+1);
+ uint32_t k3 = getblock(blocks,i*4+2);
+ uint32_t k4 = getblock(blocks,i*4+3);
+
+ k1 *= c1;
+ k1 = ROTL32(k1,15);
+ k1 *= c2;
+ h1 ^= k1;
+
+ h1 = ROTL32(h1,19);
+ h1 += h2;
+ h1 = h1*5+0x561ccd1b;
+
+ k2 *= c2;
+ k2 = ROTL32(k2,16);
+ k2 *= c3;
+ h2 ^= k2;
+
+ h2 = ROTL32(h2,17);
+ h2 += h3;
+ h2 = h2*5+0x0bcaa747;
+
+ k3 *= c3;
+ k3 = ROTL32(k3,17);
+ k3 *= c4;
+ h3 ^= k3;
+
+ h3 = ROTL32(h3,15);
+ h3 += h4;
+ h3 = h3*5+0x96cd1c35;
+
+ k4 *= c4;
+ k4 = ROTL32(k4,18);
+ k4 *= c1;
+ h4 ^= k4;
+
+ h4 = ROTL32(h4,13);
+ h4 += h1;
+ h4 = h4*5+0x32ac3b17;
+ }
+
+ //----------
+ // tail
+
+ const uint8_t * tail = (const uint8_t*)(data + nblocks*16);
+
+ uint32_t k1 = 0;
+ uint32_t k2 = 0;
+ uint32_t k3 = 0;
+ uint32_t k4 = 0;
+
+ switch(len & 15) {
+ case 15:
+ k4 ^= tail[14] << 16;
+ case 14:
+ k4 ^= tail[13] << 8;
+ case 13:
+ k4 ^= tail[12] << 0;
+ k4 *= c4;
+ k4 = ROTL32(k4,18);
+ k4 *= c1;
+ h4 ^= k4;
+
+ case 12:
+ k3 ^= tail[11] << 24;
+ case 11:
+ k3 ^= tail[10] << 16;
+ case 10:
+ k3 ^= tail[ 9] << 8;
+ case 9:
+ k3 ^= tail[ 8] << 0;
+ k3 *= c3;
+ k3 = ROTL32(k3,17);
+ k3 *= c4;
+ h3 ^= k3;
+
+ case 8:
+ k2 ^= tail[ 7] << 24;
+ case 7:
+ k2 ^= tail[ 6] << 16;
+ case 6:
+ k2 ^= tail[ 5] << 8;
+ case 5:
+ k2 ^= tail[ 4] << 0;
+ k2 *= c2;
+ k2 = ROTL32(k2,16);
+ k2 *= c3;
+ h2 ^= k2;
+
+ case 4:
+ k1 ^= tail[ 3] << 24;
+ case 3:
+ k1 ^= tail[ 2] << 16;
+ case 2:
+ k1 ^= tail[ 1] << 8;
+ case 1:
+ k1 ^= tail[ 0] << 0;
+ k1 *= c1;
+ k1 = ROTL32(k1,15);
+ k1 *= c2;
+ h1 ^= k1;
+ };
+
+ //----------
+ // finalization
+
+ h1 ^= len;
+ h2 ^= len;
+ h3 ^= len;
+ h4 ^= len;
+
+ h1 += h2;
+ h1 += h3;
+ h1 += h4;
+ h2 += h1;
+ h3 += h1;
+ h4 += h1;
+
+ h1 = fmix(h1);
+ h2 = fmix(h2);
+ h3 = fmix(h3);
+ h4 = fmix(h4);
+
+ h1 += h2;
+ h1 += h3;
+ h1 += h4;
+ h2 += h1;
+ h3 += h1;
+ h4 += h1;
+
+ ((uint32_t*)out)[0] = h1;
+ ((uint32_t*)out)[1] = h2;
+ ((uint32_t*)out)[2] = h3;
+ ((uint32_t*)out)[3] = h4;
+}
+
+//-----------------------------------------------------------------------------
+
+void MurmurHash3_x64_128 ( const void * key, const int len,
+ const uint32_t seed, void * out )
+{
+ const uint8_t * data = (const uint8_t*)key;
+ const int nblocks = len / 16;
+
+ uint64_t h1 = seed;
+ uint64_t h2 = seed;
+
+ uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5);
+ uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f);
+
+ //----------
+ // body
+
+ const uint64_t * blocks = (const uint64_t *)(data);
+
+ for(int i = 0; i < nblocks; i++) {
+ uint64_t k1 = getblock(blocks,i*2+0);
+ uint64_t k2 = getblock(blocks,i*2+1);
+
+ k1 *= c1;
+ k1 = ROTL64(k1,31);
+ k1 *= c2;
+ h1 ^= k1;
+
+ h1 = ROTL64(h1,27);
+ h1 += h2;
+ h1 = h1*5+0x52dce729;
+
+ k2 *= c2;
+ k2 = ROTL64(k2,33);
+ k2 *= c1;
+ h2 ^= k2;
+
+ h2 = ROTL64(h2,31);
+ h2 += h1;
+ h2 = h2*5+0x38495ab5;
+ }
+
+ //----------
+ // tail
+
+ const uint8_t * tail = (const uint8_t*)(data + nblocks*16);
+
+ uint64_t k1 = 0;
+ uint64_t k2 = 0;
+
+ switch(len & 15) {
+ case 15:
+ k2 ^= uint64_t(tail[14]) << 48;
+ case 14:
+ k2 ^= uint64_t(tail[13]) << 40;
+ case 13:
+ k2 ^= uint64_t(tail[12]) << 32;
+ case 12:
+ k2 ^= uint64_t(tail[11]) << 24;
+ case 11:
+ k2 ^= uint64_t(tail[10]) << 16;
+ case 10:
+ k2 ^= uint64_t(tail[ 9]) << 8;
+ case 9:
+ k2 ^= uint64_t(tail[ 8]) << 0;
+ k2 *= c2;
+ k2 = ROTL64(k2,33);
+ k2 *= c1;
+ h2 ^= k2;
+
+ case 8:
+ k1 ^= uint64_t(tail[ 7]) << 56;
+ case 7:
+ k1 ^= uint64_t(tail[ 6]) << 48;
+ case 6:
+ k1 ^= uint64_t(tail[ 5]) << 40;
+ case 5:
+ k1 ^= uint64_t(tail[ 4]) << 32;
+ case 4:
+ k1 ^= uint64_t(tail[ 3]) << 24;
+ case 3:
+ k1 ^= uint64_t(tail[ 2]) << 16;
+ case 2:
+ k1 ^= uint64_t(tail[ 1]) << 8;
+ case 1:
+ k1 ^= uint64_t(tail[ 0]) << 0;
+ k1 *= c1;
+ k1 = ROTL64(k1,31);
+ k1 *= c2;
+ h1 ^= k1;
+ };
+
+ //----------
+ // finalization
+
+ h1 ^= len;
+ h2 ^= len;
+
+ h1 += h2;
+ h2 += h1;
+
+ h1 = fmix(h1);
+ h2 = fmix(h2);
+
+ h1 += h2;
+ h2 += h1;
+
+ ((uint64_t*)out)[0] = h1;
+ ((uint64_t*)out)[1] = h2;
+}
+
+//-----------------------------------------------------------------------------
+
diff --git a/moses/TranslationModel/CompactPT/MurmurHash3.h b/moses/TranslationModel/CompactPT/MurmurHash3.h
index 58e98204d..54e9d3f9e 100644
--- a/moses/TranslationModel/CompactPT/MurmurHash3.h
+++ b/moses/TranslationModel/CompactPT/MurmurHash3.h
@@ -1,37 +1,37 @@
-//-----------------------------------------------------------------------------
-// MurmurHash3 was written by Austin Appleby, and is placed in the public
-// domain. The author hereby disclaims copyright to this source code.
-
-#ifndef _MURMURHASH3_H_
-#define _MURMURHASH3_H_
-
-//-----------------------------------------------------------------------------
-// Platform-specific functions and macros
-
-// Microsoft Visual Studio
-
-#if defined(_MSC_VER)
-
-typedef unsigned char uint8_t;
-typedef unsigned long uint32_t;
-typedef unsigned __int64 uint64_t;
-
-// Other compilers
-
-#else // defined(_MSC_VER)
-
-#include <stdint.h>
-
-#endif // !defined(_MSC_VER)
-
-//-----------------------------------------------------------------------------
-
-void MurmurHash3_x86_32 ( const void * key, int len, uint32_t seed, void * out );
-
-void MurmurHash3_x86_128 ( const void * key, int len, uint32_t seed, void * out );
-
-void MurmurHash3_x64_128 ( const void * key, int len, uint32_t seed, void * out );
-
-//-----------------------------------------------------------------------------
-
-#endif // _MURMURHASH3_H_
+//-----------------------------------------------------------------------------
+// MurmurHash3 was written by Austin Appleby, and is placed in the public
+// domain. The author hereby disclaims copyright to this source code.
+
+#ifndef _MURMURHASH3_H_
+#define _MURMURHASH3_H_
+
+//-----------------------------------------------------------------------------
+// Platform-specific functions and macros
+
+// Microsoft Visual Studio
+
+#if defined(_MSC_VER)
+
+typedef unsigned char uint8_t;
+typedef unsigned long uint32_t;
+typedef unsigned __int64 uint64_t;
+
+// Other compilers
+
+#else // defined(_MSC_VER)
+
+#include <stdint.h>
+
+#endif // !defined(_MSC_VER)
+
+//-----------------------------------------------------------------------------
+
+void MurmurHash3_x86_32 ( const void * key, int len, uint32_t seed, void * out );
+
+void MurmurHash3_x86_128 ( const void * key, int len, uint32_t seed, void * out );
+
+void MurmurHash3_x64_128 ( const void * key, int len, uint32_t seed, void * out );
+
+//-----------------------------------------------------------------------------
+
+#endif // _MURMURHASH3_H_
diff --git a/moses/TranslationModel/PhraseDictionary.h b/moses/TranslationModel/PhraseDictionary.h
index 1e27e3ff0..2c1f1f39e 100644
--- a/moses/TranslationModel/PhraseDictionary.h
+++ b/moses/TranslationModel/PhraseDictionary.h
@@ -70,9 +70,9 @@ public:
**/
class PhraseDictionary : public DecodeFeature
{
- friend class PhraseDictionaryMultiModelCounts;
- // why is this necessary? that's a derived class, so it should have
- // access to the
+ friend class PhraseDictionaryMultiModelCounts;
+ // why is this necessary? that's a derived class, so it should have
+ // access to the
public:
virtual bool ProvidesPrefixCheck() const;
@@ -104,7 +104,7 @@ public:
virtual
bool
PrefixExists(ttasksptr const& ttask, Phrase const& phrase) const;
-
+
// LEGACY!
// The preferred method is to override GetTargetPhraseCollectionBatch().
// See class PhraseDictionaryMemory or PhraseDictionaryOnDisk for details
@@ -119,7 +119,7 @@ public:
TargetPhraseCollection const *
GetTargetPhraseCollectionLEGACY(ttasksptr const& ttask, const Phrase& src)
{
- return GetTargetPhraseCollectionLEGACY(src);
+ return GetTargetPhraseCollectionLEGACY(src);
}
virtual
diff --git a/moses/TranslationModel/ProbingPT/hash.hh b/moses/TranslationModel/ProbingPT/hash.hh
index a4fcd6330..607238ae1 100644
--- a/moses/TranslationModel/ProbingPT/hash.hh
+++ b/moses/TranslationModel/ProbingPT/hash.hh
@@ -7,7 +7,7 @@
#include <vector>
//Gets the MurmurmurHash for give string
-uint64_t getHash(StringPiece text);
+uint64_t getHash(StringPiece text);
std::vector<uint64_t> getVocabIDs(StringPiece textin);
diff --git a/moses/TranslationModel/ProbingPT/storing.hh b/moses/TranslationModel/ProbingPT/storing.hh
index eb3b1ea53..e1be3bc87 100644
--- a/moses/TranslationModel/ProbingPT/storing.hh
+++ b/moses/TranslationModel/ProbingPT/storing.hh
@@ -2,7 +2,7 @@
#include <cstdio>
#include <fstream>
-#include <iostream>
+#include <iostream>
#include "hash.hh" //Includes line_splitter
#include "probing_hash_utils.hh"
diff --git a/moses/TranslationModel/UG/TargetPhraseCollectionCache.cc b/moses/TranslationModel/UG/TargetPhraseCollectionCache.cc
index bf449247e..1217b9711 100644
--- a/moses/TranslationModel/UG/TargetPhraseCollectionCache.cc
+++ b/moses/TranslationModel/UG/TargetPhraseCollectionCache.cc
@@ -16,7 +16,7 @@ namespace Moses
if (a.tv_sec != b.tv_sec) return a.tv_sec > b.tv_sec;
return (a.tv_nsec >= b.tv_nsec);
}
-#endif
+#endif
bool operator<(timeval const& a, timeval const& b)
{
@@ -30,10 +30,10 @@ namespace Moses
return (a.tv_usec >= b.tv_usec);
}
- void
+ void
bubble_up(std::vector<TPCollWrapper*>& v, size_t k)
{
- if (k >= v.size()) return;
+ if (k >= v.size()) return;
for (;k && (v[k]->tstamp < v[k/2]->tstamp); k /=2)
{
std::swap(v[k],v[k/2]);
@@ -41,7 +41,7 @@ namespace Moses
}
}
- void
+ void
bubble_down(std::vector<TPCollWrapper*>& v, size_t k)
{
for (size_t j = 2*(k+1); j <= v.size(); j = 2*((k=j)+1))
@@ -62,7 +62,7 @@ namespace Moses
TPCollWrapper*
TPCollCache
- ::encache(TPCollWrapper* const& ptr)
+ ::encache(TPCollWrapper* const& ptr)
{
using namespace boost;
// update time stamp:
@@ -76,7 +76,7 @@ namespace Moses
{
vector<TPCollWrapper*>& v = m_history;
if (ptr->idx >= 0) // ptr is already in history
- {
+ {
assert(ptr == v[ptr->idx]);
size_t k = 2 * (ptr->idx + 1);
if (k < v.size()) bubble_up(v,k--);
@@ -88,7 +88,7 @@ namespace Moses
v.push_back(ptr);
bubble_up(v,k);
}
- else // someone else needs to go
+ else // someone else needs to go
{
v[0]->idx = -1;
release(v[0]);
@@ -98,28 +98,28 @@ namespace Moses
}
return ptr;
} // TPCollCache::encache(...)
-
- TPCollWrapper*
+
+ TPCollWrapper*
TPCollCache
- ::get(uint64_t key, size_t revision)
+ ::get(uint64_t key, size_t revision)
{
using namespace boost;
cache_t::iterator m;
- {
+ {
shared_lock<shared_mutex> lock(m_cache_lock);
m = m_cache.find(key);
- if (m == m_cache.end() || m->second->revision != revision)
+ if (m == m_cache.end() || m->second->revision != revision)
return NULL;
++m->second->refCount;
}
-
+
encache(m->second);
return NULL;
} // TPCollCache::get(...)
-
+
void
TPCollCache
- ::add(uint64_t key, TPCollWrapper* ptr)
+ ::add(uint64_t key, TPCollWrapper* ptr)
{
{
boost::unique_lock<boost::shared_mutex> lock(m_cache_lock);
@@ -129,7 +129,7 @@ namespace Moses
}
encache(ptr);
} // TPCollCache::add(...)
-
+
void
TPCollCache
::release(TPCollWrapper*& ptr)
@@ -137,25 +137,25 @@ namespace Moses
if (!ptr) return;
if (--ptr->refCount || ptr->idx >= 0) // tpc is still in use
- {
- ptr = NULL;
- return;
+ {
+ ptr = NULL;
+ return;
}
-
+
#if 0
timespec t; clock_gettime(CLOCK_MONOTONIC,&t);
timespec r; clock_getres(CLOCK_MONOTONIC,&r);
float delta = t.tv_sec - ptr->tstamp.tv_sec;
cerr << "deleting old cache entry after " << delta << " seconds."
- << " clock resolution is " << r.tv_sec << ":" << r.tv_nsec
+ << " clock resolution is " << r.tv_sec << ":" << r.tv_nsec
<< " at " << __FILE__ << ":" << __LINE__ << endl;
#endif
-
+
boost::upgrade_lock<boost::shared_mutex> lock(m_cache_lock);
cache_t::iterator m = m_cache.find(ptr->key);
if (m != m_cache.end() && m->second == ptr)
- { // the cache could have been updated with a new pointer
- // for the same phrase already, so we need to check
+ { // the cache could have been updated with a new pointer
+ // for the same phrase already, so we need to check
// if the pointer we cound is the one we want to get rid of,
// hence the second check
boost::upgrade_to_unique_lock<boost::shared_mutex> xlock(lock);
@@ -163,7 +163,7 @@ namespace Moses
}
delete ptr;
ptr = NULL;
- } // TPCollCache::release(...)
+ } // TPCollCache::release(...)
TPCollWrapper::
TPCollWrapper(size_t r, uint64_t k)
@@ -175,5 +175,5 @@ namespace Moses
{
assert(this->refCount == 0);
}
-
+
} // namespace
diff --git a/moses/TranslationModel/UG/TargetPhraseCollectionCache.h b/moses/TranslationModel/UG/TargetPhraseCollectionCache.h
index fc9ce8921..269200647 100644
--- a/moses/TranslationModel/UG/TargetPhraseCollectionCache.h
+++ b/moses/TranslationModel/UG/TargetPhraseCollectionCache.h
@@ -5,15 +5,15 @@
namespace Moses
{
- class TPCollWrapper
+ class TPCollWrapper
// wrapper around TargetPhraseCollection that includes reference counts
// and a time stamp for least-recently-used caching of TargetPhraseCollection-s
: public TargetPhraseCollection
{
public:
- size_t const revision;
+ size_t const revision;
// revison; gets changed when the underlying corpus in Mmsapt is updated
-
+
uint64_t const key; // phrase key
uint32_t refCount; // reference count
#if defined(timespec) // timespec is better, but not available everywhere
@@ -32,12 +32,12 @@ namespace Moses
typedef std::vector<TPCollWrapper*> history_t;
cache_t m_cache; // maps from phrase ids to target phrase collections
mutable history_t m_history; // heap of live items, least recently used one on top
-
+
mutable boost::shared_mutex m_cache_lock; // locks m_cache
mutable boost::shared_mutex m_history_lock; // locks m_history
#if 0
- // mutable size_t m_tpc_ctr;
+ // mutable size_t m_tpc_ctr;
// counter of all live item, for debugging. probably obsolete; was used
// to track memory leaks
#endif
@@ -47,14 +47,14 @@ namespace Moses
public:
TPCollCache(size_t capacity=1000);
-
- TPCollWrapper*
+
+ TPCollWrapper*
get(uint64_t key, size_t revision);
- void
+ void
add(uint64_t key, TPCollWrapper* ptr);
- void
+ void
release(TPCollWrapper*& tpc);
};
diff --git a/moses/TranslationModel/UG/bitext-find.cc b/moses/TranslationModel/UG/bitext-find.cc
index 46978d16e..18cc6e0fa 100644
--- a/moses/TranslationModel/UG/bitext-find.cc
+++ b/moses/TranslationModel/UG/bitext-find.cc
@@ -30,15 +30,15 @@ write_sentence
}
}
-bool
-fill(string const& query, TSA<Token> const& tsa,
+bool
+fill(string const& query, TSA<Token> const& tsa,
TokenIndex const& V, bitvector& v)
{
v.resize(tsa.getCorpus()->size());
Bitext<Token>::iter m(&tsa);
- istringstream buf(query); string w;
- while (buf >> w)
- if (!m.extend(V[w]))
+ istringstream buf(query); string w;
+ while (buf >> w)
+ if (!m.extend(V[w]))
return false;
m.markSentences(v);
return true;
@@ -51,7 +51,7 @@ int main(int argc, char* argv[])
{
interpret_args(argc, argv);
if (Q1.empty() && Q2.empty()) exit(0);
-
+
mmbitext B; string w;
B.open(bname, L1, L2);
@@ -64,13 +64,13 @@ int main(int argc, char* argv[])
bitvector check(B.T1->size());
if (Q1.size() == 0 || Q2.size() == 0) check.set();
else (m2.markSentences(check));
-
+
Bitext<Token>::iter& m = m1.size() ? m1 : m2;
char const* x = m.lower_bound(-1);
char const* stop = m.upper_bound(-1);
uint64_t sid;
ushort off;
- boost::taus88 rnd;
+ boost::taus88 rnd;
size_t N = m.approxOccurrenceCount();
maxhits = min(N, maxhits);
size_t k = 0; // selected
@@ -80,7 +80,7 @@ int main(int argc, char* argv[])
x = m.root->readOffset(x,stop,off);
if (!check[sid]) continue;
- size_t r = (N - i) * rnd()/(rnd.max()+1.) + k;
+ size_t r = (N - i) * rnd()/(rnd.max()+1.) + k;
if (maxhits != N && r >= maxhits) continue;
++k;
@@ -94,20 +94,20 @@ int main(int argc, char* argv[])
// cout << "alignment failure" << endl;
}
- cout << sid << " " << B.docname(sid)
+ cout << sid << " " << B.docname(sid)
<< " dfwd=" << po_fwd << " dbwd=" << po_bwd
<< "\n";
write_sentence(*B.T1, sid, *B.V1, cout); cout << "\n";
write_sentence(*B.T2, sid, *B.V2, cout); cout << "\n";
- B.write_yawat_alignment(sid,
- m1.size() ? &m1 : NULL,
- m2.size() ? &m2 : NULL, cout);
+ B.write_yawat_alignment(sid,
+ m1.size() ? &m1 : NULL,
+ m2.size() ? &m2 : NULL, cout);
cout << endl;
-
+
}
}
-void
+void
interpret_args(int ac, char* av[])
{
po::variables_map vm;
@@ -120,7 +120,7 @@ interpret_args(int ac, char* av[])
("q1", po::value<string>(&Q1), "query in L1")
("q2", po::value<string>(&Q2), "query in L2")
;
-
+
po::options_description h("Hidden Options");
h.add_options()
("bname", po::value<string>(&bname), "base name of corpus")
@@ -133,7 +133,7 @@ interpret_args(int ac, char* av[])
a.add("bname",1);
a.add("L1",1);
a.add("L2",1);
-
+
po::store(po::command_line_parser(ac,av)
.options(h)
.positional(a)
@@ -141,7 +141,7 @@ interpret_args(int ac, char* av[])
po::notify(vm);
if (vm.count("help"))
{
- cout << "\nusage:\n\t" << av[0]
+ cout << "\nusage:\n\t" << av[0]
<< " [options] [--q1=<L1string>] [--q2=<L2string>]" << endl;
cout << o << endl;
exit(0);
diff --git a/moses/TranslationModel/UG/count-ptable-features.cc b/moses/TranslationModel/UG/count-ptable-features.cc
index b4d2cb4dd..4c9022075 100644
--- a/moses/TranslationModel/UG/count-ptable-features.cc
+++ b/moses/TranslationModel/UG/count-ptable-features.cc
@@ -21,6 +21,6 @@ int main()
cout << PT.GetFeatureNames().size() << endl;
exit(0);
}
-
-
+
+
diff --git a/moses/TranslationModel/UG/generic/file_io/ug_stream.cpp b/moses/TranslationModel/UG/generic/file_io/ug_stream.cpp
index 073b64dfc..b87aa1d0c 100644
--- a/moses/TranslationModel/UG/generic/file_io/ug_stream.cpp
+++ b/moses/TranslationModel/UG/generic/file_io/ug_stream.cpp
@@ -11,7 +11,7 @@ namespace ugdiss
using namespace boost::algorithm;
using namespace boost::iostreams;
- filtering_istream*
+ filtering_istream*
open_input_stream(string fname)
{
filtering_istream* ret = new filtering_istream();
@@ -19,7 +19,7 @@ namespace ugdiss
return ret;
}
- filtering_ostream*
+ filtering_ostream*
open_output_stream(string fname)
{
filtering_ostream* ret = new filtering_ostream();
@@ -27,7 +27,7 @@ namespace ugdiss
return ret;
}
- void
+ void
open_input_stream(string fname, filtering_istream& in)
{
if (ends_with(fname, ".gz"))
@@ -41,7 +41,7 @@ namespace ugdiss
in.push(file_source(fname.c_str()));
}
- void
+ void
open_output_stream(string fname, filtering_ostream& out)
{
if (ends_with(fname, ".gz") || ends_with(fname, ".gz_"))
diff --git a/moses/TranslationModel/UG/generic/file_io/ug_stream.h b/moses/TranslationModel/UG/generic/file_io/ug_stream.h
index e2c9e4764..5555e36f8 100644
--- a/moses/TranslationModel/UG/generic/file_io/ug_stream.h
+++ b/moses/TranslationModel/UG/generic/file_io/ug_stream.h
@@ -23,7 +23,7 @@ using namespace boost::iostreams;
/** open input file that is possibly compressed
* decompression filters are automatically added based on the file name
- * gzip for .gz; bzip2 for bz2.
+ * gzip for .gz; bzip2 for bz2.
*/
filtering_istream* open_input_stream(string fname);
void open_input_stream(string fname, filtering_istream& in);
diff --git a/moses/TranslationModel/UG/generic/program_options/ug_get_options.cpp b/moses/TranslationModel/UG/generic/program_options/ug_get_options.cpp
index 31927ac84..6c1644837 100644
--- a/moses/TranslationModel/UG/generic/program_options/ug_get_options.cpp
+++ b/moses/TranslationModel/UG/generic/program_options/ug_get_options.cpp
@@ -11,7 +11,7 @@ namespace ugdiss
{
using namespace std;
- void
+ void
get_options(int ac, char* av[], progopts& o, posopts& a, optsmap& vm,
char const* cfgFileParam)
{
@@ -30,17 +30,17 @@ namespace ugdiss
}
else
{
- cerr << "Error: cannot find config file '"
+ cerr << "Error: cannot find config file '"
<< cfgFile << "'!" << endl;
exit(1);
}
}
}
-
+
// process positional args, ignoring those set in the config file
if (a.max_total_count())
po::store(po::command_line_parser(ac,av)
- .options(o).positional(a).run(),vm);
+ .options(o).positional(a).run(),vm);
po::notify(vm); // IMPORTANT
}
}
diff --git a/moses/TranslationModel/UG/generic/program_options/ug_get_options.h b/moses/TranslationModel/UG/generic/program_options/ug_get_options.h
index 79b626ef5..636b11302 100644
--- a/moses/TranslationModel/UG/generic/program_options/ug_get_options.h
+++ b/moses/TranslationModel/UG/generic/program_options/ug_get_options.h
@@ -6,18 +6,18 @@
#include <boost/program_options.hpp>
-namespace ugdiss
+namespace ugdiss
{
namespace po=boost::program_options;
typedef po::options_description progopts;
typedef po::positional_options_description posopts;
typedef po::variables_map optsmap;
- void
- get_options(int ac, char* av[],
- progopts & o,
- posopts & a,
- optsmap & vm,
+ void
+ get_options(int ac, char* av[],
+ progopts & o,
+ posopts & a,
+ optsmap & vm,
char const* cfgFileParam=NULL);
}
diff --git a/moses/TranslationModel/UG/generic/program_options/ug_splice_arglist.cc b/moses/TranslationModel/UG/generic/program_options/ug_splice_arglist.cc
index 7dc2cd18f..f30d91acc 100644
--- a/moses/TranslationModel/UG/generic/program_options/ug_splice_arglist.cc
+++ b/moses/TranslationModel/UG/generic/program_options/ug_splice_arglist.cc
@@ -5,15 +5,15 @@
#include <boost/foreach.hpp>
namespace Moses {
-
- void
+
+ void
filter_arguments(int const argc_in, char const* const* const argv_in,
- int & argc_moses, char*** argv_moses,
+ int & argc_moses, char*** argv_moses,
int & argc_other, char*** argv_other,
vector<pair<string,int> > const& filter)
{
*argv_moses = new char*[argc_in];
- *argv_other = new char*[argc_in];
+ *argv_other = new char*[argc_in];
(*argv_moses)[0] = new char[strlen(argv_in[0])+1];
strcpy((*argv_moses)[0], argv_in[0]);
argc_moses = 1;
@@ -30,7 +30,7 @@ namespace Moses {
strcpy((*argv_other)[argc_other++],argv_in[i]);
for (int k = 0; k < o.second; ++k)
{
- UTIL_THROW_IF2(++i >= argc_in || argv_in[i][0] == '-',
+ UTIL_THROW_IF2(++i >= argc_in || argv_in[i][0] == '-',
"[" << HERE << "] Missing argument for "
<< "parameter " << o.first << "!");
(*argv_other)[argc_other] = new char[strlen(argv_in[i])+1];
@@ -44,7 +44,7 @@ namespace Moses {
strcpy((*argv_moses)[argc_moses++], argv_in[i++]);
}
}
-
+
} // namespace Moses
diff --git a/moses/TranslationModel/UG/generic/program_options/ug_splice_arglist.h b/moses/TranslationModel/UG/generic/program_options/ug_splice_arglist.h
index e56585e8a..605acee6c 100644
--- a/moses/TranslationModel/UG/generic/program_options/ug_splice_arglist.h
+++ b/moses/TranslationModel/UG/generic/program_options/ug_splice_arglist.h
@@ -5,12 +5,12 @@
namespace Moses {
using namespace std;
- // Function to splice the argument list (e.g. before handing it over to
+ // Function to splice the argument list (e.g. before handing it over to
// Moses LoadParam() function. /filter/ is a vector of argument names
- // and the number of arguments after each of them
- void
+ // and the number of arguments after each of them
+ void
filter_arguments(int const argc_in, char const* const* const argv_in,
- int & argc_moses, char*** argv_moses,
+ int & argc_moses, char*** argv_moses,
int & argc_other, char*** argv_other,
vector<pair<string,int> > const& filter);
diff --git a/moses/TranslationModel/UG/generic/sorting/VectorIndexSorter.h b/moses/TranslationModel/UG/generic/sorting/VectorIndexSorter.h
index f26e28c52..31132c63c 100644
--- a/moses/TranslationModel/UG/generic/sorting/VectorIndexSorter.h
+++ b/moses/TranslationModel/UG/generic/sorting/VectorIndexSorter.h
@@ -17,40 +17,40 @@
namespace Moses
{
using namespace std;
- template<typename VAL,
+ template<typename VAL,
typename COMP = greater<VAL>,
typename IDX_T=size_t>
class
- VectorIndexSorter
+ VectorIndexSorter
: public binary_function<IDX_T const&, IDX_T const&, bool>
{
vector<VAL> const& m_vecref;
boost::shared_ptr<COMP> m_comp;
public:
-
+
COMP const& Compare;
VectorIndexSorter(vector<VAL> const& v, COMP const& comp)
: m_vecref(v), Compare(comp) {
}
-
+
VectorIndexSorter(vector<VAL> const& v)
: m_vecref(v), m_comp(new COMP()), Compare(*m_comp) {
}
-
+
bool operator()(IDX_T const & a, IDX_T const & b) const {
bool fwd = Compare(m_vecref.at(a) ,m_vecref.at(b));
bool bwd = Compare(m_vecref[b], m_vecref[a]);
return (fwd == bwd ? a < b : fwd);
}
-
+
boost::shared_ptr<vector<IDX_T> >
GetOrder() const;
-
+
void
GetOrder(vector<IDX_T> & order) const;
-
+
};
-
+
template<typename VAL, typename COMP, typename IDX_T>
boost::shared_ptr<vector<IDX_T> >
VectorIndexSorter<VAL,COMP,IDX_T>::
@@ -60,7 +60,7 @@ namespace Moses
get_order(*ret);
return ret;
}
-
+
template<typename VAL, typename COMP, typename IDX_T>
void
VectorIndexSorter<VAL,COMP,IDX_T>::
@@ -70,6 +70,6 @@ namespace Moses
for (IDX_T i = 0; i < IDX_T(m_vecref.size()); ++i) order[i] = i;
sort(order.begin(), order.end(), *this);
}
-
+
}
#endif
diff --git a/moses/TranslationModel/UG/generic/stringdist/ug_stringdist.cc b/moses/TranslationModel/UG/generic/stringdist/ug_stringdist.cc
index 4b61ecd60..877b7a816 100644
--- a/moses/TranslationModel/UG/generic/stringdist/ug_stringdist.cc
+++ b/moses/TranslationModel/UG/generic/stringdist/ug_stringdist.cc
@@ -6,14 +6,14 @@
// string distance measures
// Code by Ulrich Germann
-namespace stringdist
+namespace stringdist
{
- UErrorCode strip_accents(UnicodeString & trg)
+ UErrorCode strip_accents(UnicodeString & trg)
{
UErrorCode status = U_ZERO_ERROR;
- static Transliterator *stripper
- = Transliterator::createInstance("NFD; [:M:] Remove; NFC",
+ static Transliterator *stripper
+ = Transliterator::createInstance("NFD; [:M:] Remove; NFC",
UTRANS_FORWARD, status);
stripper->transliterate(trg);
return status;
@@ -22,9 +22,9 @@ namespace stringdist
char const*
StringDiff::
Segment::
- elabel[] = { "same", "cap", "flip", "permutation",
- "accent", "duplication",
- "insertion", "deletion",
+ elabel[] = { "same", "cap", "flip", "permutation",
+ "accent", "duplication",
+ "insertion", "deletion",
"mismatch", "noinit" };
StringDiff::
@@ -44,7 +44,7 @@ namespace stringdist
Segment()
: start_a(-1), end_a(-1), start_b(-1), end_b(-1), match(noinit), dist(0)
{}
-
+
UnicodeString const&
StringDiff::
set_a(string const& a)
@@ -74,8 +74,8 @@ namespace stringdist
{
return this->b;
}
-
- size_t
+
+ size_t
StringDiff::
size()
{
@@ -94,7 +94,7 @@ namespace stringdist
// if (s.match == same) continue;
// else if (s.match == insertion) ret += s.end_b - s.start_b;
// else if (s.match == deletion) ret += s.end_a - s.start_a;
-
+
// }
// }
@@ -138,7 +138,7 @@ namespace stringdist
#endif
}
- float
+ float
fillAlignmentMatrix(UChar const* a, size_t const lenA,
UChar const* b, size_t const lenB,
vector<vector<float> > & M)
@@ -164,7 +164,7 @@ namespace stringdist
return M.back().back();
}
- float
+ float
levenshtein(UChar const* a, size_t const lenA,
UChar const* b, size_t const lenB)
{
@@ -180,7 +180,7 @@ namespace stringdist
cout << endl;
}
cout << string(25,'-') << endl;
-#endif
+#endif
int i = M.size() -1;
int j = M.back().size() -1;
@@ -207,29 +207,29 @@ namespace stringdist
return ret;
}
-
+
StringDiff::
Segment::
- Segment(size_t const as, size_t const ae,
+ Segment(size_t const as, size_t const ae,
size_t const bs, size_t const be,
- UnicodeString const& a,
- UnicodeString const& b)
+ UnicodeString const& a,
+ UnicodeString const& b)
{
dist = 0;
- start_a = as; end_a = ae;
+ start_a = as; end_a = ae;
start_b = bs; end_b = be;
if (as == ae)
match = bs == be ? same : insertion;
- else if (bs == be)
+ else if (bs == be)
match = deletion;
- else if (be-bs != ae-as)
+ else if (be-bs != ae-as)
{
match = mismatch;
dist = stringdist::levenshtein(a.getBuffer() + as, ae - as,
b.getBuffer() + bs, be - bs);
}
- else
+ else
{
match = same;
size_t stop = ae-as;
@@ -251,11 +251,11 @@ namespace stringdist
}
}
}
- if (match == insertion)
+ if (match == insertion)
{
dist = be-bs;
}
- else if (match == deletion)
+ else if (match == deletion)
{
dist = ae-as;
}
@@ -309,18 +309,18 @@ namespace stringdist
if (i) --i;
if (j) --j;
}
- for (size_t k = 0; k < A.size(); ++k)
+ for (size_t k = 0; k < A.size(); ++k)
A[k] = min(A[k],A2[k]);
- for (size_t k = 0; k < B.size(); ++k)
+ for (size_t k = 0; k < B.size(); ++k)
B[k] = min(B[k],B2[k]);
-
+
if (a[i] == b[j]) { A[i] = j; B[j] = i; }
i = 0;
j = 0;
size_t I, J;
while (i < a.length() and j < b.length())
{
- if (A[i] < 0)
+ if (A[i] < 0)
{
I = i + 1;
while (I < A.size() and A[I] < 0) ++I;
@@ -338,24 +338,24 @@ namespace stringdist
difflist.push_back(Segment(i,i,j,J,a,b));
j = J;
}
- else
+ else
{
- I = i;
+ I = i;
J = j;
- while(I < A.size() && A[I] >= 0 && J < B.size() && B[J] >= 0)
+ while(I < A.size() && A[I] >= 0 && J < B.size() && B[J] >= 0)
{ ++I; ++J; }
difflist.push_back(Segment(i,I,j,J,a,b));
i = I; j = J;
}
}
- if (i < a.length() || j < b.length())
+ if (i < a.length() || j < b.length())
difflist.push_back(Segment(i,a.length(),j,b.length(),a,b));
diffcnt.assign(noinit,0);
for (size_t i = 0; i < difflist.size(); ++i)
{
Segment & s = difflist[i];
- if (s.match == insertion and
+ if (s.match == insertion and
((s.start_a and a[s.start_a - 1] == b[s.start_b]) or
(s.end_a < a.length() and a[s.end_a] == b[s.start_b])))
{
@@ -364,7 +364,7 @@ namespace stringdist
sameletter = b[i] == b[i-1];
if (sameletter) s.match = duplication;
}
- else if (s.match == deletion and
+ else if (s.match == deletion and
((s.start_b and b[s.start_b - 1] == a[s.start_a]) or
(s.end_b < b.length() and b[s.end_b] == a[s.start_a])))
{
@@ -380,15 +380,15 @@ namespace stringdist
void
StringDiff::
- showDiff(std::ostream& out)
+ showDiff(std::ostream& out)
{
if (difflist.size() == 0) align();
vector<size_t> fromEnd(difflist.size(),0);
for (int d = difflist.size()-1; d-- > 0;)
{
fromEnd[d] = a.length() - difflist[d].end_a;
- // cout << d << " " << fromEnd[d] << " "
- // << difflist[d].start_a << "-"
+ // cout << d << " " << fromEnd[d] << " "
+ // << difflist[d].start_a << "-"
// << difflist[d].end_a << endl;
}
for (size_t d = 0; d < difflist.size(); ++d)
@@ -402,7 +402,7 @@ namespace stringdist
bseg.toUTF8String(bbuf);
out << abuf << " ";
out << bbuf << " ";
- out << s.label() << " "
+ out << s.label() << " "
<< s.dist << " "
<< fromEnd[d]
<< endl;
@@ -423,7 +423,7 @@ namespace stringdist
{
return difflist.at(i);
}
-
+
vector<int> const&
StringDiff::
getFeatures() const
diff --git a/moses/TranslationModel/UG/generic/stringdist/ug_stringdist.h b/moses/TranslationModel/UG/generic/stringdist/ug_stringdist.h
index 43fb089f1..8dfcfb58a 100644
--- a/moses/TranslationModel/UG/generic/stringdist/ug_stringdist.h
+++ b/moses/TranslationModel/UG/generic/stringdist/ug_stringdist.h
@@ -21,15 +21,15 @@ using namespace std;
//using namespace boost;
using namespace ugdiss;
-namespace stringdist
+namespace stringdist
{
- float
+ float
levenshtein(UChar const* a, size_t const lenA,
UChar const* b, size_t const lenB);
UErrorCode strip_accents(UnicodeString & trg);
- float
+ float
fillAlignmentMatrix(UChar const* a, size_t const lenA,
UChar const* b, size_t const lenB,
vector<vector<float> > & M);
@@ -37,9 +37,9 @@ namespace stringdist
class StringDiff
{
public:
- enum MATCHTYPE
+ enum MATCHTYPE
{
- same, // a and b are identical
+ same, // a and b are identical
cap, // a and b differ only in capitalization
flip, // two-letter flip
permutation, // a and b have same letters but in different order
@@ -48,7 +48,7 @@ namespace stringdist
insertion, // a is empty
deletion, // b is empty
mismatch, // none of the above
- noinit // not initialized
+ noinit // not initialized
};
struct Segment
@@ -59,9 +59,9 @@ namespace stringdist
MATCHTYPE match;
float dist;
Segment();
- Segment(size_t const as, size_t const ae,
+ Segment(size_t const as, size_t const ae,
size_t const bs, size_t const be,
- UnicodeString const& a,
+ UnicodeString const& a,
UnicodeString const& b);
char const* label() const;
};
diff --git a/moses/TranslationModel/UG/generic/threading/ug_thread_safe_counter.cc b/moses/TranslationModel/UG/generic/threading/ug_thread_safe_counter.cc
index 662493e18..b4565f99d 100644
--- a/moses/TranslationModel/UG/generic/threading/ug_thread_safe_counter.cc
+++ b/moses/TranslationModel/UG/generic/threading/ug_thread_safe_counter.cc
@@ -3,10 +3,10 @@ namespace Moses
{
ThreadSafeCounter::
ThreadSafeCounter()
- : ctr(0)
+ : ctr(0)
{ }
- size_t
+ size_t
ThreadSafeCounter::
operator++()
{
@@ -14,21 +14,21 @@ namespace Moses
return ++ctr;
}
- size_t
+ size_t
ThreadSafeCounter::
operator++(int foo)
{
boost::lock_guard<boost::mutex> guard(this->lock);
return ctr++;
}
-
+
ThreadSafeCounter::
operator size_t() const
{
return ctr;
}
- size_t
+ size_t
ThreadSafeCounter::
operator--()
{
@@ -36,13 +36,13 @@ namespace Moses
return --ctr;
}
- size_t
+ size_t
ThreadSafeCounter::
operator--(int foo)
{
boost::lock_guard<boost::mutex> guard(this->lock);
return ctr--;
}
-
-
+
+
}
diff --git a/moses/TranslationModel/UG/mm/calc-coverage.cc b/moses/TranslationModel/UG/mm/calc-coverage.cc
index ef17656d9..83f67220d 100644
--- a/moses/TranslationModel/UG/mm/calc-coverage.cc
+++ b/moses/TranslationModel/UG/mm/calc-coverage.cc
@@ -16,7 +16,7 @@ using namespace ugdiss;
typedef L2R_Token<SimpleWordId> Token;
TokenIndex V;
sptr<vector<vector<Token> > > C(new vector<vector<Token> >());
-void
+void
add_file(string fname)
{
filtering_istream in;
diff --git a/moses/TranslationModel/UG/mm/custom-pt.cc b/moses/TranslationModel/UG/mm/custom-pt.cc
index 93c8c0eb0..1a51aa8a4 100644
--- a/moses/TranslationModel/UG/mm/custom-pt.cc
+++ b/moses/TranslationModel/UG/mm/custom-pt.cc
@@ -31,7 +31,7 @@ using namespace Moses;
using namespace Moses::bitext;
#define CACHING_THRESHOLD 1000
-#define lbop boost::math::binomial_distribution<>::find_lower_bound_on_p
+#define lbop boost::math::binomial_distribution<>::find_lower_bound_on_p
size_t mctr=0,xctr=0;
typedef L2R_Token<SimpleWordId> Token;
@@ -49,15 +49,15 @@ PScoreWC<Token> apply_wp;
vector<float> fweights;
void
-nbest_phrasepairs(uint64_t const pid1,
- pstats const& ps,
+nbest_phrasepairs(uint64_t const pid1,
+ pstats const& ps,
vector<PhrasePair> & nbest)
{
pstats::trg_map_t::const_iterator m;
vector<size_t> idx(nbest.size());
size_t i=0;
- for (m = ps.trg.begin();
- m != ps.trg.end() && i < nbest.size();
+ for (m = ps.trg.begin();
+ m != ps.trg.end() && i < nbest.size();
++m)
{
// cout << m->second.rcnt() << " " << ps.good << endl;
@@ -74,17 +74,17 @@ nbest_phrasepairs(uint64_t const pid1,
++i;
}
// cout << i << " " << nbest.size() << endl;
- if (i < nbest.size())
+ if (i < nbest.size())
{
// cout << "Resizing from " << nbest.size() << " to " << i << endl;
nbest.resize(i);
idx.resize(i);
}
VectorIndexSorter<PhrasePair> sorter(nbest,greater<PhrasePair>());
- if (m != ps.trg.end())
+ if (m != ps.trg.end())
{
make_heap(idx.begin(),idx.end(),sorter);
- PhrasePair cand;
+ PhrasePair cand;
cand.init(pid1,ps,5);
for (; m != ps.trg.end(); ++m)
{
@@ -104,7 +104,7 @@ nbest_phrasepairs(uint64_t const pid1,
}
sort(nbest.begin(),nbest.end(),greater<PhrasePair>());
}
-
+
int main(int argc, char* argv[])
{
// assert(argc == 4);
@@ -120,8 +120,8 @@ int main(int argc, char* argv[])
string L2 = "en";
size_t max_samples = argc > 1 ? atoi(argv[1]) : 1000;
#endif
- char c = *base.rbegin();
- if (c != '/' && c != '.')
+ char c = *base.rbegin();
+ if (c != '/' && c != '.')
base += ".";
fweights.resize(5,.25);
@@ -138,7 +138,7 @@ int main(int argc, char* argv[])
string line;
while (getline(cin,line))
{
- vector<id_type> snt;
+ vector<id_type> snt;
bt.V1->fillIdSeq(line,snt);
for (size_t i = 0; i < snt.size(); ++i)
{
@@ -156,8 +156,8 @@ int main(int argc, char* argv[])
sptr<pstats> s = bt.lookup(m);
for (size_t j = i; j <= k; ++j)
cout << (*bt.V1)[snt[j]] << " ";
- cout << s->good << "/"
- << s->sample_cnt << "/"
+ cout << s->good << "/"
+ << s->sample_cnt << "/"
<< s->raw_cnt << endl;
// vector<PhrasePair> nbest(min(s->trg.size(),size_t(20)));
vector<PhrasePair> nbest(s->trg.size());
@@ -172,17 +172,17 @@ int main(int argc, char* argv[])
cout << " " << setw(6) << pp.score << " ";
for (uint32_t i = off; i < stop; ++i)
cout << (*bt.V2)[o[i].id()] << " ";
- cout << pp.joint << "/"
+ cout << pp.joint << "/"
<< pp.raw1 << "/"
<< pp.raw2 << " |";
- BOOST_FOREACH(float f, pp.fvals)
+ BOOST_FOREACH(float f, pp.fvals)
cout << " " << f;
cout << endl;
}
}
}
}
-#endif
+#endif
exit(0);
}
#endif
diff --git a/moses/TranslationModel/UG/mm/mam2symal.cc b/moses/TranslationModel/UG/mm/mam2symal.cc
index 9610e6f56..eb5034aab 100644
--- a/moses/TranslationModel/UG/mm/mam2symal.cc
+++ b/moses/TranslationModel/UG/mm/mam2symal.cc
@@ -22,7 +22,7 @@ typedef L2R_Token<Conll_Sform> Token;
mmTtrack<char> MAM;
bool with_sids;
-void
+void
interpret_args(int ac, char* av[])
{
po::variables_map vm;
@@ -31,7 +31,7 @@ interpret_args(int ac, char* av[])
("help,h", "print this message")
("numbers,n", po::bool_switch(&with_sids), "print sentence ids as first token")
;
-
+
po::options_description h("Hidden Options");
h.add_options()
("mamfile", po::value<string>(&mamfile), "mamfile")
@@ -40,7 +40,7 @@ interpret_args(int ac, char* av[])
po::positional_options_description a;
a.add("mamfile",1);
a.add("range",-1);
-
+
po::store(po::command_line_parser(ac,av)
.options(h.add(o))
.positional(a)
@@ -56,11 +56,11 @@ interpret_args(int ac, char* av[])
}
}
-void
+void
printRangeMAM(size_t start, size_t stop)
{
for (;start < stop; start++)
- {
+ {
// size_t i = 0;
char const* p = MAM.sntStart(start);
char const* q = MAM.sntEnd(start);
@@ -76,7 +76,7 @@ printRangeMAM(size_t start, size_t stop)
}
}
-int
+int
main(int argc, char*argv[])
{
interpret_args(argc,argv);
@@ -91,7 +91,7 @@ main(int argc, char*argv[])
buf>>first;
if (buf.peek() == '-') buf>>c>>last;
else last = first;
- if (last < MAM.size())
+ if (last < MAM.size())
printRangeMAM(first,last+1);
}
}
diff --git a/moses/TranslationModel/UG/mm/mam_verify.cc b/moses/TranslationModel/UG/mm/mam_verify.cc
index d43539742..798baa947 100644
--- a/moses/TranslationModel/UG/mm/mam_verify.cc
+++ b/moses/TranslationModel/UG/mm/mam_verify.cc
@@ -21,7 +21,7 @@ mmTtrack<char> MAM;
mmTtrack<Token> T1,T2;
bool inv;
vector<string> range;
-void
+void
interpret_args(int ac, char* av[])
{
po::variables_map vm;
@@ -30,7 +30,7 @@ interpret_args(int ac, char* av[])
("help,h", "print this message")
("inv,i", po::bool_switch(&inv), "inverse")
;
-
+
po::options_description h("Hidden Options");
h.add_options()
("bname", po::value<string>(&bname), "base name")
@@ -43,7 +43,7 @@ interpret_args(int ac, char* av[])
a.add("L1",1);
a.add("L2",1);
a.add("range",-1);
-
+
po::store(po::command_line_parser(ac,av)
.options(h.add(o))
.positional(a)
@@ -87,7 +87,7 @@ check_range(size_t start, size_t stop)
return noAln;
}
-int
+int
main(int argc, char*argv[])
{
interpret_args(argc,argv);
@@ -100,7 +100,7 @@ main(int argc, char*argv[])
exit(1);
}
size_t noAln;
- if (!range.size())
+ if (!range.size())
noAln = check_range(0, MAM.size());
else
{
@@ -112,7 +112,7 @@ main(int argc, char*argv[])
buf>>first;
if (buf.peek() == '-') buf>>c>>last;
else last = first;
- if (last < MAM.size())
+ if (last < MAM.size())
noAln += check_range(first,last+1);
}
}
diff --git a/moses/TranslationModel/UG/mm/mmlex-build.cc b/moses/TranslationModel/UG/mm/mmlex-build.cc
index 5e5ea194c..1e7bee5cb 100644
--- a/moses/TranslationModel/UG/mm/mmlex-build.cc
+++ b/moses/TranslationModel/UG/mm/mmlex-build.cc
@@ -1,8 +1,8 @@
// -*- c++ -*-
// Program to extract word cooccurrence counts from a memory-mapped
// word-aligned bitext stores the counts lexicon in the format for
-// mm2dTable<uint32_t> (ug_mm_2d_table.h)
-//
+// mm2dTable<uint32_t> (ug_mm_2d_table.h)
+//
// (c) 2010-2012 Ulrich Germann
// to do: multi-threading
@@ -20,8 +20,8 @@
#include <boost/foreach.hpp>
#include <boost/thread.hpp>
#include <boost/math/distributions/binomial.hpp>
-#include <boost/unordered_map.hpp>
-#include <boost/unordered_set.hpp>
+#include <boost/unordered_map.hpp>
+#include <boost/unordered_set.hpp>
#include "moses/TranslationModel/UG/generic/program_options/ug_get_options.h"
#include "moses/Util.h"
@@ -36,7 +36,7 @@ using namespace boost::math;
typedef mm2dTable<id_type,id_type,uint32_t,uint32_t> LEX_t;
typedef SimpleWordId Token;
-// DECLARATIONS
+// DECLARATIONS
void interpret_args(int ac, char* av[]);
mmTtrack<Token> T1,T2;
@@ -52,7 +52,7 @@ struct Count
Count(uint32_t ax, uint32_t cx) : a(ax), c(cx) {}
};
-bool
+bool
operator<(pair<id_type,Count> const& a,
pair<id_type,Count> const& b)
{
@@ -72,7 +72,7 @@ public:
countlist_t & LEX;
size_t offset;
size_t skip;
- Counter(countlist_t& lex, size_t o, size_t s)
+ Counter(countlist_t& lex, size_t o, size_t s)
: LEX(lex), offset(o), skip(s) {}
void processSentence(id_type sid);
void operator()();
@@ -83,7 +83,7 @@ int verbose;
size_t truncat;
size_t num_threads;
-void
+void
Counter::
operator()()
{
@@ -105,17 +105,17 @@ struct lexsorter
{
vector<countlist_t> const& v;
id_type wid;
- lexsorter(vector<countlist_t> const& vx, id_type widx)
+ lexsorter(vector<countlist_t> const& vx, id_type widx)
: v(vx),wid(widx) {}
bool operator()(pair<uint32_t,uint32_t> const& a,
pair<uint32_t,uint32_t> const& b) const
{
- return (v.at(a.first).at(wid).at(a.second).first >
+ return (v.at(a.first).at(wid).at(a.second).first >
v.at(b.first).at(wid).at(b.second).first);
}
};
-void
+void
writeTableHeader(ostream& out)
{
filepos_type idxOffset=0;
@@ -159,7 +159,7 @@ void writeTable(ostream* aln_out, ostream* coc_out)
H.pop_back();
else
push_heap(H.begin(),H.end(),sorter);
- while (H.size() &&
+ while (H.size() &&
XLEX[H[0].first][id1].at(H[0].second).first == id2)
{
aln += XLEX[H[0].first][id1][H[0].second].second.a;
@@ -178,7 +178,7 @@ void writeTable(ostream* aln_out, ostream* coc_out)
numwrite(*aln_out,aln);
m1a[id1] += aln;
m2a[id2] += aln;
- }
+ }
if (coc_out && coc)
{
++CellCountC;
@@ -191,7 +191,7 @@ void writeTable(ostream* aln_out, ostream* coc_out)
}
idxa.back() = CellCountA;
idxc.back() = CellCountC;
- if (aln_out)
+ if (aln_out)
{
filepos_type idxOffsetA = aln_out->tellp();
BOOST_FOREACH(id_type foo, idxa)
@@ -201,7 +201,7 @@ void writeTable(ostream* aln_out, ostream* coc_out)
aln_out->seekp(0);
numwrite(*aln_out,idxOffsetA);
}
- if (coc_out)
+ if (coc_out)
{
filepos_type idxOffsetC = coc_out->tellp();
BOOST_FOREACH(id_type foo, idxc)
@@ -223,9 +223,9 @@ processSentence(id_type sid)
Token const* e2 = T2.sntEnd(sid);
vector<ushort> cnt1(V1.ksize(),0);
vector<ushort> cnt2(V2.ksize(),0);
- for (Token const* x = s1; x < e1; ++x)
+ for (Token const* x = s1; x < e1; ++x)
++cnt1.at(x->id());
- for (Token const* x = s2; x < e2; ++x)
+ for (Token const* x = s2; x < e2; ++x)
++cnt2.at(x->id());
boost::unordered_set<wpair> seen;
@@ -257,21 +257,21 @@ processSentence(id_type sid)
wpair k(id1,id2);
Count& cnt = CNT[k];
cnt.a++;
- if (seen.insert(k).second)
+ if (seen.insert(k).second)
cnt.c += cnt1[id1] * cnt2[id2];
}
// count unaliged words
- for (size_t i = check1.find_first();
- i < check1.size();
+ for (size_t i = check1.find_first();
+ i < check1.size();
i = check1.find_next(i))
CNT[wpair((s1+i)->id(),0)].a++;
- for (size_t i = check2.find_first();
- i < check2.size();
+ for (size_t i = check2.find_first();
+ i < check2.size();
i = check2.find_next(i))
CNT[wpair(0,(s2+i)->id())].a++;
}
-int
+int
main(int argc, char* argv[])
{
interpret_args(argc,argv);
@@ -299,7 +299,7 @@ main(int argc, char* argv[])
if (cooc.size()) coc_out.close();
}
-void
+void
interpret_args(int ac, char* av[])
{
namespace po=boost::program_options;
@@ -321,7 +321,7 @@ interpret_args(int ac, char* av[])
("truncate,n", po::value<size_t>(&truncat)->default_value(0),
"truncate corpus to <N> sentences (for debugging)")
;
-
+
h.add_options()
("bname", po::value<string>(&bname), "base name")
("L1", po::value<string>(&L1),"L1 tag")
diff --git a/moses/TranslationModel/UG/mm/mmlex-lookup.cc b/moses/TranslationModel/UG/mm/mmlex-lookup.cc
index fbdceeaa0..3ba9ef492 100644
--- a/moses/TranslationModel/UG/mm/mmlex-lookup.cc
+++ b/moses/TranslationModel/UG/mm/mmlex-lookup.cc
@@ -1,8 +1,8 @@
// -*- c++ -*-
// Program to extract word cooccurrence counts from a memory-mapped
// word-aligned bitext stores the counts lexicon in the format for
-// mm2dTable<uint32_t> (ug_mm_2d_table.h)
-//
+// mm2dTable<uint32_t> (ug_mm_2d_table.h)
+//
// (c) 2010-2012 Ulrich Germann
// to do: multi-threading
@@ -20,8 +20,8 @@
#include <boost/foreach.hpp>
#include <boost/thread.hpp>
#include <boost/math/distributions/binomial.hpp>
-#include <boost/unordered_map.hpp>
-#include <boost/unordered_set.hpp>
+#include <boost/unordered_map.hpp>
+#include <boost/unordered_set.hpp>
#include "moses/TranslationModel/UG/generic/program_options/ug_get_options.h"
#include "ug_mm_2d_table.h"
@@ -35,7 +35,7 @@ using namespace boost::math;
typedef mm2dTable<id_type,id_type,uint32_t,uint32_t> LEX_t;
typedef SimpleWordId Token;
-// DECLARATIONS
+// DECLARATIONS
void interpret_args(int ac, char* av[]);
string swrd,twrd,L1,L2,bname;
@@ -43,7 +43,7 @@ TokenIndex V1,V2;
LEX_t LEX;
-void
+void
lookup_source(ostream& out, id_type r)
{
vector<LEX_t::Cell> foo(LEX[r].start,LEX[r].stop);
@@ -57,7 +57,7 @@ lookup_source(ostream& out, id_type r)
}
}
-void
+void
lookup_target(ostream& out, id_type c)
{
vector<LEX_t::Cell> foo;
@@ -65,7 +65,7 @@ lookup_target(ostream& out, id_type c)
for (size_t r = 0; r < LEX.numRows; ++r)
{
size_t j = LEX[r][c];
- if (j)
+ if (j)
{
cell.id = r;
cell.val = j;
@@ -82,7 +82,7 @@ lookup_target(ostream& out, id_type c)
}
}
-void
+void
dump(ostream& out)
{
for (size_t r = 0; r < LEX.numRows; ++r)
@@ -91,7 +91,7 @@ dump(ostream& out)
}
-int
+int
main(int argc, char* argv[])
{
interpret_args(argc,argv);
@@ -100,14 +100,14 @@ main(int argc, char* argv[])
V1.open(bname+L1+".tdx");
V2.open(bname+L2+".tdx");
LEX.open(bname+L1+"-"+L2+".lex");
-
+
cout.precision(2);
id_type swid = V1[swrd];
id_type twid = V2[twrd];
if (swid != 1 && twid != 1)
{
- cout << swrd << " " << twrd << " "
- << LEX.m1(swid) << " / "
+ cout << swrd << " " << twrd << " "
+ << LEX.m1(swid) << " / "
<< LEX[swid][twid] << " / "
<< LEX.m2(twid) << endl;
}
@@ -119,7 +119,7 @@ main(int argc, char* argv[])
dump(cout);
}
-void
+void
interpret_args(int ac, char* av[])
{
namespace po=boost::program_options;
@@ -133,7 +133,7 @@ interpret_args(int ac, char* av[])
("source,s",po::value<string>(&swrd),"source word")
("target,t",po::value<string>(&twrd),"target word")
;
-
+
h.add_options()
("bname", po::value<string>(&bname), "base name")
("L1", po::value<string>(&L1),"L1 tag")
diff --git a/moses/TranslationModel/UG/mm/mtt-build.cc b/moses/TranslationModel/UG/mm/mtt-build.cc
index f49895ebf..a61cbac3f 100644
--- a/moses/TranslationModel/UG/mm/mtt-build.cc
+++ b/moses/TranslationModel/UG/mm/mtt-build.cc
@@ -46,8 +46,8 @@ bool quiet = false; // no progress reporting
string vocabBase; // base name for existing vocabs that should be used
string baseName; // base name for all files
-string tmpFile, mttFile; /* name of temporary / actual track file
- * (.mtt for Conll format, .mct for plain text)
+string tmpFile, mttFile; /* name of temporary / actual track file
+ * (.mtt for Conll format, .mct for plain text)
*/
string UNK;
@@ -60,7 +60,7 @@ void interpret_args(int ac, char* av[]);
inline uchar rangeCheck(int p, int limit) { return p < limit ? p : 1; }
-id_type
+id_type
get_id(TokenIndex const& T, string const& w)
{
id_type ret = T[w];
@@ -73,21 +73,21 @@ get_id(TokenIndex const& T, string const& w)
return ret;
}
-void
+void
open_vocab(TokenIndex& T, string fname)
{
- if (!access(fname.c_str(), F_OK))
- {
- T.open(fname,UNK);
- assert(T[UNK] == 1);
+ if (!access(fname.c_str(), F_OK))
+ {
+ T.open(fname,UNK);
+ assert(T[UNK] == 1);
}
else T.setUnkLabel(UNK);
if (incremental) T.setDynamic(true);
- assert(T["NULL"] == 0);
+ assert(T["NULL"] == 0);
assert(T[UNK] == 1);
}
-void
+void
ini_cnt_vec(TokenIndex const& T, vector<pair<string,size_t> > & v)
{
v.resize(T.totalVocabSize());
@@ -142,7 +142,7 @@ void fill_rec(Conll_Record& rec, vector<string> const& w)
else if (w.size() >= 8) // CONLL format
{
int id = atoi(w[0].c_str());
- int gov = atoi(w[6].c_str());
+ int gov = atoi(w[6].c_str());
rec.sform = get_id(SF, w[1]);
rec.lemma = get_id(LM, w[2]);
rec.majpos = rangeCheck(get_id(PS, w[3]), 256);
@@ -161,12 +161,12 @@ void log_progress(size_t ctr)
}
else if (ctr % 10000 == 0)
{
- cerr << ".";
+ cerr << ".";
}
}
-size_t
+size_t
process_plain_input(ostream& out, vector<id_type> & s_index)
{
id_type totalWords = 0;
@@ -176,7 +176,7 @@ process_plain_input(ostream& out, vector<id_type> & s_index)
istringstream buf(line);
if (!quiet) log_progress(s_index.size());
s_index.push_back(totalWords);
- while (buf>>w)
+ while (buf>>w)
{
numwrite(out,get_id(SF,w));
++totalWords;
@@ -186,9 +186,9 @@ process_plain_input(ostream& out, vector<id_type> & s_index)
return totalWords;
}
-size_t
-process_tagged_input(ostream& out,
- vector<id_type> & s_index,
+size_t
+process_tagged_input(ostream& out,
+ vector<id_type> & s_index,
vector<id_type> & p_index)
{
string line;
@@ -196,7 +196,7 @@ process_tagged_input(ostream& out,
bool new_sent = true;
bool new_par = true;
id_type totalWords = 0;
-
+
while (getline(cin,line))
{
vector<string> w; string f; istringstream buf(line);
@@ -205,7 +205,7 @@ process_tagged_input(ostream& out,
if (w.size() == 0 || starts_with(w[0], "SID="))
new_sent = true;
- else if (w.size() == 1 && w[0] == "<P>")
+ else if (w.size() == 1 && w[0] == "<P>")
new_par = new_sent = true;
if (w.size() < 3) continue;
@@ -244,7 +244,7 @@ numberize()
index = &p_index;
}
- if (!quiet)
+ if (!quiet)
cerr << endl << "Writing index ... (" << index->size() << " chunks) ";
startIdx = out.tellp();
@@ -261,7 +261,7 @@ numberize()
vector<id_type> smap,lmap,pmap,dmap;
-void
+void
invert(vector<id_type> const& from, vector<id_type> & to)
{
to.resize(from.size());
@@ -269,11 +269,11 @@ invert(vector<id_type> const& from, vector<id_type> & to)
to[from[i]] = i;
}
-// sorts new items based on occurrence counts but won't reassign
+// sorts new items based on occurrence counts but won't reassign
// existing token ids
-void
-conservative_sort(TokenIndex const & V,
- vector<size_t> const & cnt,
+void
+conservative_sort(TokenIndex const & V,
+ vector<size_t> const & cnt,
vector<id_type> & xmap)
{
xmap.resize(V.totalVocabSize());
@@ -344,21 +344,21 @@ void save_vocabs()
string vbase = baseName;
if (is_conll)
{
- if (SF.totalVocabSize() > SF.knownVocabSize())
+ if (SF.totalVocabSize() > SF.knownVocabSize())
write_tokenindex(vbase+".tdx.sfo",SF,smap);
- if (LM.totalVocabSize() > LM.knownVocabSize())
+ if (LM.totalVocabSize() > LM.knownVocabSize())
write_tokenindex(vbase+".tdx.lem",LM,lmap);
- if (PS.totalVocabSize() > PS.knownVocabSize())
+ if (PS.totalVocabSize() > PS.knownVocabSize())
write_tokenindex(vbase+".tdx.pos",PS,pmap);
- if (DT.totalVocabSize() > DT.knownVocabSize())
+ if (DT.totalVocabSize() > DT.knownVocabSize())
write_tokenindex(vbase+".tdx.drl",DT,dmap);
}
- else if (SF.totalVocabSize() > SF.knownVocabSize())
+ else if (SF.totalVocabSize() > SF.knownVocabSize())
write_tokenindex(vbase+".tdx",SF,smap);
}
template<typename Token>
-size_t
+size_t
build_mmTSA(string infile, string outfile)
{
size_t mypid = fork();
@@ -371,14 +371,14 @@ build_mmTSA(string infile, string outfile)
exit(0);
}
-bool
+bool
build_plaintext_tsas()
{
typedef L2R_Token<SimpleWordId> L2R;
typedef R2L_Token<SimpleWordId> R2L;
size_t c = with_sfas + with_pfas;
- if (with_sfas) build_mmTSA<L2R>(tmpFile, baseName + ".sfa");
- if (with_pfas) build_mmTSA<R2L>(tmpFile, baseName + ".pfa");
+ if (with_sfas) build_mmTSA<L2R>(tmpFile, baseName + ".sfa");
+ if (with_pfas) build_mmTSA<R2L>(tmpFile, baseName + ".pfa");
while (c--) wait(NULL);
return true;
}
@@ -388,27 +388,27 @@ void build_conll_tsas()
string bn = baseName;
string mtt = tmpFile;
size_t c = 3 * (with_sfas + with_pfas + with_dcas);
- if (with_sfas)
+ if (with_sfas)
{
build_mmTSA<L2R_Token<Conll_Sform> >(mtt,bn+".sfa-sform");
build_mmTSA<L2R_Token<Conll_Lemma> >(mtt,bn+".sfa-lemma");
build_mmTSA<L2R_Token<Conll_MinPos> >(mtt,bn+".sfa-minpos");
}
- if (with_pfas)
+ if (with_pfas)
{
build_mmTSA<R2L_Token<Conll_Sform> >(mtt,bn+".pfa-sform");
build_mmTSA<R2L_Token<Conll_Lemma> >(mtt,bn+".pfa-lemma");
build_mmTSA<R2L_Token<Conll_MinPos> >(mtt,bn+".pfa-minpos");
}
- if (with_dcas)
+ if (with_dcas)
{
- build_mmTSA<ConllBottomUpToken<Conll_Sform> >(mtt,bn+".dca-sform");
- build_mmTSA<ConllBottomUpToken<Conll_Lemma> >(mtt,bn+".dca-lemma");
+ build_mmTSA<ConllBottomUpToken<Conll_Sform> >(mtt,bn+".dca-sform");
+ build_mmTSA<ConllBottomUpToken<Conll_Lemma> >(mtt,bn+".dca-lemma");
build_mmTSA<ConllBottomUpToken<Conll_MinPos> >(mtt,bn+".dca-minpos");
}
- while (c--) wait(NULL);
+ while (c--) wait(NULL);
}
@@ -430,7 +430,7 @@ int main(int argc, char* argv[])
rename(tmpFile.c_str(),mttFile.c_str());
}
-void
+void
interpret_args(int ac, char* av[])
{
po::variables_map vm;
@@ -439,10 +439,10 @@ interpret_args(int ac, char* av[])
("help,h", "print this message")
- ("quiet,q", po::bool_switch(&quiet),
+ ("quiet,q", po::bool_switch(&quiet),
"don't print progress information")
- ("incremental,i", po::bool_switch(&incremental),
+ ("incremental,i", po::bool_switch(&incremental),
"incremental mode; rewrites vocab files!")
("vocab-base,v", po::value<string>(&vocabBase),
@@ -451,15 +451,15 @@ interpret_args(int ac, char* av[])
("output,o", po::value<string>(&baseName),
"base file name of the resulting file(s)")
- ("sfa,s", po::value<int>(&with_sfas)->default_value(1),
+ ("sfa,s", po::value<int>(&with_sfas)->default_value(1),
"also build suffix arrays")
("pfa,p", po::value<int>(&with_pfas)
- ->default_value(0)->implicit_value(1),
+ ->default_value(0)->implicit_value(1),
"also build prefix arrays")
("dca,d", po::value<int>(&with_dcas)
- ->default_value(0)->implicit_value(1),
+ ->default_value(0)->implicit_value(1),
"also build dependency chain arrays")
("conll,c", po::bool_switch(&is_conll),
@@ -468,18 +468,18 @@ interpret_args(int ac, char* av[])
("unk,u", po::value<string>(&UNK)->default_value("UNK"),
"label for unknown tokens")
- // ("map,m", po::value<string>(&vmap),
+ // ("map,m", po::value<string>(&vmap),
// "map words to word classes for indexing")
-
+
;
-
+
po::options_description h("Hidden Options");
h.add_options()
;
h.add(o);
po::positional_options_description a;
a.add("output",1);
-
+
po::store(po::command_line_parser(ac,av)
.options(h)
.positional(a)
@@ -487,7 +487,7 @@ interpret_args(int ac, char* av[])
po::notify(vm);
if (vm.count("help") || !vm.count("output"))
{
- cout << "\nusage:\n\t cat <corpus> | " << av[0]
+ cout << "\nusage:\n\t cat <corpus> | " << av[0]
<< " [options] <output .mtt file>" << endl;
cout << o << endl;
exit(0);
diff --git a/moses/TranslationModel/UG/mm/mtt-count-words.cc b/moses/TranslationModel/UG/mm/mtt-count-words.cc
index c9b435477..223ba2090 100644
--- a/moses/TranslationModel/UG/mm/mtt-count-words.cc
+++ b/moses/TranslationModel/UG/mm/mtt-count-words.cc
@@ -36,7 +36,7 @@ int main(int argc, char* argv[])
{
interpret_args(argc,argv);
T.open(bname+".mct");
- V.open(bname+".tdx");
+ V.open(bname+".tdx");
vector<size_t> cnt(V.ksize(),0);
for (size_t sid = 0; sid < T.size(); ++sid)
{
@@ -48,7 +48,7 @@ int main(int argc, char* argv[])
exit(0);
}
-void
+void
interpret_args(int ac, char* av[])
{
namespace po=boost::program_options;
@@ -60,7 +60,7 @@ interpret_args(int ac, char* av[])
o.add_options()
("help,h", "print this message")
;
-
+
h.add_options()
("bname", po::value<string>(&bname), "base name")
;
diff --git a/moses/TranslationModel/UG/mm/mtt-demo1.cc b/moses/TranslationModel/UG/mm/mtt-demo1.cc
index a253e9ed3..d3506fa0f 100644
--- a/moses/TranslationModel/UG/mm/mtt-demo1.cc
+++ b/moses/TranslationModel/UG/mm/mtt-demo1.cc
@@ -21,17 +21,17 @@ int main(int argc, char* argv[])
using namespace std;
if (argc < 3)
{
- cerr << "usage: " << argv[0] << " <track base name> lookup word sequence"
+ cerr << "usage: " << argv[0] << " <track base name> lookup word sequence"
<< endl;
}
string base = argv[1];
- TokenIndex V;
+ TokenIndex V;
V.open(base+".tdx");
- boost::shared_ptr<mmTtrack<Token> > T(new mmTtrack<Token>());
+ boost::shared_ptr<mmTtrack<Token> > T(new mmTtrack<Token>());
T->open(base+".mct");
mmTSA<Token> I; I.open(base+".sfa",T);
mmTSA<Token>::tree_iterator m(&I);
-
+
// look up the search string m.extend() returns true upon success
for (int i = 2; i < argc && m.extend(V[argv[i]]); ++i);
if (int(m.size() + 2) < argc)
@@ -39,7 +39,7 @@ int main(int argc, char* argv[])
cerr << "NOT FOUND" << endl;
exit(1);
}
-
+
tsa::ArrayEntry e(m.lower_bound(-1));
char const* stop = m.upper_bound(-1);
do
diff --git a/moses/TranslationModel/UG/mm/mtt-dump.cc b/moses/TranslationModel/UG/mm/mtt-dump.cc
index b7d85d623..eea1bb400 100644
--- a/moses/TranslationModel/UG/mm/mtt-dump.cc
+++ b/moses/TranslationModel/UG/mm/mtt-dump.cc
@@ -25,7 +25,7 @@ bool sform;
bool have_mtt, have_mct;
bool with_sids;
bool with_positions;
-void
+void
interpret_args(int ac, char* av[])
{
po::variables_map vm;
@@ -36,7 +36,7 @@ interpret_args(int ac, char* av[])
("sform,s", po::bool_switch(&sform), "sform only")
("with-positions,p", po::bool_switch(&with_positions), "show word positions")
;
-
+
po::options_description h("Hidden Options");
h.add_options()
("bname", po::value<string>(&bname), "base name")
@@ -45,7 +45,7 @@ interpret_args(int ac, char* av[])
po::positional_options_description a;
a.add("bname",1);
a.add("range",-1);
-
+
po::store(po::command_line_parser(ac,av)
.options(h.add(o))
.positional(a)
@@ -63,11 +63,11 @@ interpret_args(int ac, char* av[])
mct = bname+".mct";
}
-void
+void
printRangeMTT(size_t start, size_t stop)
{
for (;start < stop; start++)
- {
+ {
size_t i = 0;
Token const* s = MTT.sntStart(start);
Token const* e = MTT.sntEnd(start);
@@ -92,7 +92,7 @@ printRangeMTT(size_t start, size_t stop)
cout << i+t->parent << " ";
cout << DT[t->dtype] << endl;
}
- else
+ else
{
if (with_positions) cout << t-s << ":";
cout << SF[t->id()] << " ";
@@ -102,16 +102,16 @@ printRangeMTT(size_t start, size_t stop)
}
}
-void
+void
printRangeMCT(size_t start, size_t stop)
{
for (;start < stop; start++)
- {
+ {
SimpleWordId const* s = MCT.sntStart(start);
SimpleWordId const* t = s;
SimpleWordId const* e = MCT.sntEnd(start);
if (with_sids) cout << start << " ";
- while (t < e)
+ while (t < e)
{
if (with_positions) cout << t-s << ":";
cout << SF[(t++)->id()] << " ";
@@ -120,7 +120,7 @@ printRangeMCT(size_t start, size_t stop)
}
}
-int
+int
main(int argc, char*argv[])
{
interpret_args(argc,argv);
@@ -139,14 +139,14 @@ main(int argc, char*argv[])
DT.open(bname+".tdx.drl"); DT.iniReverseIndex();
MTT.open(mtt);
}
- else
+ else
{
sform = true;
SF.open(bname+".tdx"); SF.iniReverseIndex();
MCT.open(mct);
}
-
- if (!range.size())
+
+ if (!range.size())
have_mtt ? printRangeMTT(0, MTT.size()) : printRangeMCT(0, MCT.size());
else
{
@@ -157,9 +157,9 @@ main(int argc, char*argv[])
buf>>first;
if (buf.peek() == '-') buf>>c>>last;
else last = first;
- if (have_mtt && last < MTT.size())
+ if (have_mtt && last < MTT.size())
printRangeMTT(first,last+1);
- else if (last < MCT.size())
+ else if (last < MCT.size())
printRangeMCT(first,last+1);
}
}
diff --git a/moses/TranslationModel/UG/mm/mtt.count.cc b/moses/TranslationModel/UG/mm/mtt.count.cc
index 423c12ec7..1e2382f67 100644
--- a/moses/TranslationModel/UG/mm/mtt.count.cc
+++ b/moses/TranslationModel/UG/mm/mtt.count.cc
@@ -36,14 +36,14 @@ bool echo;
int main(int argc, char* argv[])
{
interpret_args(argc,argv);
-
+
T.open(bname+".mct");
V.open(bname+".tdx"); V.iniReverseIndex();
I.open(bname+".sfa",&T);
string line;
while (getline(cin,line))
{
- vector<id_type> phr;
+ vector<id_type> phr;
V.fillIdSeq(line,phr);
TSA<Token>::tree_iterator m(&I);
size_t i = 0;
@@ -55,7 +55,7 @@ int main(int argc, char* argv[])
exit(0);
}
-void
+void
interpret_args(int ac, char* av[])
{
namespace po=boost::program_options;
@@ -68,7 +68,7 @@ interpret_args(int ac, char* av[])
("help,h", "print this message")
("echo,e", po::bool_switch(&echo), "repeat lookup phrases")
;
-
+
h.add_options()
("bname", po::value<string>(&bname), "base name")
;
diff --git a/moses/TranslationModel/UG/mm/num_read_write.cc b/moses/TranslationModel/UG/mm/num_read_write.cc
index 403f7d300..5c281d9dd 100644
--- a/moses/TranslationModel/UG/mm/num_read_write.cc
+++ b/moses/TranslationModel/UG/mm/num_read_write.cc
@@ -2,7 +2,7 @@
namespace ugdiss {
typedef unsigned char uchar;
- void
+ void
numwrite(std::ostream& out, uint16_t const& x)
{
char buf[2];
@@ -11,7 +11,7 @@ namespace ugdiss {
out.write(buf,2);
}
- void
+ void
numwrite(std::ostream& out, uint32_t const& x)
{
char buf[4];
@@ -22,7 +22,7 @@ namespace ugdiss {
out.write(buf,4);
}
- void
+ void
numwrite(std::ostream& out, uint64_t const& x)
{
char buf[8];
@@ -37,7 +37,7 @@ namespace ugdiss {
out.write(buf,8);
}
- char const*
+ char const*
numread(char const* src, uint16_t & x)
{
uchar const* d = reinterpret_cast<uchar const*>(src);
@@ -45,28 +45,28 @@ namespace ugdiss {
return src+2;
}
- char const*
+ char const*
numread(char const* src, uint32_t & x)
{
uchar const* d = reinterpret_cast<uchar const*>(src);
- x = ((uint32_t(d[0])<<0) |
- (uint32_t(d[1])<<8) |
- (uint32_t(d[2])<<16)|
+ x = ((uint32_t(d[0])<<0) |
+ (uint32_t(d[1])<<8) |
+ (uint32_t(d[2])<<16)|
(uint32_t(d[3])<<24));
return src+4;
}
- char const*
+ char const*
numread(char const* src, uint64_t & x)
{
uchar const* d = reinterpret_cast<uchar const*>(src);
- x = ((uint64_t(d[0])<<0) |
- (uint64_t(d[1])<<8) |
- (uint64_t(d[2])<<16) |
+ x = ((uint64_t(d[0])<<0) |
+ (uint64_t(d[1])<<8) |
+ (uint64_t(d[2])<<16) |
(uint64_t(d[3])<<24) |
- (uint64_t(d[4])<<32) |
- (uint64_t(d[5])<<40) |
- (uint64_t(d[6])<<48) |
+ (uint64_t(d[4])<<32) |
+ (uint64_t(d[5])<<40) |
+ (uint64_t(d[6])<<48) |
(uint64_t(d[7])<<56));
return src+8;
}
diff --git a/moses/TranslationModel/UG/mm/num_read_write.h b/moses/TranslationModel/UG/mm/num_read_write.h
index 6fdcecc81..f83e1c982 100644
--- a/moses/TranslationModel/UG/mm/num_read_write.h
+++ b/moses/TranslationModel/UG/mm/num_read_write.h
@@ -14,11 +14,11 @@ namespace ugdiss {
void numwrite(std::ostream& out, uint16_t const& x);
void numwrite(std::ostream& out, uint32_t const& x);
void numwrite(std::ostream& out, uint64_t const& x);
-
+
char const* numread(char const* src, uint16_t & x);
char const* numread(char const* src, uint32_t & x);
char const* numread(char const* src, uint64_t & x);
-
+
// template<typename uintNumber>
// void
// numwrite(std::ostream& out, uintNumber const& x)
@@ -54,7 +54,7 @@ namespace ugdiss {
// case 8: x = bswap_64(x); break;
// default: break;
// }
-// #endif
+// #endif
// }
// template<typename uintNumber>
@@ -71,7 +71,7 @@ namespace ugdiss {
// case 8: x = bswap_64(x); break;
// default: break;
// }
-// #endif
+// #endif
// return src+sizeof(uintNumber);
// }
} // end of namespace ugdiss
diff --git a/moses/TranslationModel/UG/mm/obsolete/ug_bitext_base.h b/moses/TranslationModel/UG/mm/obsolete/ug_bitext_base.h
index 1810027af..e5e9ca88c 100644
--- a/moses/TranslationModel/UG/mm/obsolete/ug_bitext_base.h
+++ b/moses/TranslationModel/UG/mm/obsolete/ug_bitext_base.h
@@ -39,8 +39,8 @@ namespace Moses {
class jstats; // phrase pair ("joint") statistics
class agenda
{
- boost::mutex lock;
- boost::condition_variable ready;
+ boost::mutex lock;
+ boost::condition_variable ready;
class job;
class worker;
list<job> joblist;
@@ -52,9 +52,9 @@ namespace Moses {
agenda(bitext_base const& bitext);
~agenda();
void add_workers(int n);
- sptr<pstats> add_job(mmbitext::iter const& phrase,
+ sptr<pstats> add_job(mmbitext::iter const& phrase,
size_t const max_samples);
- bool get_task(uint64_t & sid, uint64_t & offset, uint64_t & len,
+ bool get_task(uint64_t & sid, uint64_t & offset, uint64_t & len,
bool & fwd, sptr<bitext_base::pstats> & stats);
};
@@ -65,22 +65,22 @@ namespace Moses {
mmTtrack<char> Tx; // word alignments
mmTtrack<Token> T1,T2; // token tracks
TokenIndex V1,V2; // vocabs
- mmTSA<Token> I1,I2; // suffix arrays
+ mmTSA<Token> I1,I2; // suffix arrays
/// given the source phrase sid[start:stop]
- // find the possible start (s1 .. s2) and end (e1 .. e2)
+ // find the possible start (s1 .. s2) and end (e1 .. e2)
// points of the target phrase; if non-NULL, store word
- // alignments in *core_alignment. If /flip/, source phrase is
+ // alignments in *core_alignment. If /flip/, source phrase is
// L2.
- bool
+ bool
find_trg_phr_bounds
- (size_t const sid, size_t const start, size_t const stop,
- size_t & s1, size_t & s2, size_t & e1, size_t & e2,
+ (size_t const sid, size_t const start, size_t const stop,
+ size_t & s1, size_t & s2, size_t & e1, size_t & e2,
vector<uchar> * core_alignment, bool const flip) const;
boost::unordered_map<uint64_t,sptr<pstats> > cache1,cache2;
private:
- sptr<pstats>
+ sptr<pstats>
prep2(iter const& phrase);
public:
mmbitext();
@@ -98,8 +98,8 @@ namespace Moses {
jstats
{
uint32_t my_rcnt; // unweighted count
- float my_wcnt; // weighted count
- vector<pair<size_t, vector<uchar> > > my_aln;
+ float my_wcnt; // weighted count
+ vector<pair<size_t, vector<uchar> > > my_aln;
boost::mutex lock;
public:
jstats();
@@ -110,22 +110,22 @@ namespace Moses {
void add(float w, vector<uchar> const& a);
};
- struct
+ struct
mmbitext::
pstats
{
boost::mutex lock; // for parallel gathering of stats
boost::condition_variable ready; // consumers can wait for this data structure to be ready.
- size_t raw_cnt; // (approximate) raw occurrence count
+ size_t raw_cnt; // (approximate) raw occurrence count
size_t sample_cnt; // number of instances selected during sampling
size_t good; // number of selected instances with valid word alignments
size_t sum_pairs;
- // size_t snt_cnt;
+ // size_t snt_cnt;
// size_t sample_snt;
size_t in_progress; // keeps track of how many threads are currently working on this
boost::unordered_map<uint64_t, jstats> trg;
- pstats();
+ pstats();
// vector<phrase> nbest;
// void select_nbest(size_t const N=10);
void release();
@@ -142,7 +142,7 @@ namespace Moses {
public:
worker(agenda& a);
void operator()();
-
+
};
class
diff --git a/moses/TranslationModel/UG/mm/symal2mam.cc b/moses/TranslationModel/UG/mm/symal2mam.cc
index 631d4ae07..6d0af57b0 100644
--- a/moses/TranslationModel/UG/mm/symal2mam.cc
+++ b/moses/TranslationModel/UG/mm/symal2mam.cc
@@ -2,9 +2,9 @@
// program to convert GIZA-style alignments into memory-mapped format
// (c) 2010 Ulrich Germann
-// Reads from stdin a file with alternating lines: sentence lengths and symal output.
-// We need the sentence lenghts for sanity checks, because GIZA alignment might skip
-// sentences. If --skip, we skip such sentence pairs, otherwise, we leave the word
+// Reads from stdin a file with alternating lines: sentence lengths and symal output.
+// We need the sentence lenghts for sanity checks, because GIZA alignment might skip
+// sentences. If --skip, we skip such sentence pairs, otherwise, we leave the word
// alignment matrix blank.
#include "ug_mm_ttrack.h"
@@ -24,7 +24,7 @@
#include "util/exception.hh"
// #include "headers-base/util/check.hh"
-// NOTE TO SELF:
+// NOTE TO SELF:
/* Program to filter out sentences that GIZA will skip or truncate,
* i.e. sentences longer than 100 words or sentence pairs with a length
*/
@@ -42,7 +42,7 @@ TokenIndex V1;
string mtt1name,mtt2name,o1name,o2name,mamname,cfgFile;
string dataFormat,A3filename;
-void
+void
interpret_args(int ac, char* av[])
{
namespace po=boost::program_options;
@@ -63,7 +63,7 @@ interpret_args(int ac, char* av[])
("t2", po::value<string>(&mtt2name), "file name of L2 mapped token track")
("format,F", po::value<string>(&dataFormat)->default_value("plain"), "data format (plain or conll)")
;
-
+
h.add_options()
("mamname", po::value<string>(&mamname), "name of output file for mam")
;
@@ -76,8 +76,8 @@ interpret_args(int ac, char* av[])
if (vm.count("help") || mamname.empty())
{
cout << "usage:\n"
- << "\t\n"
- << "\t ... | " << av[0]
+ << "\t\n"
+ << "\t ... | " << av[0]
<< " <.mam file> \n" << endl;
cout << o << endl;
cout << "If an A3 file is given (as produced by (m)giza), symal2mam performs\n"
@@ -117,8 +117,8 @@ procSymalLine(string const& line, ostream& out)
{
cerr << a << "-" << b << " " << len1 << "/" << len2 << endl;
}
- assert(len1 == 0 || a<len1);
- assert(len2 == 0 || b<len2);
+ assert(len1 == 0 || a<len1);
+ assert(len2 == 0 || b<len2);
binwrite(out,a);
binwrite(out,b);
}
@@ -138,7 +138,7 @@ void finiMAM(ofstream& out, vector<id_type>& idx, id_type numTok)
out.close();
}
-void
+void
finalize(ofstream& out, vector<id_type> const& idx, id_type tokenCount)
{
id_type idxSize = idx.size();
@@ -184,7 +184,7 @@ go()
while(getline(cin,line))
{
idxm.push_back(procSymalLine(line,mam));
- if (debug && ++ctr%100000==0)
+ if (debug && ++ctr%100000==0)
cerr << ctr/1000 << "K lines processed" << endl;
}
finiMAM(mam,idxm,0);
@@ -208,20 +208,20 @@ go(string t1name, string t2name, string A3filename)
for (sid = 0; sid < T1.size(); ++sid)
{
- len1 = T1.sntLen(sid);
+ len1 = T1.sntLen(sid);
len2 = T2.sntLen(sid);
- if (debug)
- cerr << "[" << lineCtr << "] "
- << len1 << " (" << check1 << ") / "
+ if (debug)
+ cerr << "[" << lineCtr << "] "
+ << len1 << " (" << check1 << ") / "
<< len2 << " (" << check2 << ")" << endl;
- if ((check1 >=0 && check1!=len1) ||
+ if ((check1 >=0 && check1!=len1) ||
(check2 >=0 && check2!=len2))
{
if (skip)
{
- cerr << "[" << ++skipCtr << "] skipping "
- << check1 << "/" << check2 << " vs. "
- << len1 << "/" << len2
+ cerr << "[" << ++skipCtr << "] skipping "
+ << check1 << "/" << check2 << " vs. "
+ << len1 << "/" << len2
<< " at line " << lineCtr << endl;
}
else
@@ -238,9 +238,9 @@ go(string t1name, string t2name, string A3filename)
}
if (skip)
{
- idx1.push_back(tokenCount1 += len1);
+ idx1.push_back(tokenCount1 += len1);
copySentence(T1,sid,t1out);
- idx2.push_back(tokenCount2 += len2);
+ idx2.push_back(tokenCount2 += len2);
copySentence(T2,sid,t2out);
}
@@ -250,7 +250,7 @@ go(string t1name, string t2name, string A3filename)
lineCtr++;
idxm.push_back(procSymalLine(line,mam));
if (debug) cerr << "[" << lineCtr << "] "
- << check1 << " (" << len1 <<") "
+ << check1 << " (" << len1 <<") "
<< check2 << " (" << len2 <<") "
<< line << endl;
getCheckValues(A3file,check1,check2);
@@ -264,7 +264,7 @@ go(string t1name, string t2name, string A3filename)
cout << idxm.size() << endl;
}
-void
+void
initialize(ofstream& out, string const& fname)
{
out.open(fname.c_str());
diff --git a/moses/TranslationModel/UG/mm/tpt_pickler.cc b/moses/TranslationModel/UG/mm/tpt_pickler.cc
index c23913fc2..353e5b901 100644
--- a/moses/TranslationModel/UG/mm/tpt_pickler.cc
+++ b/moses/TranslationModel/UG/mm/tpt_pickler.cc
@@ -73,45 +73,45 @@ namespace ugdiss
data += T(c&mask) << 63;
}
- void
- binwrite(std::ostream& out, unsigned char data)
- {
+ void
+ binwrite(std::ostream& out, unsigned char data)
+ {
binwrite_unsigned_integer(out, data);
}
- void
+ void
binwrite(std::ostream& out, unsigned short data)
- {
+ {
binwrite_unsigned_integer(out, data);
}
- void
+ void
binwrite(std::ostream& out, unsigned long data)
- {
+ {
binwrite_unsigned_integer(out, data);
}
- void
+ void
binwrite(std::ostream& out, unsigned long long data)
- {
+ {
binwrite_unsigned_integer(out, data);
}
#if __WORDSIZE == 64
- void
+ void
binwrite(std::ostream& out, unsigned int data)
- {
+ {
binwrite_unsigned_integer(out, data);
}
-#else
- void
+#else
+ void
binwrite(std::ostream& out, size_t data)
- {
+ {
binwrite_unsigned_integer(out, data);
}
#endif
- void
+ void
binread(std::istream& in, unsigned short& data)
{
assert(sizeof(data)==2);
@@ -127,7 +127,7 @@ namespace ugdiss
data += uint16_t(c&mask) << 14;
}
- void
+ void
binread(std::istream& in, unsigned int& data)
{
assert(sizeof(data) == 4);
@@ -149,7 +149,7 @@ namespace ugdiss
data += uint32_t(c&mask) << 28;
}
- void
+ void
binread(std::istream& in, unsigned long& data)
{
#if __WORDSIZE == 32
@@ -185,16 +185,16 @@ namespace ugdiss
data += static_cast<unsigned long long>(c&mask) << 49;
if (c < 0) return;
in.get(c);
-
+
data += static_cast<unsigned long long>(c&mask) << 56;
if (c < 0) return;
in.get(c);
-
+
data += static_cast<unsigned long long>(c&mask) << 63;
#endif
}
- void
+ void
binread(std::istream& in, unsigned long long& data)
{
assert(sizeof(unsigned long long)==8);
@@ -231,14 +231,14 @@ namespace ugdiss
}
// writing and reading strings ...
- void
+ void
binwrite(std::ostream& out, std::string const& s)
{
size_t len = s.size();
ugdiss::binwrite(out,len);
out.write(s.c_str(),len);
}
-
+
void
binread(std::istream& in, std::string& s)
{
@@ -250,28 +250,28 @@ namespace ugdiss
buf[len] = 0;
s = buf;
}
-
+
void
binwrite(std::ostream& out, float x)
- {
- // IMPORTANT: this is not robust against the big/little endian
- // issue.
- out.write(reinterpret_cast<char*>(&x),sizeof(float));
+ {
+ // IMPORTANT: this is not robust against the big/little endian
+ // issue.
+ out.write(reinterpret_cast<char*>(&x),sizeof(float));
}
-
+
void
binread(std::istream& in, float& x)
- {
- // IMPORTANT: this is not robust against the big/little endian
- // issue.
- in.read(reinterpret_cast<char*>(&x),sizeof(x));
+ {
+ // IMPORTANT: this is not robust against the big/little endian
+ // issue.
+ in.read(reinterpret_cast<char*>(&x),sizeof(x));
}
-
+
char const *binread(char const* p, uint16_t& buf)
{
static char mask = 127;
- buf = (*p)&mask;
+ buf = (*p)&mask;
if (*p++ < 0) return p;
buf += uint16_t((*p)&mask)<<7;
if (*p++ < 0) return p;
@@ -294,26 +294,26 @@ namespace ugdiss
char const *binread(char const* p, uint32_t& buf)
{
static char mask = 127;
-
- if (*p < 0)
- {
- buf = (*p)&mask;
- return ++p;
+
+ if (*p < 0)
+ {
+ buf = (*p)&mask;
+ return ++p;
}
buf = *p;
- if (*(++p) < 0)
+ if (*(++p) < 0)
{
buf += uint32_t((*p)&mask)<<7;
return ++p;
}
buf += uint32_t(*p)<<7;
- if (*(++p) < 0)
+ if (*(++p) < 0)
{
buf += uint32_t((*p)&mask)<<14;
return ++p;
}
buf += uint32_t(*p)<<14;
- if (*(++p) < 0)
+ if (*(++p) < 0)
{
buf += uint32_t((*p)&mask)<<21;
return ++p;
@@ -331,56 +331,56 @@ namespace ugdiss
char const *binread(char const* p, filepos_type& buf)
{
static char mask = 127;
-
- if (*p < 0)
- {
- buf = (*p)&mask;
- return ++p;
+
+ if (*p < 0)
+ {
+ buf = (*p)&mask;
+ return ++p;
}
buf = *p;
- if (*(++p) < 0)
+ if (*(++p) < 0)
{
buf += filepos_type((*p)&mask)<<7;
return ++p;
}
buf += filepos_type(*p)<<7;
- if (*(++p) < 0)
+ if (*(++p) < 0)
{
buf += filepos_type((*p)&mask)<<14;
return ++p;
}
buf += filepos_type(*p)<<14;
- if (*(++p) < 0)
+ if (*(++p) < 0)
{
buf += filepos_type((*p)&mask)<<21;
return ++p;
}
buf += filepos_type(*p)<<21;
- if (*(++p) < 0)
+ if (*(++p) < 0)
{
buf += filepos_type((*p)&mask)<<28;
return ++p;
}
buf += filepos_type(*p)<<28;
- if (*(++p) < 0)
+ if (*(++p) < 0)
{
buf += filepos_type((*p)&mask)<<35;
return ++p;
}
buf += filepos_type(*p)<<35;
- if (*(++p) < 0)
+ if (*(++p) < 0)
{
buf += filepos_type((*p)&mask)<<42;
return ++p;
}
buf += filepos_type(*p)<<42;
- if (*(++p) < 0)
+ if (*(++p) < 0)
{
buf += filepos_type((*p)&mask)<<49;
return ++p;
}
buf += filepos_type(*p)<<49;
- if (*(++p) < 0)
+ if (*(++p) < 0)
{
buf += filepos_type((*p)&mask)<<56;
return ++p;
diff --git a/moses/TranslationModel/UG/mm/tpt_pickler.h b/moses/TranslationModel/UG/mm/tpt_pickler.h
index 7305a858e..5ac71c16d 100644
--- a/moses/TranslationModel/UG/mm/tpt_pickler.h
+++ b/moses/TranslationModel/UG/mm/tpt_pickler.h
@@ -17,30 +17,30 @@ namespace ugdiss
/// @return the size of file fname.
::uint64_t getFileSize(const std::string& fname);
- /**
- * The following functions write and read data in a compact binary
+ /**
+ * The following functions write and read data in a compact binary
* representation. Write and read errors can be checked directly
* on the ostream object after the function call, so no return value is
* necessary.*/
- void binwrite(std::ostream& out, char data);
- void binwrite(std::ostream& out, unsigned char data);
+ void binwrite(std::ostream& out, char data);
+ void binwrite(std::ostream& out, unsigned char data);
void binwrite(std::ostream& out, unsigned short data);
void binwrite(std::ostream& out, unsigned int data);
void binwrite(std::ostream& out, unsigned long data);
void binwrite(std::ostream& out, size_t data);
void binwrite(std::ostream& out, unsigned long long data);
void binwrite(std::ostream& out, std::string const& data);
- void binwrite(std::ostream& out, float data);
+ void binwrite(std::ostream& out, float data);
- void binread(std::istream& in, char &data);
- void binread(std::istream& in, unsigned char &data);
+ void binread(std::istream& in, char &data);
+ void binread(std::istream& in, unsigned char &data);
void binread(std::istream& in, unsigned short &data);
void binread(std::istream& in, unsigned int &data);
void binread(std::istream& in, unsigned long &data);
void binread(std::istream& in, size_t &data);
void binread(std::istream& in, unsigned long long &data);
void binread(std::istream& in, std::string &data);
- void binread(std::istream& in, float &data);
+ void binread(std::istream& in, float &data);
char const *binread(char const* p, uint16_t& buf);
char const *binread(char const* p, uint32_t& buf);
@@ -68,11 +68,11 @@ namespace ugdiss
/*
template<typename WHATEVER>
- char const*
+ char const*
binread(char const* p, WHATEVER* buf);
template<typename numtype>
- char const*
+ char const*
binread(char const* p, numtype& buf);
*/
@@ -113,7 +113,7 @@ namespace ugdiss
p = binread(p,v[i]);
return p;
}
-
+
template<typename T>
T read(std::istream& in)
{
@@ -132,7 +132,7 @@ namespace ugdiss
template<typename T>
- void
+ void
binwrite(std::ostream& out, std::vector<T> const& data)
{
binwrite(out,data.size());
@@ -141,7 +141,7 @@ namespace ugdiss
}
template<typename T>
- void
+ void
binread(std::istream& in, std::vector<T>& data)
{
size_t s;
@@ -157,8 +157,8 @@ namespace ugdiss
{
size_t s; K k; V v;
binread(in,s);
- data.clear();
- // I have no idea why this is necessary, but it is, even when
+ data.clear();
+ // I have no idea why this is necessary, but it is, even when
// /data/ is supposed to be empty
for (size_t i = 0; i < s; i++)
{
@@ -174,7 +174,7 @@ namespace ugdiss
binwrite(std::ostream& out, std::map<K,V> const& data)
{
binwrite(out,data.size());
- for (typename std::map<K,V>::const_iterator m = data.begin();
+ for (typename std::map<K,V>::const_iterator m = data.begin();
m != data.end(); m++)
{
binwrite(out,m->first);
@@ -200,7 +200,7 @@ namespace ugdiss
template<typename WHATEVER>
- char const*
+ char const*
binread(char const* p, WHATEVER* buf)
{
#ifdef VERIFY_TIGHT_PACKING
@@ -209,6 +209,6 @@ namespace ugdiss
return binread(p,*buf);
}
-
+
} // end namespace ugdiss
#endif
diff --git a/moses/TranslationModel/UG/mm/tpt_tightindex.cc b/moses/TranslationModel/UG/mm/tpt_tightindex.cc
index da28c6d93..72cf0c183 100644
--- a/moses/TranslationModel/UG/mm/tpt_tightindex.cc
+++ b/moses/TranslationModel/UG/mm/tpt_tightindex.cc
@@ -8,10 +8,10 @@
*/
//
// ugTightIndex.cc
-//
+//
// Made by Ulrich Germann
// Login <germann@germann-laptop>
-//
+//
// Started on Tue Jul 17 15:09:33 2007 Ulrich Germann
// Started on Tue Jul 17 15:09:33 2007 Ulrich Germann
//
@@ -63,7 +63,7 @@ namespace ugdiss
// }
// #define LOG_WRITE_ACTIVITY
-
+
// write a key or value into a tight index
// flag indicates wheter it's a key or a value
void tightwrite(std::ostream& out, uint64_t data, bool flag)
@@ -80,10 +80,10 @@ namespace ugdiss
std::cerr << " with flag 1 ";
#endif
while (data >= 128)
- {
+ {
char c = char(data%128)|char(-128);
- out.put(c);
- data >>= 7;
+ out.put(c);
+ data >>= 7;
#ifdef LOG_WRITE_ACTIVITY
bytes_written++;
#endif
@@ -99,7 +99,7 @@ namespace ugdiss
while (data >= 128)
{
char c = data&127;
- out.put(c);
+ out.put(c);
data >>= 7;
#ifdef LOG_WRITE_ACTIVITY
bytes_written++;
@@ -112,16 +112,16 @@ namespace ugdiss
std::cerr << " in " << bytes_written << " bytes" << std::endl;
#endif
}
-
-// For the code below: does it make a difference if I hard-code the
+
+// For the code below: does it make a difference if I hard-code the
// unraveled loop or does code optimization by the compiler take care
// of that?
#define DEBUG_TIGHTREAD 0
- // read a key value from a tight index; filepos_type must be at least as
+ // read a key value from a tight index; filepos_type must be at least as
// large as count_type
- filepos_type
+ filepos_type
tightread(std::istream& in, std::ios::pos_type stop)
{
// debug=true;
@@ -131,8 +131,8 @@ namespace ugdiss
short int bitshift = 7;
int pos = in.tellg();
#if DEBUG_TIGHTREAD
- if (debug)
- cerr << bitpattern(uint(in.peek())) << " " << in.peek()
+ if (debug)
+ cerr << bitpattern(uint(in.peek())) << " " << in.peek()
<< " pos=" << in.tellg() << "\n";
#endif
int buf = in.get();
@@ -141,24 +141,24 @@ namespace ugdiss
else
stop = std::min(size_t(stop),size_t(in.tellg())+in.rdbuf()->in_avail());
if (buf < 0)
- std::cerr << "number read: " << buf << " " << pos << " "
+ std::cerr << "number read: " << buf << " " << pos << " "
<< in.tellg() << std::endl;
assert (buf>=0);
-
+
if (buf >= 128) // continuation bit is 1
{
data = buf-128; // unset the bit
while (in.tellg() < stop && in.peek() >= 128)
{
#if DEBUG_TIGHTREAD
- if (debug)
+ if (debug)
cerr << bitpattern(uint(in.peek())) << " " << in.peek();
#endif
// cerr << bitpattern(size_t(in.peek())) << std::endl;
data += size_t(in.get()-128)<<bitshift;
bitshift += 7;
#if DEBUG_TIGHTREAD
- if (debug)
+ if (debug)
cerr << " " << data << " pos=" << in.tellg() << std::endl;
#endif
}
@@ -170,14 +170,14 @@ namespace ugdiss
{
// cerr << bitpattern(size_t(in.peek())) << std::endl;
#if DEBUG_TIGHTREAD
- if (debug)
+ if (debug)
cerr << bitpattern(uint(in.peek())) << " " << in.peek();
-
+
#endif
data += size_t(in.get())<<bitshift;
bitshift += 7;
#if DEBUG_TIGHTREAD
- if (debug)
+ if (debug)
cerr << " " << data << " pos=" << in.tellg() << "\n";
#endif
}
@@ -189,16 +189,16 @@ namespace ugdiss
#if DEBUG_TIGHTFIND
bool debug=true;
#endif
- bool
+ bool
tightfind_midpoint(std::istream& in, filepos_type start, filepos_type stop)
{
- in.seekg((start+stop)/2);
- // Jump approximately to the middle. Since we might land in the
- // middle of a number, we need to find the start of the next
+ in.seekg((start+stop)/2);
+ // Jump approximately to the middle. Since we might land in the
+ // middle of a number, we need to find the start of the next
// [index key/file offset] pair first. Bytes belonging to an index
- // key have the leftmost bit set to 0, bytes belonging to a file
+ // key have the leftmost bit set to 0, bytes belonging to a file
// offset have it set to 1
-
+
// if we landed in the middle of an index key, skip to the end of it
while (static_cast<filepos_type>(in.tellg()) < stop && in.get() < 128)
{
@@ -216,9 +216,9 @@ bool debug=true;
while (static_cast<filepos_type>(in.tellg()) < stop && in.peek() >= 128)
{
#if DEBUG_TIGHTFIND
- int r = in.get();
+ int r = in.get();
if (debug)
- std::cerr << in.tellg() << " skipped value byte " << r
+ std::cerr << in.tellg() << " skipped value byte " << r
<< " next is " << in.peek()
<< std::endl;
#else
@@ -227,9 +227,9 @@ bool debug=true;
}
return true;
}
-
- char const*
- tightfind_midpoint(char const* const start,
+
+ char const*
+ tightfind_midpoint(char const* const start,
char const* const stop)
{
char const* mp = start + (stop - start)/2;
@@ -238,46 +238,46 @@ bool debug=true;
return (*mp < 0) ? ++mp : mp;
}
- bool
- linear_search(std::istream& in, filepos_type start, filepos_type stop,
+ bool
+ linear_search(std::istream& in, filepos_type start, filepos_type stop,
id_type key, unsigned char& flags)
{ // performs a linear search in the range
in.seekg(start);
-
+
#if DEBUG_TIGHTFIND
if (debug) std::cerr << in.tellg() << " ";
#endif
-
- // ATTENTION! The bitshift operations below are important:
- // We use some of the bits in the key value to store additional
+
+ // ATTENTION! The bitshift operations below are important:
+ // We use some of the bits in the key value to store additional
// information about what and where node iformation is stored.
-
+
id_type foo;
- for(foo = tightread(in,stop);
- (foo>>FLAGBITS) < key;
- foo = tightread(in,stop))
+ for(foo = tightread(in,stop);
+ (foo>>FLAGBITS) < key;
+ foo = tightread(in,stop))
{
// skip the value associated with key /foo/
- while (static_cast<filepos_type>(in.tellg()) < stop
- && in.peek() >= 128) in.get();
-
+ while (static_cast<filepos_type>(in.tellg()) < stop
+ && in.peek() >= 128) in.get();
+
#if DEBUG_TIGHTFIND
- if (debug)
- std::cerr << (foo>>FLAGBITS) << " [" << key << "] "
+ if (debug)
+ std::cerr << (foo>>FLAGBITS) << " [" << key << "] "
<< in.tellg() << std::endl;
#endif
-
+
if (in.tellg() == std::ios::pos_type(stop))
return false; // not found
}
-
+
#if DEBUG_TIGHTFIND
- if (debug && (foo>>FLAGBITS)==key)
+ if (debug && (foo>>FLAGBITS)==key)
std::cerr << "found entry for " << key << std::endl;
- std::cerr << "current file position is " << in.tellg()
+ std::cerr << "current file position is " << in.tellg()
<< " (value read: " << key << std::endl;
#endif
-
+
assert(static_cast<filepos_type>(in.tellg()) < stop);
if ((foo>>FLAGBITS)==key)
{
@@ -288,51 +288,51 @@ bool debug=true;
else
return false;
}
-
+
bool
- tightfind(std::istream& in, filepos_type start, filepos_type stop,
+ tightfind(std::istream& in, filepos_type start, filepos_type stop,
id_type key, unsigned char& flags)
{
- // returns true if the value is found
+ // returns true if the value is found
#if DEBUG_TIGHTFIND
if (debug)
- std::cerr << "looking for " << key
+ std::cerr << "looking for " << key
<< " in range [" << start << ":" << stop << "]" << std::endl;
#endif
if (start==stop) return false;
assert(stop>start);
if ((start+1)==stop) return false; // list is empty
-
- unsigned int const granularity = sizeof(filepos_type)*5;
+
+ unsigned int const granularity = sizeof(filepos_type)*5;
// granularity: point where we should switch to linear search,
// because otherwise we might skip over the entry we are looking for
// because we land right in the middle of it.
-
+
if (stop > start + granularity)
- if (!tightfind_midpoint(in,start,stop))
+ if (!tightfind_midpoint(in,start,stop))
return false; // something went wrong (empty index)
-
+
if (stop <= start + granularity || in.tellg() == std::ios::pos_type(stop))
{ // If the search range is very short, tightfind_midpoint might skip the
// entry we are loking for. In this case, we can afford a linear
// search
return linear_search(in,start,stop,key,flags);
}
-
+
// perform binary search
filepos_type curpos = in.tellg();
id_type foo = tightread(in,stop);
id_type tmpid = foo>>FLAGBITS;
- if (tmpid == key)
+ if (tmpid == key)
{
- flags = foo%256;
+ flags = foo%256;
flags &= FLAGMASK;
#if DEBUG_TIGHTFIND
if (debug) std::cerr << "found entry for " << key << std::endl;
#endif
- return true; // done, found
+ return true; // done, found
}
- else if (tmpid > key)
+ else if (tmpid > key)
{ // look in the lower half
#if DEBUG_TIGHTFIND
if (debug) std::cerr << foo << " > " << key << std::endl;
@@ -343,7 +343,7 @@ bool debug=true;
{ // look in the upper half
while (static_cast<filepos_type>(in.tellg()) < stop
&& in.rdbuf()->in_avail() > 0 // is that still necessary???
- && in.peek() >= 128)
+ && in.peek() >= 128)
in.get(); // skip associated value
if (in.rdbuf()->in_avail() == 0 || in.tellg() == std::ios::pos_type(stop))
return false;
@@ -353,16 +353,16 @@ bool debug=true;
return tightfind(in,in.tellg(),stop,key,flags);
}
}
-
+
char const*
- tightfind(char const* const start,
+ tightfind(char const* const start,
char const* const stop,
- id_type key,
+ id_type key,
unsigned char& flags)
{
- // returns true if the value is found
-
+ // returns true if the value is found
+
if (start==stop) return NULL;
assert(stop>start);
if ((start+1)==stop) return NULL; // list is empty
@@ -374,11 +374,11 @@ bool debug=true;
id_type tmpId = foo>>FLAGBITS;
if (tmpId == key)
{
- flags = foo%256;
+ flags = foo%256;
flags &= FLAGMASK;
return after;
}
- else if (tmpId > key)
+ else if (tmpId > key)
{ // look in the lower half
return tightfind(start,p,key,flags);
}
@@ -389,14 +389,14 @@ bool debug=true;
return tightfind(after,stop,key,flags);
}
}
-
+
char const*
- tightfind_noflags(char const* const start,
+ tightfind_noflags(char const* const start,
char const* const stop,
id_type key)
{
- // returns true if the value is found
-
+ // returns true if the value is found
+
if (start==stop) return NULL;
assert(stop>start);
if ((start+1)==stop) return NULL; // list is empty
@@ -407,7 +407,7 @@ bool debug=true;
char const* after = tightread(p,stop,foo);
if (foo == key)
return after;
- else if (foo > key)
+ else if (foo > key)
{ // look in the lower half
return tightfind_noflags(start,p,key);
}
@@ -419,19 +419,19 @@ bool debug=true;
}
}
- bool
- linear_search_noflags(std::istream& in, filepos_type start,
+ bool
+ linear_search_noflags(std::istream& in, filepos_type start,
filepos_type stop, id_type key)
{ // performs a linear search in the range
- std::ios::pos_type mystop = stop;
+ std::ios::pos_type mystop = stop;
in.seekg(start);
id_type foo;
- for(foo = tightread(in,stop); foo < key; foo = tightread(in,stop))
+ for(foo = tightread(in,stop); foo < key; foo = tightread(in,stop))
{
// skip the value associated with key /foo/
- while (in.tellg() < mystop && in.peek() >= 128)
- in.get();
+ while (in.tellg() < mystop && in.peek() >= 128)
+ in.get();
if (in.tellg() == mystop)
return false; // not found
}
@@ -441,45 +441,45 @@ bool debug=true;
bool
- tightfind_noflags(std::istream& in, filepos_type start,
+ tightfind_noflags(std::istream& in, filepos_type start,
filepos_type stop, id_type key)
{
- // returns true if the value is found
+ // returns true if the value is found
if (start==stop) return false;
assert(stop>start);
if ((start+1)==stop) return false; // list is empty
-
+
// granularity: point where we should switch to linear search,
// because otherwise we might skip over the entry we are looking for
// because we land right in the middle of it.
- unsigned int const granularity = sizeof(filepos_type)*5;
+ unsigned int const granularity = sizeof(filepos_type)*5;
// UG: why 5? we should be able to get away with less!
-
+
if (stop > start + granularity)
- if (!tightfind_midpoint(in,start,stop))
+ if (!tightfind_midpoint(in,start,stop))
return false; // something went wrong (empty index)
-
+
// If the search range is very short, tightfind_midpoint might skip the
// entry we are loking for. In this case, we can afford a linear
// search
if (stop <= start + granularity || in.tellg() == std::ios::pos_type(stop))
return linear_search_noflags(in,start,stop,key);
-
+
// Otherwise, perform binary search
filepos_type curpos = in.tellg();
id_type foo = tightread(in,stop);
- if (foo == key)
- return true; // done, found
+ if (foo == key)
+ return true; // done, found
else if (foo > key) // search first half
return tightfind_noflags(in,start,curpos,key);
else // search second half
- {
- std::ios::pos_type mystop = stop;
+ {
+ std::ios::pos_type mystop = stop;
while (in.tellg() < mystop
&& in.rdbuf()->in_avail() > 0 // is that still necessary???
- && in.peek() >= 128)
+ && in.peek() >= 128)
in.get(); // skip associated value
if (in.rdbuf()->in_avail() == 0 || in.tellg() == mystop)
return false;
@@ -496,9 +496,9 @@ bool debug=true;
{
foo += 32768; // set first bit
while (data >= 32768) // = 2^15
- {
+ {
out.write(reinterpret_cast<char*>(&foo),2);
- data >>= 15;
+ data >>= 15;
foo = (data%32768)+32768;
}
}
@@ -507,7 +507,7 @@ bool debug=true;
while (data >= 32768) // = 2^15
{
out.write(reinterpret_cast<char*>(&foo),2);
- data >>= 15;
+ data >>= 15;
foo = data%32768;
}
}
@@ -515,8 +515,8 @@ bool debug=true;
}
char const*
- tightread8(char const* start,
- char const* stop,
+ tightread8(char const* start,
+ char const* stop,
uint64_t& dest)
{
static char bitmask=127;
@@ -570,8 +570,8 @@ bool debug=true;
}
char const*
- tightread4(char const* start,
- char const* stop,
+ tightread4(char const* start,
+ char const* stop,
uint32_t& dest)
{
static char bitmask=127;
@@ -605,8 +605,8 @@ bool debug=true;
}
char const*
- tightread2(char const* start,
- char const* stop,
+ tightread2(char const* start,
+ char const* stop,
uint16_t& dest)
{
static char bitmask=127;
diff --git a/moses/TranslationModel/UG/mm/tpt_tightindex.h b/moses/TranslationModel/UG/mm/tpt_tightindex.h
index 66594bc0a..967215aeb 100644
--- a/moses/TranslationModel/UG/mm/tpt_tightindex.h
+++ b/moses/TranslationModel/UG/mm/tpt_tightindex.h
@@ -28,46 +28,46 @@ extern bool debug;
namespace ugdiss
{
// void tightwritex(iostream& out, size_t data, bool flag);
- void
+ void
tightwrite(std::ostream& out, ::uint64_t data, bool flag);
- filepos_type
+ filepos_type
tightread(std::istream& in, std::ios::pos_type stop);
bool
- tightfind(std::istream& in,
- filepos_type start,
- filepos_type stop,
+ tightfind(std::istream& in,
+ filepos_type start,
+ filepos_type stop,
id_type key,
unsigned char& flags);
bool
- tightfind_noflags(std::istream& in,
- filepos_type start,
- filepos_type stop,
+ tightfind_noflags(std::istream& in,
+ filepos_type start,
+ filepos_type stop,
id_type key);
char const*
- tightfind(char const* const start,
+ tightfind(char const* const start,
char const* const stop,
- id_type key,
+ id_type key,
unsigned char& flags);
char const*
- tightfind_noflags(char const* const start,
+ tightfind_noflags(char const* const start,
char const* const stop,
id_type key);
- /** move read header in istream /in/ to the first entry after the midpoint of
- * file position range [start,stop) in in a 'tight' index
+ /** move read header in istream /in/ to the first entry after the midpoint of
+ * file position range [start,stop) in in a 'tight' index
* @param in the data input stream
* @param start start of the search range
* @param stop end of the search range
- * @return true if no errors occurred
- */
- bool
+ * @return true if no errors occurred
+ */
+ bool
tightfind_midpoint(std::istream& in, filepos_type start, filepos_type stop);
// the bitpattern functions below are for debugging
@@ -115,8 +115,8 @@ namespace ugdiss
#if 0
template<typename dtype>
- char const*
- tightread(char const* start,
+ char const*
+ tightread(char const* start,
char const* stop,
dtype& dest)
{
diff --git a/moses/TranslationModel/UG/mm/tpt_tokenindex.cc b/moses/TranslationModel/UG/mm/tpt_tokenindex.cc
index c6704beac..5fc6a6acc 100644
--- a/moses/TranslationModel/UG/mm/tpt_tokenindex.cc
+++ b/moses/TranslationModel/UG/mm/tpt_tokenindex.cc
@@ -15,15 +15,15 @@ namespace ugdiss
{
TokenIndex::
- TokenIndex(string unkToken)
+ TokenIndex(string unkToken)
: ridx(0),unkLabel(unkToken),unkId(1),numTokens(0)
- {
+ {
lock.reset(new boost::mutex());
};
-
+
#if 0
TokenIndex::
- TokenIndex(string fname, string unkToken,bool dyna)
+ TokenIndex(string fname, string unkToken,bool dyna)
: ridx(0),unkLabel(unkToken)
{
this->open(fname,unkToken,dyna);
@@ -58,8 +58,8 @@ namespace ugdiss
if (!unkToken.empty())
{
Entry const* bla = lower_bound(startIdx,endIdx,unkToken.c_str(),comp);
- unkId = ((bla < endIdx && unkToken == comp.base+bla->offset)
- ? bla->id
+ unkId = ((bla < endIdx && unkToken == comp.base+bla->offset)
+ ? bla->id
: numTokens);
}
this->dynamic=dyna;
@@ -69,7 +69,7 @@ namespace ugdiss
this->newWords.reset(new vector<string>());
}
}
-
+
void
TokenIndex::
close()
@@ -79,9 +79,9 @@ namespace ugdiss
TokenIndex::
CompFunc::
- CompFunc()
+ CompFunc()
{};
-
+
bool
TokenIndex::
CompFunc::
@@ -90,7 +90,7 @@ namespace ugdiss
return strcmp(base+A.offset,w) < 0;
};
- id_type
+ id_type
TokenIndex::
operator[](char const* p) const
{
@@ -101,7 +101,7 @@ namespace ugdiss
if (!dynamic) return unkId;
boost::lock_guard<boost::mutex> lk(*this->lock);
// stuff below is new as of 2011-01-30, for dynamic adding of unknown items
- // IMPORTANT: numTokens is not currently not changed, it is the number of
+ // IMPORTANT: numTokens is not currently not changed, it is the number of
// PRE-EXISING TOKENS, not including dynamically added Items
map<string,id_type>::value_type newItem(p,str2idExtra->size()+numTokens);
pair<map<string,id_type>::iterator,bool> foo = str2idExtra->insert(newItem);
@@ -110,14 +110,14 @@ namespace ugdiss
return foo.first->second;
}
- id_type
+ id_type
TokenIndex::
operator[](string const& w) const
{
return (*this)[w.c_str()];
}
- vector<char const*>
+ vector<char const*>
TokenIndex::
reverseIndex() const
{
@@ -125,11 +125,11 @@ namespace ugdiss
// cout << "tokenindex has " << numToks << " tokens" << endl;
- vector<char const*> v(numToks,NULL);
+ vector<char const*> v(numToks,NULL);
// v.reserve(endIdx-startIdx);
for (Entry const* x = startIdx; x != endIdx; x++)
{
- if (x->id >= v.size())
+ if (x->id >= v.size())
v.resize(x->id+1);
v[x->id] = comp.base+x->offset;
}
@@ -141,12 +141,12 @@ namespace ugdiss
TokenIndex::
operator[](id_type id) const
{
- if (!ridx.size())
+ if (!ridx.size())
{
boost::lock_guard<boost::mutex> lk(*this->lock);
if (!ridx.size()) ridx = reverseIndex();
}
- if (id < ridx.size())
+ if (id < ridx.size())
return ridx[id];
boost::lock_guard<boost::mutex> lk(*this->lock);
if (dynamic && id < ridx.size()+newWords->size())
@@ -156,26 +156,26 @@ namespace ugdiss
void
TokenIndex::
- iniReverseIndex()
+ iniReverseIndex()
{
- if (!ridx.size())
+ if (!ridx.size())
{
boost::lock_guard<boost::mutex> lk(*this->lock);
if (!ridx.size()) ridx = reverseIndex();
}
}
-
+
char const* const
TokenIndex::
- operator[](id_type id)
+ operator[](id_type id)
{
- if (!ridx.size())
+ if (!ridx.size())
{
boost::lock_guard<boost::mutex> lk(*this->lock);
if (!ridx.size()) ridx = reverseIndex();
}
- if (id < ridx.size())
+ if (id < ridx.size())
return ridx[id];
boost::lock_guard<boost::mutex> lk(*this->lock);
if (dynamic && id < ridx.size()+newWords->size())
@@ -183,11 +183,11 @@ namespace ugdiss
return unkLabel.c_str();
}
- string
+ string
TokenIndex::
- toString(vector<id_type> const& v)
+ toString(vector<id_type> const& v)
{
- if (!ridx.size())
+ if (!ridx.size())
{
boost::lock_guard<boost::mutex> lk(*this->lock);
if (!ridx.size()) ridx = reverseIndex();
@@ -198,11 +198,11 @@ namespace ugdiss
return buf.str();
}
- string
+ string
TokenIndex::
toString(vector<id_type> const& v) const
{
- if (!ridx.size())
+ if (!ridx.size())
{
boost::lock_guard<boost::mutex> lk(*this->lock);
if (!ridx.size()) ridx = reverseIndex();
@@ -213,11 +213,11 @@ namespace ugdiss
return buf.str();
}
- string
+ string
TokenIndex::
- toString(id_type const* start, id_type const* const stop)
+ toString(id_type const* start, id_type const* const stop)
{
- if (!ridx.size())
+ if (!ridx.size())
{
boost::lock_guard<boost::mutex> lk(*this->lock);
if (!ridx.size()) ridx = reverseIndex();
@@ -230,11 +230,11 @@ namespace ugdiss
return buf.str();
}
- string
+ string
TokenIndex::
toString(id_type const* start, id_type const* const stop) const
{
- if (!ridx.size())
+ if (!ridx.size())
{
boost::lock_guard<boost::mutex> lk(*this->lock);
if (!ridx.size()) ridx = reverseIndex();
@@ -266,7 +266,7 @@ namespace ugdiss
{
bool allgood = true; string w;
v.clear();
- for (istringstream buf(line); buf>>w;)
+ for (istringstream buf(line); buf>>w;)
{
v.push_back((*this)[w]);
allgood = allgood && v.back() > 1;
@@ -325,15 +325,15 @@ namespace ugdiss
}
void
- write_tokenindex_to_disk(vector<pair<string,uint32_t> > const& tok,
+ write_tokenindex_to_disk(vector<pair<string,uint32_t> > const& tok,
string const& ofile, string const& unkToken)
{
typedef pair<uint32_t,id_type> IndexEntry; // offset and id
// Write token strings to a buffer, keep track of offsets
- vector<IndexEntry> index(tok.size());
+ vector<IndexEntry> index(tok.size());
ostringstream data;
- id_type unkId = tok.size();
+ id_type unkId = tok.size();
for (size_t i = 0; i < tok.size(); i++)
{
if (tok[i].first == unkToken)
@@ -342,7 +342,7 @@ namespace ugdiss
index[i].second = tok[i].second; // respective ID
data<<tok[i].first<<char(0); // write string to buffer
}
-
+
// Now write the actual file
ofstream out(ofile.c_str());
uint32_t vsize = index.size(); // how many vocab items?
@@ -356,26 +356,26 @@ namespace ugdiss
out<<data.str();
}
- void
+ void
TokenIndex::
write(string fname)
{
typedef pair<string,uint32_t> Token; // token and id
- vector<Token> tok(totalVocabSize());
+ vector<Token> tok(totalVocabSize());
for (id_type i = 0; i < tok.size(); ++i)
tok[i] = Token((*this)[i],i);
sort(tok.begin(),tok.end());
write_tokenindex_to_disk(tok,fname,unkLabel);
}
-
- bool
+
+ bool
TokenIndex::
- isDynamic() const
+ isDynamic() const
{
return dynamic;
}
- bool
+ bool
TokenIndex::
setDynamic(bool on)
{
@@ -393,7 +393,7 @@ namespace ugdiss
}
return ret;
}
-
+
void
TokenIndex::
setUnkLabel(string unk)
diff --git a/moses/TranslationModel/UG/mm/tpt_tokenindex.h b/moses/TranslationModel/UG/mm/tpt_tokenindex.h
index 3051f07a5..9f7c69b3e 100644
--- a/moses/TranslationModel/UG/mm/tpt_tokenindex.h
+++ b/moses/TranslationModel/UG/mm/tpt_tokenindex.h
@@ -3,7 +3,7 @@
//
// - Vocab items should be stored in order of ids, so that we can determine their length
// by taking computing V[id+1] - V[id] instead of using strlen.
-//
+//
// (c) 2007,2008 Ulrich Germann
#ifndef __ugTokenIndex_hh
@@ -30,7 +30,7 @@ namespace ugdiss
/** Reverse index: maps from ID to char const* */
mutable vector<char const*> ridx;
/** Label for the UNK token */
- string unkLabel;
+ string unkLabel;
id_type unkId,numTokens;
/// New 2013-09-02: thread-safe
@@ -42,9 +42,9 @@ namespace ugdiss
boost::shared_ptr<vector<string> > newWords;
// The use of pointers to external items is a bit of a bad hack
// in terms of the semantic of TokenIndex const: since external items
- // are changed, the TokenIndex instance remains unchanged and const works,
- // even though in reality the underlying object on the coceptual level
- // *IS* changed. This means that dynamic TokenIndex instances are not
+ // are changed, the TokenIndex instance remains unchanged and const works,
+ // even though in reality the underlying object on the coceptual level
+ // *IS* changed. This means that dynamic TokenIndex instances are not
// thread-safe!
public:
@@ -53,7 +53,7 @@ namespace ugdiss
{
public:
uint32_t offset;
- id_type id;
+ id_type id;
};
/** Comparison function object used for Entry instances */
@@ -111,19 +111,19 @@ namespace ugdiss
void setUnkLabel(string unk);
};
- void
- write_tokenindex_to_disk(vector<pair<string,uint32_t> > const& tok,
+ void
+ write_tokenindex_to_disk(vector<pair<string,uint32_t> > const& tok,
string const& ofile, string const& unkToken);
/** for sorting words by frequency */
class compWords
{
string unk;
- public:
+ public:
compWords(string _unk) : unk(_unk) {};
-
+
bool
- operator()(pair<string,size_t> const& A,
+ operator()(pair<string,size_t> const& A,
pair<string,size_t> const& B) const
{
if (A.first == unk) return false;// do we still need this special treatment?
@@ -142,7 +142,7 @@ namespace ugdiss
typedef pair<string,uint32_t> Token; // token and id
- // first, sort the word list in decreasing order of frequency, so that we
+ // first, sort the word list in decreasing order of frequency, so that we
// can assign IDs in an encoding-efficient manner (high frequency. low ID)
vector<pair<string,size_t> > wcounts(M.size()); // for sorting by frequency
typedef typename MYMAP::const_iterator myIter;
@@ -156,16 +156,16 @@ namespace ugdiss
sort(wcounts.begin(),wcounts.end(),compFunc);
// Assign IDs ...
- vector<Token> tok(wcounts.size());
+ vector<Token> tok(wcounts.size());
for (size_t i = 0; i < wcounts.size(); i++)
tok[i] = Token(wcounts[i].first,i);
// and re-sort in alphabetical order
- sort(tok.begin(),tok.end());
+ sort(tok.begin(),tok.end());
write_tokenindex_to_disk(tok,ofile,unkToken);
}
template<typename Token>
- void
+ void
fill_token_seq(TokenIndex& V, string const& line, vector<Token>& dest)
{
istringstream buf(line); string w;
diff --git a/moses/TranslationModel/UG/mm/tpt_typedefs.h b/moses/TranslationModel/UG/mm/tpt_typedefs.h
index fea221d61..d2d2932de 100644
--- a/moses/TranslationModel/UG/mm/tpt_typedefs.h
+++ b/moses/TranslationModel/UG/mm/tpt_typedefs.h
@@ -12,4 +12,4 @@ namespace ugdiss
typedef uint64_t filepos_type;
typedef unsigned char uchar;
}
-#endif
+#endif
diff --git a/moses/TranslationModel/UG/mm/ug_bitext.cc b/moses/TranslationModel/UG/mm/ug_bitext.cc
index d2899e677..809476aa9 100644
--- a/moses/TranslationModel/UG/mm/ug_bitext.cc
+++ b/moses/TranslationModel/UG/mm/ug_bitext.cc
@@ -8,18 +8,18 @@ using namespace ugdiss;
using namespace std;
namespace Moses
{
- namespace bitext
+ namespace bitext
{
- float
+ float
lbop(size_t const tries, size_t const succ, float const confidence)
{
- return (confidence == 0
- ? float(succ)/tries
+ return (confidence == 0
+ ? float(succ)/tries
: (boost::math::binomial_distribution<>::
find_lower_bound_on_p(tries, succ, confidence)));
}
-
+
// template<>
void
@@ -42,37 +42,37 @@ namespace Moses
else
index.reset(new imTSA<tkn>(track,NULL,NULL));
}
-
+
snt_adder<L2R_Token<SimpleWordId> >::
- snt_adder(vector<string> const& s, TokenIndex& v,
- sptr<imTtrack<L2R_Token<SimpleWordId> > >& t,
+ snt_adder(vector<string> const& s, TokenIndex& v,
+ sptr<imTtrack<L2R_Token<SimpleWordId> > >& t,
sptr<imTSA<L2R_Token<SimpleWordId> > >& i)
- : snt(s), V(v), track(t), index(i)
+ : snt(s), V(v), track(t), index(i)
{ }
- bool
+ bool
expand_phrase_pair
- (vector<vector<ushort> >& a1,
+ (vector<vector<ushort> >& a1,
vector<vector<ushort> >& a2,
ushort const s2, // next word on in target side
ushort const L1, ushort const R1, // limits of previous phrase
ushort & s1, ushort & e1, ushort& e2) // start/end src; end trg
{
- if (a2[s2].size() == 0)
+ if (a2[s2].size() == 0)
{
cout << __FILE__ << ":" << __LINE__ << endl;
return false;
}
bitvector done1(a1.size());
bitvector done2(a2.size());
- vector <pair<ushort,ushort> > agenda;
+ vector <pair<ushort,ushort> > agenda;
// x.first: side (1 or 2)
// x.second: word position
agenda.reserve(a1.size() + a2.size());
agenda.push_back(pair<ushort,ushort>(2,s2));
e2 = s2;
s1 = e1 = a2[s2].front();
- if (s1 >= L1 && s1 < R1)
+ if (s1 >= L1 && s1 < R1)
{
cout << __FILE__ << ":" << __LINE__ << endl;
return false;
@@ -88,14 +88,14 @@ namespace Moses
done1.set(p);
BOOST_FOREACH(ushort i, a1[p])
{
- if (i < s2)
+ if (i < s2)
{
// cout << __FILE__ << ":" << __LINE__ << endl;
return false;
}
if (done2[i]) continue;
for (;e2 <= i;++e2)
- if (!done2[e2])
+ if (!done2[e2])
agenda.push_back(pair<ushort,ushort>(2,e2));
}
}
@@ -104,16 +104,16 @@ namespace Moses
done2.set(p);
BOOST_FOREACH(ushort i, a2[p])
{
- if ((e1 < L1 && i >= L1) ||
- (s1 >= R1 && i < R1) ||
+ if ((e1 < L1 && i >= L1) ||
+ (s1 >= R1 && i < R1) ||
(i >= L1 && i < R1))
{
- // cout << __FILE__ << ":" << __LINE__ << " "
- // << L1 << "-" << R1 << " " << i << " "
+ // cout << __FILE__ << ":" << __LINE__ << " "
+ // << L1 << "-" << R1 << " " << i << " "
// << s1 << "-" << e1<< endl;
return false;
}
-
+
if (e1 < i)
{
for (; e1 <= i; ++e1)
@@ -134,7 +134,7 @@ namespace Moses
return true;
}
- void
+ void
print_amatrix(vector<vector<ushort> > a1, uint32_t len2,
ushort b1, ushort e1, ushort b2, ushort e2)
{
@@ -163,7 +163,7 @@ namespace Moses
cout << string(90,'-') << endl;
}
- void
+ void
write_bitvector(bitvector const& v, ostream& out)
{
for (size_t i = v.find_first(); i < v.size();)
diff --git a/moses/TranslationModel/UG/mm/ug_bitext.h b/moses/TranslationModel/UG/mm/ug_bitext.h
index 7fb07fc26..ab5f2a24f 100644
--- a/moses/TranslationModel/UG/mm/ug_bitext.h
+++ b/moses/TranslationModel/UG/mm/ug_bitext.h
@@ -2,18 +2,18 @@
#pragma once
// Implementations of word-aligned bitext.
// Written by Ulrich Germann
-//
+//
// mmBitext: static, memory-mapped bitext
// imBitext: dynamic, in-memory bitext
//
// things we can do to speed up things:
-// - set up threads at startup time that force the
+// - set up threads at startup time that force the
// data in to memory sequentially
//
-// - use multiple agendas for better load balancing and to avoid
+// - use multiple agendas for better load balancing and to avoid
// competition for locks
-//
+//
#define UG_BITEXT_TRACK_ACTIVE_THREADS 0
@@ -70,7 +70,7 @@ namespace Moses {
float lbop(size_t const tries, size_t const succ, float const confidence);
void write_bitvector(bitvector const& v, ostream& out);
- struct
+ struct
ContextForQuery
{
// needs to be made thread-safe
@@ -85,7 +85,7 @@ namespace Moses {
template<typename TKN>
- class Bitext
+ class Bitext
{
public:
typedef TKN Token;
@@ -98,19 +98,19 @@ namespace Moses {
mutable boost::shared_mutex m_lock; // for thread-safe operation
class agenda; // for parallel sampling see ug_bitext_agenda.h
- mutable sptr<agenda> ag;
+ mutable sptr<agenda> ag;
size_t m_num_workers; // number of workers available to the agenda
- size_t m_default_sample_size;
+ size_t m_default_sample_size;
size_t m_pstats_cache_threshold; // threshold for caching sampling results
sptr<pstats::cache_t> m_cache1, m_cache2; // caches for sampling results
-
+
vector<string> m_docname;
map<string,id_type> m_docname2docid; // maps from doc names to ids
sptr<std::vector<id_type> > m_sid2docid; // maps from sentences to docs (ids)
mutable pplist_cache_t m_pplist_cache1, m_pplist_cache2;
- // caches for unbiased sampling; biased sampling uses the caches that
+ // caches for unbiased sampling; biased sampling uses the caches that
// are stored locally on the translation task
public:
@@ -123,9 +123,9 @@ namespace Moses {
sptr<TSA<Token> > I2; // indices
/// given the source phrase sid[start:stop]
- // find the possible start (s1 .. s2) and end (e1 .. e2)
+ // find the possible start (s1 .. s2) and end (e1 .. e2)
// points of the target phrase; if non-NULL, store word
- // alignments in *core_alignment. If /flip/, source phrase is
+ // alignments in *core_alignment. If /flip/, source phrase is
// L2.
bool find_trg_phr_bounds
( size_t const sid, // sentence to investigate
@@ -136,27 +136,27 @@ namespace Moses {
int& po_fwd, int& po_bwd, // phrase orientations
std::vector<uchar> * core_alignment, // stores the core alignment
bitvector* full_alignment, // stores full word alignment for this sent.
- bool const flip) const; // flip source and target (reverse lookup)
-
- // prep2 launches sampling and returns immediately.
+ bool const flip) const; // flip source and target (reverse lookup)
+
+ // prep2 launches sampling and returns immediately.
// lookup (below) waits for the job to finish before it returns
- sptr<pstats>
+ sptr<pstats>
prep2(ttasksptr const& ttask, iter const& phrase, int max_sample = -1) const;
-
+
public:
Bitext(size_t const max_sample = 1000, size_t const xnum_workers = 16);
- Bitext(Ttrack<Token>* const t1, Ttrack<Token>* const t2,
- Ttrack<char>* const tx,
+ Bitext(Ttrack<Token>* const t1, Ttrack<Token>* const t2,
+ Ttrack<char>* const tx,
TokenIndex* const v1, TokenIndex* const v2,
TSA<Token>* const i1, TSA<Token>* const i2,
- size_t const max_sample=1000,
+ size_t const max_sample=1000,
size_t const xnum_workers=16);
-
- virtual void
+
+ virtual void
open(string const base, string const L1, string const L2) = 0;
-
- sptr<pstats>
+
+ sptr<pstats>
lookup(ttasksptr const& ttask, iter const& phrase, int max_sample = -1) const;
void prep(ttasksptr const& ttask, iter const& phrase) const;
@@ -176,7 +176,7 @@ namespace Moses {
void
- mark_match(Token const* start, Token const* end, iter const& m,
+ mark_match(Token const* start, Token const* end, iter const& m,
bitvector& check) const;
void
write_yawat_alignment
@@ -184,10 +184,10 @@ namespace Moses {
#if 0
// needs to be adapted to the new API
void
- lookup(std::vector<Token> const& snt, TSA<Token>& idx,
+ lookup(std::vector<Token> const& snt, TSA<Token>& idx,
std::vector<std::vector<sptr<std::vector<PhrasePair<Token> > > > >& dest,
std::vector<std::vector<uint64_t> >* pidmap = NULL,
- typename PhrasePair<Token>::Scorer* scorer=NULL,
+ typename PhrasePair<Token>::Scorer* scorer=NULL,
sptr<SamplingBias const> const bias,
bool multithread=true) const;
#endif
@@ -233,32 +233,32 @@ namespace Moses {
Token const* t = (isL2 ? T2 : T1)->sntStart(sid) + off;
Token const* x = t + len;
TokenIndex const& V = isL2 ? *V2 : *V1;
- while (t < x)
+ while (t < x)
{
buf << V[t->id()];
if (++t < x) buf << " ";
}
return buf.str();
}
-
+
template<typename Token>
- size_t
+ size_t
Bitext<Token>::
- getDefaultSampleSize() const
- {
- return m_default_sample_size;
+ getDefaultSampleSize() const
+ {
+ return m_default_sample_size;
}
template<typename Token>
- void
+ void
Bitext<Token>::
setDefaultSampleSize(size_t const max_samples)
- {
+ {
boost::unique_lock<boost::shared_mutex> guard(m_lock);
- if (max_samples != m_default_sample_size)
+ if (max_samples != m_default_sample_size)
{
m_cache1.reset(new pstats::cache_t);
m_cache2.reset(new pstats::cache_t);
- m_default_sample_size = max_samples;
+ m_default_sample_size = max_samples;
}
}
@@ -274,12 +274,12 @@ namespace Moses {
template<typename Token>
Bitext<Token>::
- Bitext(Ttrack<Token>* const t1,
- Ttrack<Token>* const t2,
+ Bitext(Ttrack<Token>* const t1,
+ Ttrack<Token>* const t2,
Ttrack<char>* const tx,
- TokenIndex* const v1,
+ TokenIndex* const v1,
TokenIndex* const v2,
- TSA<Token>* const i1,
+ TSA<Token>* const i1,
TSA<Token>* const i2,
size_t const max_sample,
size_t const xnum_workers)
@@ -294,7 +294,7 @@ namespace Moses {
template<typename TKN> class snt_adder;
template<> class snt_adder<L2R_Token<SimpleWordId> >;
- template<>
+ template<>
class snt_adder<L2R_Token<SimpleWordId> >
{
typedef L2R_Token<SimpleWordId> TKN;
@@ -303,9 +303,9 @@ namespace Moses {
sptr<imTtrack<TKN> > & track;
sptr<imTSA<TKN > > & index;
public:
- snt_adder(std::vector<string> const& s, TokenIndex& v,
+ snt_adder(std::vector<string> const& s, TokenIndex& v,
sptr<imTtrack<TKN> >& t, sptr<imTSA<TKN> >& i);
-
+
void operator()();
};
@@ -313,17 +313,17 @@ namespace Moses {
bool
Bitext<Token>::
find_trg_phr_bounds
- (size_t const sid,
+ (size_t const sid,
size_t const start, size_t const stop,
size_t & s1, size_t & s2, size_t & e1, size_t & e2,
int & po_fwd, int & po_bwd,
- std::vector<uchar>* core_alignment, bitvector* full_alignment,
+ std::vector<uchar>* core_alignment, bitvector* full_alignment,
bool const flip) const
{
// if (core_alignment) cout << "HAVE CORE ALIGNMENT" << endl;
// a word on the core_alignment:
- //
+ //
// since fringe words ([s1,...,s2),[e1,..,e2) if s1 < s2, or e1
// < e2, respectively) are be definition unaligned, we store
// only the core alignment in *core_alignment it is up to the
@@ -364,18 +364,18 @@ namespace Moses {
else { p = binread(p,src); assert(p<x); p = binread(p,trg); }
UTIL_THROW_IF2((src >= slen1 || trg >= slen2),
- "Alignment range error at sentence " << sid << "!\n"
- << src << "/" << slen1 << " " <<
+ "Alignment range error at sentence " << sid << "!\n"
+ << src << "/" << slen1 << " " <<
trg << "/" << slen2);
-
- if (src < start || src >= stop)
+
+ if (src < start || src >= stop)
forbidden.set(trg);
else
{
lft = min(lft,trg);
rgt = max(rgt,trg);
}
- if (core_alignment)
+ if (core_alignment)
{
aln1[src].push_back(trg);
aln2[trg].push_back(src);
@@ -383,16 +383,16 @@ namespace Moses {
if (full_alignment)
full_alignment->set(src*slen2 + trg);
}
-
+
for (size_t i = lft; i <= rgt; ++i)
- if (forbidden[i])
+ if (forbidden[i])
return false;
-
+
s2 = lft; for (s1 = s2; s1 && !forbidden[s1-1]; --s1);
e1 = rgt+1; for (e2 = e1; e2 < forbidden.size() && !forbidden[e2]; ++e2);
-
+
if (lft > rgt) return false;
- if (core_alignment)
+ if (core_alignment)
{
core_alignment->clear();
for (size_t i = start; i < stop; ++i)
@@ -417,7 +417,7 @@ namespace Moses {
( string const& bserver, string const& text, ostream* log ) const
{
sptr<DocumentBias> ret;
- ret.reset(new DocumentBias(*m_sid2docid, m_docname2docid,
+ ret.reset(new DocumentBias(*m_sid2docid, m_docname2docid,
bserver, text, log));
return ret;
}
@@ -435,15 +435,15 @@ namespace Moses {
// and waits until the sampling is finished before it returns.
// This allows sampling in the background
template<typename Token>
- sptr<pstats>
+ sptr<pstats>
Bitext<Token>
- ::prep2
+ ::prep2
( ttasksptr const& ttask, iter const& phrase, int max_sample) const
{
if (max_sample < 0) max_sample = m_default_sample_size;
sptr<ContextScope> scope = ttask->GetScope();
sptr<ContextForQuery> context = scope->get<ContextForQuery>(this);
- sptr<SamplingBias> bias;
+ sptr<SamplingBias> bias;
if (context) bias = context->bias;
sptr<pstats::cache_t> cache;
@@ -451,9 +451,9 @@ namespace Moses {
// (still need to test what a good caching threshold is ...)
// - use the task-specific cache when there is a sampling bias
if (max_sample == int(m_default_sample_size)
- && phrase.approxOccurrenceCount() > m_pstats_cache_threshold)
+ && phrase.approxOccurrenceCount() > m_pstats_cache_threshold)
{
- cache = (phrase.root == I1.get()
+ cache = (phrase.root == I1.get()
? (bias ? context->cache1 : m_cache1)
: (bias ? context->cache2 : m_cache2));
// if (bias) cerr << "Using bias." << endl;
@@ -461,17 +461,17 @@ namespace Moses {
sptr<pstats> ret;
sptr<pstats> const* cached;
- if (cache && (cached = cache->get(phrase.getPid(), ret)) && *cached)
+ if (cache && (cached = cache->get(phrase.getPid(), ret)) && *cached)
return *cached;
boost::unique_lock<boost::shared_mutex> guard(m_lock);
- if (!ag)
+ if (!ag)
{
ag.reset(new agenda(*this));
if (m_num_workers > 1)
ag->add_workers(m_num_workers);
}
// cerr << "NEW FREQUENT PHRASE: "
- // << phrase.str(V1.get()) << " " << phrase.approxOccurrenceCount()
+ // << phrase.str(V1.get()) << " " << phrase.approxOccurrenceCount()
// << " at " << __FILE__ << ":" << __LINE__ << endl;
ret = ag->add_job(this, phrase, max_sample, bias);
if (cache) cache->set(phrase.getPid(),ret);
@@ -497,8 +497,8 @@ namespace Moses {
// CONSTRUCTOR
pstats2pplist(typename TSA<Token>::tree_iterator const& m,
Ttrack<Token> const& other,
- sptr<pstats> const& ps,
- std::vector<PhrasePair<Token> >& dest,
+ sptr<pstats> const& ps,
+ std::vector<PhrasePair<Token> >& dest,
typename PhrasePair<Token>::Scorer const* scorer)
: m_other(other)
, m_pstats(ps)
@@ -509,17 +509,17 @@ namespace Moses {
, m_pid1(m.getPid())
, m_is_inverse(false)
{ }
-
+
// WORKER
- void
- operator()()
+ void
+ operator()()
{
// wait till all statistics have been collected
boost::unique_lock<boost::mutex> lock(m_pstats->lock);
while (m_pstats->in_progress)
m_pstats->ready.wait(lock);
- m_pp.init(m_pid1, m_is_inverse, m_token,m_len,m_pstats.get(),0);
+ m_pp.init(m_pid1, m_is_inverse, m_token,m_len,m_pstats.get(),0);
// convert pstats entries to phrase pairs
pstats::trg_map_t::iterator a;
@@ -531,8 +531,8 @@ namespace Moses {
m_pp.good2 = max(uint32_t(m_pp.raw2 * float(m_pp.good1)/m_pp.raw1),
m_pp.joint);
size_t J = m_pp.joint<<7; // hard coded threshold of 1/128
- if (m_pp.good1 > J || m_pp.good2 > J) continue;
- if (m_scorer)
+ if (m_pp.good1 > J || m_pp.good2 > J) continue;
+ if (m_scorer)
{
(*m_scorer)(m_pp);
}
@@ -543,23 +543,23 @@ namespace Moses {
}
};
-#if 0
+#if 0
template<typename Token>
void
Bitext<Token>::
- lookup(std::vector<Token> const& snt, TSA<Token>& idx,
+ lookup(std::vector<Token> const& snt, TSA<Token>& idx,
std::vector<std::vector<sptr<std::vector<PhrasePair<Token> > > > >& dest,
std::vector<std::vector<uint64_t> >* pidmap,
typename PhrasePair<Token>::Scorer* scorer,
sptr<SamplingBias const> const& bias, bool multithread) const
{
// typedef std::vector<std::vector<sptr<std::vector<PhrasePair<Token> > > > > ret_t;
-
- dest.clear();
+
+ dest.clear();
dest.resize(snt.size());
if (pidmap) { pidmap->clear(); pidmap->resize(snt.size()); }
- // collect statistics in parallel, then build PT entries as
+ // collect statistics in parallel, then build PT entries as
// the sampling finishes
bool fwd = &idx == I1.get();
std::vector<boost::thread*> workers; // background threads doing the lookup
@@ -574,16 +574,16 @@ namespace Moses {
uint64_t key = m.getPid();
if (pidmap) (*pidmap)[i].push_back(key);
sptr<std::vector<PhrasePair<Token> > > pp = C.get(key);
- if (pp)
+ if (pp)
dest[i].push_back(pp);
- else
+ else
{
pp.reset(new std::vector<PhrasePair<Token> >());
C.set(key,pp);
dest[i].push_back(pp);
sptr<pstats> x = prep2(m, this->default_sample_size,bias);
pstats2pplist<Token> w(m,*(fwd?T2:T1),x,*pp,scorer);
- if (multithread)
+ if (multithread)
{
boost::thread* t = new boost::thread(w);
workers.push_back(t);
@@ -592,16 +592,16 @@ namespace Moses {
}
}
}
- for (size_t w = 0; w < workers.size(); ++w)
+ for (size_t w = 0; w < workers.size(); ++w)
{
- workers[w]->join();
+ workers[w]->join();
delete workers[w];
}
}
-#endif
+#endif
template<typename Token>
- sptr<pstats>
+ sptr<pstats>
Bitext<Token>::
lookup(ttasksptr const& ttask, iter const& phrase, int max_sample) const
{
@@ -615,7 +615,7 @@ namespace Moses {
boost::unique_lock<boost::shared_mutex> guard(m_lock);
typename agenda::worker(*this->ag)();
}
- else
+ else
{
boost::unique_lock<boost::mutex> lock(ret->lock);
while (ret->in_progress)
@@ -639,7 +639,7 @@ namespace Moses {
Token const* a = x;
Token const* b = s;
size_t i = 0;
- while (a && b && a->id() == b->id() && i < m.size())
+ while (a && b && a->id() == b->id() && i < m.size())
{
++i;
a = a->next();
@@ -669,7 +669,7 @@ namespace Moses {
pair<bitvector,bitvector> ag;
ag.first.resize(a1.size());
ag.second.resize(a2.size());
- char const* x = Tx->sntStart(sid);
+ char const* x = Tx->sntStart(sid);
size_t a, b;
while (x < Tx->sntEnd(sid))
{
@@ -677,11 +677,11 @@ namespace Moses {
x = binread(x,b);
if (a1.at(a) < 0 && a2.at(b) < 0)
{
- a1[a] = a2[b] = agroups.size();
- ag.first.reset();
- ag.second.reset();
- ag.first.set(a);
- ag.second.set(b);
+ a1[a] = a2[b] = agroups.size();
+ ag.first.reset();
+ ag.second.reset();
+ ag.first.set(a);
+ ag.second.set(b);
agroups.push_back(ag);
grouplabel.push_back(f1[a] || f2[b] ? "infocusbi" : "unspec");
}
@@ -697,7 +697,7 @@ namespace Moses {
agroups[a1[a]].second.set(b);
if (f1[a] || f2[b]) grouplabel[a1[a]] = "infocusbi";
}
- else
+ else
{
agroups[a1[a]].first |= agroups[a2[b]].first;
agroups[a1[a]].second |= agroups[a2[b]].second;
@@ -705,10 +705,10 @@ namespace Moses {
if (f1[a] || f2[b]) grouplabel[a1[a]] = "infocusbi";
}
}
-
+
for (a = 0; a < a1.size(); ++a)
{
- if (a1[a] < 0)
+ if (a1[a] < 0)
{
if (f1[a]) out << a << "::" << "infocusmono ";
continue;
@@ -729,7 +729,7 @@ namespace Moses {
#if 0
template<typename Token>
- sptr<pstats>
+ sptr<pstats>
Bitext<Token>::
lookup(siter const& phrase, size_t const max_sample,
sptr<SamplingBias const> const& bias) const
@@ -738,7 +738,7 @@ namespace Moses {
boost::unique_lock<boost::shared_mutex> guard(m_lock);
if (this->num_workers <= 1)
typename agenda::worker(*this->ag)();
- else
+ else
{
boost::unique_lock<boost::mutex> lock(ret->lock);
while (ret->in_progress)
@@ -747,25 +747,25 @@ namespace Moses {
return ret;
}
#endif
-
+
template<typename Token>
- void
- expand(typename Bitext<Token>::iter const& m,
- Bitext<Token> const& bt, pstats const& ps,
+ void
+ expand(typename Bitext<Token>::iter const& m,
+ Bitext<Token> const& bt, pstats const& ps,
std::vector<PhrasePair<Token> >& dest, ostream* log)
{
bool fwd = m.root == bt.I1.get();
dest.reserve(ps.trg.size());
PhrasePair<Token> pp;
pp.init(m.getPid(), !fwd, m.getToken(0), m.size(), &ps, 0);
- // cout << HERE << " "
+ // cout << HERE << " "
// << toString(*(fwd ? bt.V1 : bt.V2), pp.start1,pp.len1) << endl;
pstats::trg_map_t::const_iterator a;
for (a = ps.trg.begin(); a != ps.trg.end(); ++a)
{
uint32_t sid,off,len;
parse_pid(a->first, sid, off, len);
- pp.update(a->first, (fwd ? bt.T2 : bt.T1)->sntStart(sid)+off,
+ pp.update(a->first, (fwd ? bt.T2 : bt.T1)->sntStart(sid)+off,
len, a->second);
dest.push_back(pp);
}
@@ -773,24 +773,24 @@ namespace Moses {
#if 0
template<typename Token>
- class
+ class
PStatsCache
{
typedef boost::unordered_map<uint64_t, sptr<pstats> > my_cache_t;
boost::shared_mutex m_lock;
- my_cache_t m_cache;
-
+ my_cache_t m_cache;
+
public:
sptr<pstats> get(Bitext<Token>::iter const& phrase) const;
- sptr<pstats>
+ sptr<pstats>
add(Bitext<Token>::iter const& phrase) const
{
uint64_t pid = phrase.getPid();
- std::pair<my_cache_t::iterator,bool>
+ std::pair<my_cache_t::iterator,bool>
}
-
+
};
#endif
} // end of namespace bitext
diff --git a/moses/TranslationModel/UG/mm/ug_bitext_agenda.h b/moses/TranslationModel/UG/mm/ug_bitext_agenda.h
index a9632c056..d07fba6aa 100644
--- a/moses/TranslationModel/UG/mm/ug_bitext_agenda.h
+++ b/moses/TranslationModel/UG/mm/ug_bitext_agenda.h
@@ -1,8 +1,8 @@
// -*- c++ -*-
// to be included from ug_bitext.h
-// The agenda handles parallel sampling.
-// It maintains a queue of unfinished sampling jobs and
+// The agenda handles parallel sampling.
+// It maintains a queue of unfinished sampling jobs and
// assigns them to a pool of workers.
//
template<typename Token>
@@ -13,7 +13,7 @@ public:
class job;
class worker;
private:
- boost::mutex lock;
+ boost::mutex lock;
std::list<sptr<job> > joblist;
std::vector<sptr<boost::thread> > workers;
bool shutdown;
@@ -27,23 +27,23 @@ public:
agenda(Bitext<Token> const& bitext);
~agenda();
- void
+ void
add_workers(int n);
- sptr<pstats>
+ sptr<pstats>
add_job(Bitext<Token> const* const theBitext,
- typename TSA<Token>::tree_iterator const& phrase,
+ typename TSA<Token>::tree_iterator const& phrase,
size_t const max_samples, sptr<SamplingBias const> const& bias);
// add_job(Bitext<Token> const* const theBitext,
- // typename TSA<Token>::tree_iterator const& phrase,
+ // typename TSA<Token>::tree_iterator const& phrase,
// size_t const max_samples, SamplingBias const* const bias);
- sptr<job>
+ sptr<job>
get_job();
};
-
+
template<typename Token>
-class
+class
Bitext<Token>::agenda::
worker
{
@@ -61,9 +61,9 @@ void Bitext<Token>
::agenda
::add_workers(int n)
{
- static boost::posix_time::time_duration nodelay(0,0,0,0);
+ static boost::posix_time::time_duration nodelay(0,0,0,0);
boost::lock_guard<boost::mutex> guard(this->lock);
-
+
int target = max(1, int(n + workers.size() - this->doomed));
// house keeping: remove all workers that have finished
for (size_t i = 0; i < workers.size(); )
@@ -79,7 +79,7 @@ void Bitext<Token>
// cerr << workers.size() << "/" << target << " active" << endl;
if (int(workers.size()) > target)
this->doomed = workers.size() - target;
- else
+ else
while (int(workers.size()) < target)
{
sptr<boost::thread> w(new boost::thread(worker(*this)));
@@ -92,16 +92,16 @@ template<typename Token>
sptr<pstats> Bitext<Token>
::agenda
::add_job(Bitext<Token> const* const theBitext,
- typename TSA<Token>::tree_iterator const& phrase,
+ typename TSA<Token>::tree_iterator const& phrase,
size_t const max_samples, sptr<SamplingBias const> const& bias)
{
boost::unique_lock<boost::mutex> lk(this->lock);
- static boost::posix_time::time_duration nodelay(0,0,0,0);
+ static boost::posix_time::time_duration nodelay(0,0,0,0);
bool fwd = phrase.root == bt.I1.get();
- sptr<job> j(new job(theBitext, phrase, fwd ? bt.I1 : bt.I2,
+ sptr<job> j(new job(theBitext, phrase, fwd ? bt.I1 : bt.I2,
max_samples, fwd, bias));
j->stats->register_worker();
-
+
joblist.push_back(j);
if (joblist.size() == 1)
{
@@ -136,7 +136,7 @@ Bitext<Token>
sptr<job> ret;
if (this->shutdown) return ret;
boost::unique_lock<boost::mutex> lock(this->lock);
- if (this->doomed)
+ if (this->doomed)
{ // the number of workers has been reduced, tell the redundant once to quit
--this->doomed;
return ret;
@@ -145,15 +145,15 @@ Bitext<Token>
typename list<sptr<job> >::iterator j = joblist.begin();
while (j != joblist.end())
{
- if ((*j)->done())
+ if ((*j)->done())
{
(*j)->stats->release();
joblist.erase(j++);
- }
+ }
else if ((*j)->workers >= 4) ++j; // no more than 4 workers per job
else break; // found one
}
- if (joblist.size())
+ if (joblist.size())
{
ret = j == joblist.end() ? joblist.front() : *j;
// if we've reached the end of the queue (all jobs have 4 workers on them),
@@ -175,12 +175,12 @@ agenda::
for (size_t i = 0; i < workers.size(); ++i)
workers[i]->join();
}
-
+
template<typename Token>
Bitext<Token>::
agenda::
agenda(Bitext<Token> const& thebitext)
: shutdown(false), doomed(0), bt(thebitext)
{ }
-
+
diff --git a/moses/TranslationModel/UG/mm/ug_bitext_agenda_job.h b/moses/TranslationModel/UG/mm/ug_bitext_agenda_job.h
index 0e26b6182..0e0624351 100644
--- a/moses/TranslationModel/UG/mm/ug_bitext_agenda_job.h
+++ b/moses/TranslationModel/UG/mm/ug_bitext_agenda_job.h
@@ -4,48 +4,48 @@
// todo: add check to enforce this
template<typename Token>
-class
+class
Bitext<Token>::agenda::
-job
+job
{
#if UG_BITEXT_TRACK_ACTIVE_THREADS
static ThreadSafeCounter active;
#endif
Bitext<Token> const* const m_bitext;
- boost::mutex lock;
+ boost::mutex lock;
friend class agenda;
- boost::taus88 rnd; // every job has its own pseudo random generator
+ boost::taus88 rnd; // every job has its own pseudo random generator
double rnddenom; // denominator for scaling random sampling
size_t min_diverse; // minimum number of distinct translations
- bool flip_coin(uint64_t & sid, uint64_t & offset);
+ bool flip_coin(uint64_t & sid, uint64_t & offset);
bool step(uint64_t & sid, uint64_t & offset); // proceed to next occurrence
public:
size_t workers; // how many workers are working on this job?
sptr<TSA<Token> const> root; // root of the underlying suffix array
- char const* next; // next position to read from
+ char const* next; // next position to read from
char const* stop; // end of index range
size_t max_samples; // how many samples to extract at most
size_t ctr; /* # of phrase occurrences considered so far
- * # of samples chosen is stored in stats->good
+ * # of samples chosen is stored in stats->good
*/
size_t len; // phrase length
- bool fwd; // if true, source phrase is L1
+ bool fwd; // if true, source phrase is L1
sptr<pstats> stats; // stores statistics collected during sampling
sptr<SamplingBias const> const m_bias; // sentence-level bias for sampling
float bias_total;
bool nextSample(uint64_t & sid, uint64_t & offset); // select next occurrence
-
- int
+
+ int
check_sample_distribution(uint64_t const& sid, uint64_t const& offset);
- // for biased sampling: ensure the distribution approximately matches
+ // for biased sampling: ensure the distribution approximately matches
// the bias
-
+
bool done() const;
- job(Bitext<Token> const* const theBitext,
- typename TSA<Token>::tree_iterator const& m,
- sptr<TSA<Token> > const& r, size_t maxsmpl, bool isfwd,
+ job(Bitext<Token> const* const theBitext,
+ typename TSA<Token>::tree_iterator const& m,
+ sptr<TSA<Token> > const& r, size_t maxsmpl, bool isfwd,
sptr<SamplingBias const> const& bias);
~job();
};
@@ -57,15 +57,15 @@ Bitext<Token>::agenda::job
if (stats) stats.reset();
#if UG_BITEXT_TRACK_ACTIVE_THREADS
// counter may not exist any more at destruction time, hence try .. catch ...
- try { --active; } catch (...) {}
+ try { --active; } catch (...) {}
#endif
}
template<typename Token>
Bitext<Token>::agenda::job
::job(Bitext<Token> const* const theBitext,
- typename TSA<Token>::tree_iterator const& m,
- sptr<TSA<Token> > const& r, size_t maxsmpl,
+ typename TSA<Token>::tree_iterator const& m,
+ sptr<TSA<Token> > const& r, size_t maxsmpl,
bool isfwd, sptr<SamplingBias const> const& bias)
: m_bitext(theBitext)
, rnd(0)
@@ -83,9 +83,9 @@ Bitext<Token>::agenda::job
{
stats.reset(new pstats());
stats->raw_cnt = m.approxOccurrenceCount();
- bias_total = 0;
-
- // we need to renormalize on the fly, as the summ of all sentence probs over
+ bias_total = 0;
+
+ // we need to renormalize on the fly, as the summ of all sentence probs over
// all candidates (not all sentences in the corpus) needs to add to 1.
// Profiling question: how much does that cost us?
if (m_bias)
@@ -98,8 +98,8 @@ Bitext<Token>::agenda::job
x = root->readSid(x,stop,sid);
x = root->readOffset(x,stop,offset);
#if 0
- cerr << ctr++ << " " << m.str(m_bitext->V1.get())
- << " " << sid << "/" << root->getCorpusSize()
+ cerr << ctr++ << " " << m.str(m_bitext->V1.get())
+ << " " << sid << "/" << root->getCorpusSize()
<< " " << offset << " " << stop-x << endl;
#endif
bias_total += (*m_bias)[sid];
@@ -108,7 +108,7 @@ Bitext<Token>::agenda::job
}
#if UG_BITEXT_TRACK_ACTIVE_THREADS
++active;
- // if (active%5 == 0)
+ // if (active%5 == 0)
// cerr << size_t(active) << " active jobs at " << __FILE__ << ":" << __LINE__ << endl;
#endif
}
@@ -116,8 +116,8 @@ Bitext<Token>::agenda::job
template<typename Token>
bool Bitext<Token>::agenda::job
::done() const
-{
- return (max_samples && stats->good >= max_samples) || next == stop;
+{
+ return (max_samples && stats->good >= max_samples) || next == stop;
}
template<typename Token>
@@ -125,39 +125,39 @@ int Bitext<Token>::agenda::job
::check_sample_distribution(uint64_t const& sid, uint64_t const& offset)
{ // ensure that the sampled distribution approximately matches the bias
// @return 0: SKIP this occurrence
- // @return 1: consider this occurrence for sampling
+ // @return 1: consider this occurrence for sampling
// @return 2: include this occurrence in the sample by all means
if (!m_bias) return 1;
-
+
using namespace boost::math;
typedef boost::math::binomial_distribution<> binomial;
-
+
ostream* log = m_bias->loglevel > 1 ? m_bias->log : NULL;
-
- float p = (*m_bias)[sid];
- id_type docid = m_bias->GetClass(sid);
- uint32_t k = docid < stats->indoc.size() ? stats->indoc[docid] : 0;
+
+ float p = (*m_bias)[sid];
+ id_type docid = m_bias->GetClass(sid);
+ uint32_t k = docid < stats->indoc.size() ? stats->indoc[docid] : 0;
// always consider candidates from dominating documents and
// from documents that have not been considered at all yet
bool ret = (p > .5 || k == 0);
-
+
if (ret && !log) return 1;
-
+
uint32_t N = stats->good; // number of trials
- float d = cdf(complement(binomial(N, p), k));
+ float d = cdf(complement(binomial(N, p), k));
// d: probability that samples contains k or more instances from doc #docid
- ret = ret || d >= .05;
-
+ ret = ret || d >= .05;
+
if (log)
{
Token const* t = root->getCorpus()->sntStart(sid)+offset;
Token const* x = t - min(offset,uint64_t(3));
- Token const* e = t+4;
+ Token const* e = t+4;
if (e > root->getCorpus()->sntEnd(sid))
e = root->getCorpus()->sntEnd(sid);
- *log << docid << ":" << sid << " " << size_t(k) << "/" << N
+ *log << docid << ":" << sid << " " << size_t(k) << "/" << N
<< " @" << p << " => " << d << " [";
for (size_t i = 0; i < stats->indoc.size(); ++i)
{
@@ -170,8 +170,8 @@ int Bitext<Token>::agenda::job
else if (p < .5 && d > .9) *log << "FORCE";
*log << endl;
}
-
- return (ret ? (p < .5 && d > .9) ? 2 : 1 : 0);
+
+ return (ret ? (p < .5 && d > .9) ? 2 : 1 : 0);
}
template<typename Token>
@@ -186,7 +186,7 @@ bool Bitext<Token>::agenda::job
size_t options_total = max(stats->raw_cnt, this->ctr);
size_t options_left = (options_total - this->ctr);
size_t random_number = options_left * (rnd()/(rnd.max()+1.));
- size_t threshold;
+ size_t threshold;
if (bias_total) // we have a bias and there are candidates with non-zero prob
threshold = ((*m_bias)[sid]/bias_total * options_total * max_samples);
else // no bias, or all have prob 0 (can happen with a very opinionated bias)
@@ -199,7 +199,7 @@ bool Bitext<Token>::agenda::job
::step(uint64_t & sid, uint64_t & offset)
{ // caller must lock!
if (next == stop) return false;
- UTIL_THROW_IF2
+ UTIL_THROW_IF2
( next > stop, "Fatal error at " << HERE << ". How did that happen?" );
// boost::lock_guard<boost::mutex> jguard(lock); // caller must lock!
next = root->readSid(next, stop, sid);
@@ -214,21 +214,21 @@ bool Bitext<Token>::agenda::job
{
boost::lock_guard<boost::mutex> jguard(lock);
if (max_samples == 0) // no sampling, consider all occurrences
- return step(sid, offset);
+ return step(sid, offset);
- while (step(sid,offset))
+ while (step(sid,offset))
{
size_t good = stats->good;
size_t diversity = stats->trg.size();
- if (good >= max_samples && diversity >= min_diverse)
+ if (good >= max_samples && diversity >= min_diverse)
return false; // done
- // flip_coin softly enforces approximation of the sampling to the
+ // flip_coin softly enforces approximation of the sampling to the
// bias (occurrences that would steer the sample too far from the bias
// are ruled out), and flips a biased coin otherwise.
if (!flip_coin(sid,offset)) continue;
return true;
- }
+ }
return false;
}
diff --git a/moses/TranslationModel/UG/mm/ug_bitext_agenda_worker.h b/moses/TranslationModel/UG/mm/ug_bitext_agenda_worker.h
index 92ed3d36a..5ff39312c 100644
--- a/moses/TranslationModel/UG/mm/ug_bitext_agenda_worker.h
+++ b/moses/TranslationModel/UG/mm/ug_bitext_agenda_worker.h
@@ -7,13 +7,13 @@ Bitext<Token>::agenda
::operator()()
{
// things to do:
- //
+ //
// - have each worker maintain their own pstats object and merge
// results at the end (to minimize mutex locking);
- //
+ //
// - use a non-locked, monotonically increasing counter to
// ensure the minimum size of samples considered --- it's OK if
- // we look at more samples than required. This way, we can
+ // we look at more samples than required. This way, we can
// reduce the number of lock / unlock operations we need to do
// during sampling.
@@ -38,13 +38,13 @@ Bitext<Token>::agenda
s1, s2, e1, e2, po_fwd, po_bwd, // bounds & orientation
&aln, full_aln, !j->fwd)); // aln info / flip sides?
- if (!good)
+ if (!good)
{ // no good, probably because phrase is not coherent
j->stats->count_sample(docid, 0, po_fwd, po_bwd);
continue;
}
- // all good: register this sample as valid
+ // all good: register this sample as valid
size_t num_pairs = (s2-s1+1) * (e2-e1+1);
j->stats->count_sample(docid, num_pairs, po_fwd, po_bwd);
@@ -52,14 +52,14 @@ Bitext<Token>::agenda
Token const* t = ag.bt.T2->sntStart(sid);
Token const* eos = ag.bt.T2->sntEnd(sid);
cerr << "[" << j->stats->good + 1 << "] ";
- while (t != eos) cerr << (*ag.bt.V2)[(t++)->id()] << " ";
+ while (t != eos) cerr << (*ag.bt.V2)[(t++)->id()] << " ";
cerr << "[" << docid << "]" << endl;
#endif
float sample_weight = 1./num_pairs;
Token const* o = (j->fwd ? ag.bt.T2 : ag.bt.T1)->sntStart(sid);
- // adjust offsets in phrase-internal aligment
+ // adjust offsets in phrase-internal aligment
for (size_t k = 1; k < aln.size(); k += 2) aln[k] += s2 - s1;
vector<uint64_t> seen; seen.reserve(10);
@@ -93,7 +93,7 @@ Bitext<Token>::agenda
UTIL_THROW_IF2(!ok, "Could not extend target phrase.");
}
if (s < s2) // shift phrase-internal alignments
- for (size_t k = 1; k < aln.size(); k += 2)
+ for (size_t k = 1; k < aln.size(); k += 2)
--aln[k];
}
}
diff --git a/moses/TranslationModel/UG/mm/ug_bitext_jstats.cc b/moses/TranslationModel/UG/mm/ug_bitext_jstats.cc
index cb3804edc..bcda9ebf3 100644
--- a/moses/TranslationModel/UG/mm/ug_bitext_jstats.cc
+++ b/moses/TranslationModel/UG/mm/ug_bitext_jstats.cc
@@ -16,12 +16,12 @@ namespace Moses
jstats::
jstats()
: my_rcnt(0), my_cnt2(0), my_wcnt(0)
- {
- for (int i = 0; i <= Moses::LRModel::NONE; ++i)
+ {
+ for (int i = 0; i <= Moses::LRModel::NONE; ++i)
ofwd[i] = obwd[i] = 0;
my_aln.reserve(1);
}
-
+
jstats::
jstats(jstats const& other)
{
@@ -35,8 +35,8 @@ namespace Moses
obwd[i] = other.obwd[i];
}
}
-
- uint32_t
+
+ uint32_t
jstats::
dcnt_fwd(PhraseOrientation const idx) const
{
@@ -44,15 +44,15 @@ namespace Moses
return ofwd[idx];
}
- uint32_t
+ uint32_t
jstats::
dcnt_bwd(PhraseOrientation const idx) const
{
assert(idx <= Moses::LRModel::NONE);
return obwd[idx];
}
-
- void
+
+ void
jstats::
add(float w, vector<uchar> const& a, uint32_t const cnt2,
uint32_t fwd_orient, uint32_t bwd_orient, int const docid)
@@ -65,7 +65,7 @@ namespace Moses
{
size_t i = 0;
while (i < my_aln.size() && my_aln[i].second != a) ++i;
- if (i == my_aln.size())
+ if (i == my_aln.size())
my_aln.push_back(pair<size_t,vector<uchar> >(1,a));
else
my_aln[i].first++;
@@ -83,7 +83,7 @@ namespace Moses
vector<pair<size_t, vector<uchar> > > const&
jstats::
- aln() const
+ aln() const
{ return my_aln; }
} // namespace bitext
diff --git a/moses/TranslationModel/UG/mm/ug_bitext_jstats.h b/moses/TranslationModel/UG/mm/ug_bitext_jstats.h
index ce2e89438..dade27649 100644
--- a/moses/TranslationModel/UG/mm/ug_bitext_jstats.h
+++ b/moses/TranslationModel/UG/mm/ug_bitext_jstats.h
@@ -4,20 +4,20 @@
#include "ug_lexical_reordering.h"
#include <boost/thread.hpp>
-namespace Moses
+namespace Moses
{
namespace bitext
{
using namespace ugdiss;
- // "joint" (i.e., phrase pair) statistics
+ // "joint" (i.e., phrase pair) statistics
class
jstats
{
boost::mutex lock;
uint32_t my_rcnt; // unweighted joint count
uint32_t my_cnt2; // raw counts L2
- float my_wcnt; // weighted joint count
+ float my_wcnt; // weighted joint count
// to do: use a static alignment pattern store that stores each pattern only
// once, so that we don't have to store so many alignment vectors
@@ -33,18 +33,18 @@ namespace Moses
uint32_t rcnt() const; // raw joint counts
uint32_t cnt2() const; // raw target phrase occurrence count
float wcnt() const; // weighted joint counts
-
+
vector<pair<size_t, vector<uchar> > > const & aln() const;
void add(float w, vector<uchar> const& a, uint32_t const cnt2,
- uint32_t fwd_orient, uint32_t bwd_orient,
+ uint32_t fwd_orient, uint32_t bwd_orient,
int const docid);
void invalidate();
void validate();
bool valid();
uint32_t dcnt_fwd(PhraseOrientation const idx) const;
uint32_t dcnt_bwd(PhraseOrientation const idx) const;
- void fill_lr_vec(Moses::LRModel::Direction const& dir,
- Moses::LRModel::ModelType const& mdl,
+ void fill_lr_vec(Moses::LRModel::Direction const& dir,
+ Moses::LRModel::ModelType const& mdl,
vector<float>& v);
};
}
diff --git a/moses/TranslationModel/UG/mm/ug_bitext_pstats.cc b/moses/TranslationModel/UG/mm/ug_bitext_pstats.cc
index 482957508..580d7669b 100644
--- a/moses/TranslationModel/UG/mm/ug_bitext_pstats.cc
+++ b/moses/TranslationModel/UG/mm/ug_bitext_pstats.cc
@@ -8,11 +8,11 @@ namespace Moses
#if UG_BITEXT_TRACK_ACTIVE_THREADS
ThreadSafeCounter pstats::active;
#endif
-
+
pstats::
pstats() : raw_cnt(0), sample_cnt(0), good(0), sum_pairs(0), in_progress(0)
{
- for (int i = 0; i <= Moses::LRModel::NONE; ++i)
+ for (int i = 0; i <= Moses::LRModel::NONE; ++i)
ofwd[i] = obwd[i] = 0;
}
@@ -21,7 +21,7 @@ namespace Moses
{
#if UG_BITEXT_TRACK_ACTIVE_THREADS
// counter may not exist any more at destruction time, so try ... catch
- try { --active; } catch (...) {}
+ try { --active; } catch (...) {}
#endif
}
@@ -33,7 +33,7 @@ namespace Moses
++this->in_progress;
this->lock.unlock();
}
-
+
void
pstats::
release()
@@ -44,9 +44,9 @@ namespace Moses
this->lock.unlock();
}
- void
+ void
pstats
- ::count_sample(int const docid, size_t const num_pairs,
+ ::count_sample(int const docid, size_t const num_pairs,
int const po_fwd, int const po_bwd)
{
boost::lock_guard<boost::mutex> guard(lock);
@@ -65,10 +65,10 @@ namespace Moses
bool
pstats::
- add(uint64_t pid, float const w,
- vector<uchar> const& a,
- uint32_t const cnt2,
- uint32_t fwd_o,
+ add(uint64_t pid, float const w,
+ vector<uchar> const& a,
+ uint32_t const cnt2,
+ uint32_t fwd_o,
uint32_t bwd_o, int const docid)
{
boost::lock_guard<boost::mutex> guard(this->lock);
@@ -76,7 +76,7 @@ namespace Moses
entry.add(w, a, cnt2, fwd_o, bwd_o, docid);
if (this->good < entry.rcnt())
{
- UTIL_THROW(util::Exception, "more joint counts than good counts:"
+ UTIL_THROW(util::Exception, "more joint counts than good counts:"
<< entry.rcnt() << "/" << this->good << "!");
}
return true;
diff --git a/moses/TranslationModel/UG/mm/ug_bitext_pstats.h b/moses/TranslationModel/UG/mm/ug_bitext_pstats.h
index c5b6c0152..9a14e378b 100644
--- a/moses/TranslationModel/UG/mm/ug_bitext_pstats.h
+++ b/moses/TranslationModel/UG/mm/ug_bitext_pstats.h
@@ -12,7 +12,7 @@ namespace Moses
{
namespace bitext
{
- struct
+ struct
pstats
{
typedef boost::unordered_map<uint64_t, sptr<pstats> > map_t;
@@ -23,8 +23,8 @@ namespace Moses
#endif
boost::mutex lock; // for parallel gathering of stats
boost::condition_variable ready; // consumers can wait for me to be ready
-
- size_t raw_cnt; // (approximate) raw occurrence count
+
+ size_t raw_cnt; // (approximate) raw occurrence count
size_t sample_cnt; // number of instances selected during sampling
size_t good; // number of selected instances with valid word alignments
size_t sum_pairs; // total number of target phrases extracted (can be > raw_cnt)
@@ -34,25 +34,25 @@ namespace Moses
uint32_t obwd[Moses::LRModel::NONE+1]; // distribution of bwd phrase orientations
std::vector<uint32_t> indoc; // distribution over where samples came from
-
+
typedef std::map<uint64_t, jstats> trg_map_t;
trg_map_t trg;
pstats();
~pstats();
void release();
void register_worker();
- size_t count_workers() { return in_progress; }
+ size_t count_workers() { return in_progress; }
- bool
+ bool
add(uint64_t const pid, // target phrase id
float const w, // sample weight (1./(# of phrases extractable))
alnvec const& a, // local alignment
uint32_t const cnt2, // raw target phrase count
uint32_t fwd_o, // fwd. phrase orientation
uint32_t bwd_o, // bwd. phrase orientation
- int const docid); // document where sample was found
+ int const docid); // document where sample was found
- void
+ void
count_sample(int const docid, // document where sample was found
size_t const num_pairs, // # of phrases extractable here
int const po_fwd, // fwd phrase orientation
diff --git a/moses/TranslationModel/UG/mm/ug_conll_bottom_up_token.h b/moses/TranslationModel/UG/mm/ug_conll_bottom_up_token.h
index 845fe374e..89dc93ad1 100644
--- a/moses/TranslationModel/UG/mm/ug_conll_bottom_up_token.h
+++ b/moses/TranslationModel/UG/mm/ug_conll_bottom_up_token.h
@@ -25,13 +25,13 @@ namespace ugdiss
return NULL;
};
- ConllBottomUpToken const*
- stop(ConllBottomUpToken const* seqStart,
+ ConllBottomUpToken const*
+ stop(ConllBottomUpToken const* seqStart,
ConllBottomUpToken const* seqEnd) const
{
return NULL;
};
-
+
bool operator<(T const& other) const { return this->cmp(other) < 0; }
bool operator>(T const& other) const { return this->cmp(other) > 0; }
bool operator==(T const& other) const { return this->cmp(other) == 0; }
@@ -44,9 +44,9 @@ namespace ugdiss
return false;
}
};
-
+
template<typename T>
- ConllBottomUpToken<T> const*
+ ConllBottomUpToken<T> const*
ConllBottomUpToken<T>::
next(int length) const
{
diff --git a/moses/TranslationModel/UG/mm/ug_conll_record.h b/moses/TranslationModel/UG/mm/ug_conll_record.h
index ea2cda29e..e52a4974b 100644
--- a/moses/TranslationModel/UG/mm/ug_conll_record.h
+++ b/moses/TranslationModel/UG/mm/ug_conll_record.h
@@ -3,22 +3,22 @@
#include "ug_typedefs.h"
// Base class for dependency tree corpora with POS and Lemma annotations
-namespace ugdiss
+namespace ugdiss
{
using namespace std;
- class
- Conll_Record
+ class
+ Conll_Record
{
public:
id_type sform; // surface form
id_type lemma; // lemma
uchar majpos; // major part of speech
uchar minpos; // minor part of speech
- short parent; // id of parent
+ short parent; // id of parent
uchar dtype; // dependency type
uchar info[3]; /* additional information (depends on the part of speech)
- * a place holder for the time being, to ensure proper
+ * a place holder for the time being, to ensure proper
* alignment in memory */
Conll_Record();
Conll_Record const* up(int length=1) const;
@@ -38,8 +38,8 @@ namespace ugdiss
* @parameter PS Vocabulary for part-of-speech
* @parameter DT Vocabulary for dependency type
*/
- Conll_Record(string const& line,
- TokenIndex const& SF, TokenIndex const& LM,
+ Conll_Record(string const& line,
+ TokenIndex const& SF, TokenIndex const& LM,
TokenIndex const& PS, TokenIndex const& DT);
/** store the record as-is to disk (for memory-mapped reading later) */
@@ -62,7 +62,7 @@ namespace ugdiss
// this is for contigous word sequences extracted from longer sequences
// adjust parent pointers to 0 (no parent) if they point out of the
// subsequence
- void
+ void
fixParse(Conll_Record* start, Conll_Record* stop);
} // end of namespace ugdiss
diff --git a/moses/TranslationModel/UG/mm/ug_corpus_token.cc b/moses/TranslationModel/UG/mm/ug_corpus_token.cc
index 742c17ace..4be8cbd95 100644
--- a/moses/TranslationModel/UG/mm/ug_corpus_token.cc
+++ b/moses/TranslationModel/UG/mm/ug_corpus_token.cc
@@ -6,9 +6,9 @@ namespace ugdiss
{
id_type const&
SimpleWordId::
- id() const
- {
- return theID;
+ id() const
+ {
+ return theID;
}
int
diff --git a/moses/TranslationModel/UG/mm/ug_corpus_token.h b/moses/TranslationModel/UG/mm/ug_corpus_token.h
index c1baaf21e..b9693cbf2 100644
--- a/moses/TranslationModel/UG/mm/ug_corpus_token.h
+++ b/moses/TranslationModel/UG/mm/ug_corpus_token.h
@@ -19,7 +19,7 @@ namespace ugdiss
{
/** Simple wrapper around id_type for use with the Ttrack/TSA template classes */
- class SimpleWordId
+ class SimpleWordId
{
id_type theID;
public:
@@ -29,7 +29,7 @@ namespace ugdiss
bool operator==(SimpleWordId const& other) const;
id_type remap(vector<id_type const*> const& m) const;
};
-
+
/** Token class for suffix arrays */
template<typename T>
class
@@ -43,16 +43,16 @@ namespace ugdiss
L2R_Token const* next(int n=1) const { return this+n; }
- /** return a pointer to the end of a sentence; used as a stopping criterion during
+ /** return a pointer to the end of a sentence; used as a stopping criterion during
* comparison of suffixes; see Ttrack::cmp() */
template<typename TTRACK_TYPE>
- L2R_Token const* stop(TTRACK_TYPE const& C, id_type sid) const
- {
- return reinterpret_cast<L2R_Token<T> const*>(C.sntEnd(sid));
+ L2R_Token const* stop(TTRACK_TYPE const& C, id_type sid) const
+ {
+ return reinterpret_cast<L2R_Token<T> const*>(C.sntEnd(sid));
}
- L2R_Token const* stop(L2R_Token const* seqStart, L2R_Token const* seqEnd) const
- {
+ L2R_Token const* stop(L2R_Token const* seqStart, L2R_Token const* seqEnd) const
+ {
return seqEnd;
}
@@ -69,20 +69,20 @@ namespace ugdiss
{
public:
typedef T Token;
-
+
R2L_Token() : T() {};
R2L_Token(id_type id) : T(id) {};
R2L_Token const* next(int n = 1) const { return this - n; }
template<typename TTRACK_TYPE>
- R2L_Token const* stop(TTRACK_TYPE const& C, id_type sid) const
- {
- return reinterpret_cast<R2L_Token<T> const*>(C.sntStart(sid) - 1);
+ R2L_Token const* stop(TTRACK_TYPE const& C, id_type sid) const
+ {
+ return reinterpret_cast<R2L_Token<T> const*>(C.sntStart(sid) - 1);
}
- R2L_Token const* stop(R2L_Token const* seqStart, R2L_Token const* seqEnd) const
- {
+ R2L_Token const* stop(R2L_Token const* seqStart, R2L_Token const* seqEnd) const
+ {
assert(seqStart);
return seqStart - 1;
}
diff --git a/moses/TranslationModel/UG/mm/ug_deptree.cc b/moses/TranslationModel/UG/mm/ug_deptree.cc
index 545268e04..003d9b35e 100644
--- a/moses/TranslationModel/UG/mm/ug_deptree.cc
+++ b/moses/TranslationModel/UG/mm/ug_deptree.cc
@@ -7,14 +7,14 @@ using namespace std;
namespace ugdiss
{
- bool
+ bool
Conll_Record::
isDescendentOf(Conll_Record const* other) const
{
Conll_Record const* a = this;
- while (a != other && a->parent)
+ while (a != other && a->parent)
a += a->parent;
- return a==other;
+ return a==other;
}
Conll_Record&
@@ -43,7 +43,7 @@ namespace ugdiss
}
Conll_AllFields::
- Conll_AllFields()
+ Conll_AllFields()
: Conll_Record::Conll_Record()
{};
@@ -64,7 +64,7 @@ namespace ugdiss
}
Conll_WildCard::
- Conll_WildCard()
+ Conll_WildCard()
: Conll_Record::Conll_Record()
{};
@@ -95,8 +95,8 @@ namespace ugdiss
#if 0
Conll_Record::
- Conll_Record(string const& line,
- TokenIndex const& SF, TokenIndex const& LM,
+ Conll_Record(string const& line,
+ TokenIndex const& SF, TokenIndex const& LM,
TokenIndex const& PS, TokenIndex const& DT)
{
@@ -140,35 +140,35 @@ namespace ugdiss
#endif
Conll_Sform::
- Conll_Sform()
- : Conll_Record::Conll_Record()
+ Conll_Sform()
+ : Conll_Record::Conll_Record()
{};
Conll_MinPos::
- Conll_MinPos()
- : Conll_Record::Conll_Record()
+ Conll_MinPos()
+ : Conll_Record::Conll_Record()
{};
-
+
Conll_MinPos_Lemma::
- Conll_MinPos_Lemma()
- : Conll_Record::Conll_Record()
+ Conll_MinPos_Lemma()
+ : Conll_Record::Conll_Record()
{};
Conll_Lemma::
Conll_Lemma()
- : Conll_Record::Conll_Record()
+ : Conll_Record::Conll_Record()
{};
Conll_Lemma::
Conll_Lemma(id_type _id)
- : Conll_Record::Conll_Record()
+ : Conll_Record::Conll_Record()
{
this->lemma = _id;
};
Conll_MinPos::
Conll_MinPos(id_type _id)
- : Conll_Record::Conll_Record()
+ : Conll_Record::Conll_Record()
{
this->minpos = _id;
};
@@ -182,7 +182,7 @@ namespace ugdiss
Conll_MajPos::
Conll_MajPos(id_type _id)
- : Conll_Record::Conll_Record()
+ : Conll_Record::Conll_Record()
{
this->majpos = _id;
};
@@ -219,21 +219,21 @@ namespace ugdiss
Conll_MinPos_Lemma::
cmp(Conll_Record const& other) const
{
- if (this->minpos != 0 && other.minpos != 0 && this->minpos != other.minpos)
+ if (this->minpos != 0 && other.minpos != 0 && this->minpos != other.minpos)
return this->minpos < other.minpos ? -1 : 1;
if (this->lemma != 0 && other.lemma != 0 && this->lemma != other.lemma)
return this->lemma < other.lemma ? -1 : 1;
return 0;
}
- id_type
+ id_type
Conll_Lemma::
- id() const
- {
- return this->lemma;
+ id() const
+ {
+ return this->lemma;
}
- int
+ int
Conll_Lemma::
cmp(Conll_Record const& other) const
{
@@ -251,16 +251,16 @@ namespace ugdiss
Conll_Sform::
Conll_Sform(id_type _id)
- : Conll_Record::Conll_Record()
+ : Conll_Record::Conll_Record()
{
this->sform = _id;
};
- id_type
+ id_type
Conll_Sform
- ::id() const
- {
- return this->sform;
+ ::id() const
+ {
+ return this->sform;
}
int
@@ -282,7 +282,7 @@ namespace ugdiss
short p = w[i].rec->parent;
if (p != 0)
{
- if (p > 0) assert(i+p < w.size());
+ if (p > 0) assert(i+p < w.size());
else assert(i >= size_t(-p));
w[i].parent = &(w[i+p]);
w[i].parent->children.push_back(&(w[i]));
@@ -291,7 +291,7 @@ namespace ugdiss
}
#endif
- /** @return true if the linear sequence of /Conll_Record/s is coherent,
+ /** @return true if the linear sequence of /Conll_Record/s is coherent,
* i.e., a proper connected tree structure */
bool
isCoherent(Conll_Record const* const start, Conll_Record const* const stop)
@@ -300,16 +300,16 @@ namespace ugdiss
for (Conll_Record const* x = start; outOfRange <= 1 && x < stop; ++x)
{
Conll_Record const* n = x->up();
- if (!n || n < start || n >= stop)
+ if (!n || n < start || n >= stop)
outOfRange++;
}
return outOfRange<=1;
}
-
+
// this is for contigous word sequences extracted from longer sequences
// adjust parent pointers to 0 (no parent) if they point out of the
// subsequence
- void
+ void
fixParse(Conll_Record* start, Conll_Record* stop)
{
int len = stop-start;
diff --git a/moses/TranslationModel/UG/mm/ug_deptree.h b/moses/TranslationModel/UG/mm/ug_deptree.h
index 0d393aa33..b28a4bbe8 100644
--- a/moses/TranslationModel/UG/mm/ug_deptree.h
+++ b/moses/TranslationModel/UG/mm/ug_deptree.h
@@ -19,8 +19,8 @@ using namespace std;
namespace ugdiss
{
- // Fills the vector v with pointers to the internal root r_x for the
- // stretch [start,x] for all x: start <= x < stop. If the stretch
+ // Fills the vector v with pointers to the internal root r_x for the
+ // stretch [start,x] for all x: start <= x < stop. If the stretch
// is incoherent, r_x is NULL
template<typename T>
void
@@ -37,8 +37,8 @@ namespace ugdiss
{
size_t p = x-start;
root[p] = x+x->parent;
- for (size_t i = isR.find_first(); i < isR.size(); i = isR.find_next(i))
- if (root[i]==x)
+ for (size_t i = isR.find_first(); i < isR.size(); i = isR.find_next(i))
+ if (root[i]==x)
isR.reset(i);
if (root[p] < start || root[p] >= stop)
isR.set(x-start);
@@ -46,7 +46,7 @@ namespace ugdiss
}
}
- // return the root of the tree if the span [start,stop) constitutes a
+ // return the root of the tree if the span [start,stop) constitutes a
// tree, NULL otherwise
template<typename T>
T const*
@@ -66,7 +66,7 @@ namespace ugdiss
assert(outOfRange);
return outOfRange == 1 ? root : NULL;
}
-
+
// return the governor of the tree given by [start,stop) if the span
// constitutes a tree, NULL otherwise
template<typename T>
@@ -82,7 +82,7 @@ namespace ugdiss
{
if (root && n != root)
numRoots++;
- else
+ else
{
root = n;
if (!numRoots) numRoots++;
@@ -101,7 +101,7 @@ namespace ugdiss
T const* b = as<T>(&(*v.end()));
return (a==b) ? NULL : findInternalRoot<T>(a,b);
}
-
+
#if 1
class DTNode
{
@@ -113,7 +113,7 @@ namespace ugdiss
};
/** A parsed sentence */
- class
+ class
DependencyTree
{
public:
@@ -189,13 +189,13 @@ namespace ugdiss
int cmp(Conll_Record const& other) const;
};
- /** @return true if the linear sequence of /Conll_Record/s is coherent,
+ /** @return true if the linear sequence of /Conll_Record/s is coherent,
* i.e., a proper connected tree structure */
bool
isCoherent(Conll_Record const* start, Conll_Record const* const stop);
- /** @return the root node of the tree covering the span [start,stop), if the span is coherent;
+ /** @return the root node of the tree covering the span [start,stop), if the span is coherent;
* NULL otherwise */
template<typename T>
T const* topNode(T const* start , T const* stop)
@@ -204,9 +204,9 @@ namespace ugdiss
for (T const* x = start; x < stop; ++x)
{
T const* n = reinterpret_cast<T const*>(x->up());
- if (!n || n < start || n >= stop)
+ if (!n || n < start || n >= stop)
{
- if (ret) return NULL;
+ if (ret) return NULL;
else ret = x;
}
}
diff --git a/moses/TranslationModel/UG/mm/ug_im_bitext.cc b/moses/TranslationModel/UG/mm/ug_im_bitext.cc
index 9f26a181b..b411cc7dc 100644
--- a/moses/TranslationModel/UG/mm/ug_im_bitext.cc
+++ b/moses/TranslationModel/UG/mm/ug_im_bitext.cc
@@ -6,15 +6,15 @@ namespace Moses
{
template<>
- sptr<imBitext<L2R_Token<SimpleWordId> > >
+ sptr<imBitext<L2R_Token<SimpleWordId> > >
imBitext<L2R_Token<SimpleWordId> >::
- add(vector<string> const& s1,
- vector<string> const& s2,
+ add(vector<string> const& s1,
+ vector<string> const& s2,
vector<string> const& aln) const
{
typedef L2R_Token<SimpleWordId> TKN;
assert(s1.size() == s2.size() && s1.size() == aln.size());
-
+
#ifndef NDEBUG
size_t first_new_snt = this->T1 ? this->T1->size() : 0;
#endif
@@ -24,7 +24,7 @@ namespace Moses
boost::unique_lock<boost::shared_mutex> guard(m_lock);
ret.reset(new imBitext<TKN>(*this));
}
-
+
// we add the sentences in separate threads (so it's faster)
boost::thread thread1(snt_adder<TKN>(s1,*ret->V1,ret->myT1,ret->myI1));
// thread1.join(); // for debugging
@@ -41,10 +41,10 @@ namespace Moses
binwrite(obuf,row);
binwrite(obuf,col);
}
- // important: DO NOT replace the two lines below this comment by
- // char const* x = obuf.str().c_str(), as the memory x is pointing
+ // important: DO NOT replace the two lines below this comment by
+ // char const* x = obuf.str().c_str(), as the memory x is pointing
// to is freed immediately upon deconstruction of the string object.
- string foo = obuf.str();
+ string foo = obuf.str();
char const* x = foo.c_str();
vector<char> v(x,x+foo.size());
ret->myTx = append(ret->myTx, v);
diff --git a/moses/TranslationModel/UG/mm/ug_im_bitext.h b/moses/TranslationModel/UG/mm/ug_im_bitext.h
index a620b7219..63e44f1b9 100644
--- a/moses/TranslationModel/UG/mm/ug_im_bitext.h
+++ b/moses/TranslationModel/UG/mm/ug_im_bitext.h
@@ -4,7 +4,7 @@
namespace Moses
{
- namespace bitext
+ namespace bitext
{
template<typename TKN>
class imBitext : public Bitext<TKN>
@@ -12,7 +12,7 @@ namespace Moses
sptr<imTtrack<char> > myTx;
sptr<imTtrack<TKN> > myT1;
sptr<imTtrack<TKN> > myT2;
- sptr<imTSA<TKN> > myI1;
+ sptr<imTSA<TKN> > myI1;
sptr<imTSA<TKN> > myI2;
static ThreadSafeCounter my_revision;
public:
@@ -23,26 +23,26 @@ namespace Moses
size_t max_sample = 5000, size_t num_workers=4);
imBitext(size_t max_sample = 5000, size_t num_workers=4);
imBitext(imBitext const& other);
-
- // sptr<imBitext<TKN> >
+
+ // sptr<imBitext<TKN> >
// add(vector<TKN> const& s1, vector<TKN> const& s2, vector<ushort> & a);
- sptr<imBitext<TKN> >
- add(vector<string> const& s1,
- vector<string> const& s2,
+ sptr<imBitext<TKN> >
+ add(vector<string> const& s1,
+ vector<string> const& s2,
vector<string> const& a) const;
};
template<typename TKN>
- ThreadSafeCounter
+ ThreadSafeCounter
imBitext<TKN>::my_revision;
template<typename TKN>
imBitext<TKN>::
imBitext(size_t max_sample, size_t num_workers)
: Bitext<TKN>(max_sample, num_workers)
- {
+ {
this->m_default_sample_size = max_sample;
this->V1.reset(new TokenIndex());
this->V2.reset(new TokenIndex());
@@ -50,14 +50,14 @@ namespace Moses
this->V2->setDynamic(true);
++my_revision;
}
-
+
template<typename TKN>
imBitext<TKN>::
imBitext(sptr<TokenIndex> const& v1,
sptr<TokenIndex> const& v2,
size_t max_sample, size_t num_workers)
: Bitext<TKN>(max_sample, num_workers)
- {
+ {
// this->default_sample_size = max_sample;
this->V1 = v1;
this->V2 = v2;
@@ -65,12 +65,12 @@ namespace Moses
this->V2->setDynamic(true);
++my_revision;
}
-
+
template<typename TKN>
imBitext<TKN>::
imBitext(imBitext<TKN> const& other)
- {
+ {
this->myTx = other.myTx;
this->myT1 = other.myT1;
this->myT2 = other.myT2;
@@ -89,17 +89,17 @@ namespace Moses
}
template<>
- sptr<imBitext<L2R_Token<SimpleWordId> > >
+ sptr<imBitext<L2R_Token<SimpleWordId> > >
imBitext<L2R_Token<SimpleWordId> >::
- add(vector<string> const& s1,
- vector<string> const& s2,
+ add(vector<string> const& s1,
+ vector<string> const& s2,
vector<string> const& aln) const;
template<typename TKN>
- sptr<imBitext<TKN> >
+ sptr<imBitext<TKN> >
imBitext<TKN>::
- add(vector<string> const& s1,
- vector<string> const& s2,
+ add(vector<string> const& s1,
+ vector<string> const& s2,
vector<string> const& aln) const
{
throw "Not yet implemented";
diff --git a/moses/TranslationModel/UG/mm/ug_im_tsa.h b/moses/TranslationModel/UG/mm/ug_im_tsa.h
index f7256ba2d..e920d9f96 100644
--- a/moses/TranslationModel/UG/mm/ug_im_tsa.h
+++ b/moses/TranslationModel/UG/mm/ug_im_tsa.h
@@ -23,7 +23,7 @@ namespace ugdiss
using namespace std;
using namespace boost;
namespace bio=boost::iostreams;
-
+
// template<typename TOKEN> class imBitext<TOKEN>;
//-----------------------------------------------------------------------
@@ -35,61 +35,61 @@ namespace ugdiss
public:
class tree_iterator;
friend class tree_iterator;
-
+
private:
vector<cpos> sufa; // stores the actual array
- vector<filepos_type> index; /* top-level index into regions in sufa
+ vector<filepos_type> index; /* top-level index into regions in sufa
* (for faster access) */
private:
- char const*
+ char const*
index_jump(char const* a, char const* z, float ratio) const;
- char const*
+ char const*
getLowerBound(id_type id) const;
- char const*
+ char const*
getUpperBound(id_type id) const;
-
+
public:
imTSA();
- imTSA(boost::shared_ptr<Ttrack<TOKEN> const> c,
- bdBitset const* filt,
+ imTSA(boost::shared_ptr<Ttrack<TOKEN> const> c,
+ bdBitset const* filt,
ostream* log = NULL);
- imTSA(imTSA<TOKEN> const& prior,
+ imTSA(imTSA<TOKEN> const& prior,
boost::shared_ptr<imTtrack<TOKEN> const> const& crp,
vector<id_type> const& newsids, size_t const vsize);
- count_type
- sntCnt(char const* p, char const * const q) const;
+ count_type
+ sntCnt(char const* p, char const * const q) const;
- count_type
+ count_type
rawCnt(char const* p, char const * const q) const;
-
- void
- getCounts(char const* p, char const * const q,
+
+ void
+ getCounts(char const* p, char const * const q,
count_type& sids, count_type& raw) const;
-
- char const*
+
+ char const*
readSid(char const* p, char const* q, id_type& sid) const;
-
- char const*
+
+ char const*
readSid(char const* p, char const* q, ::uint64_t& sid) const;
- char const*
+ char const*
readOffset(char const* p, char const* q, uint16_t& offset) const;
- char const*
+ char const*
readOffset(char const* p, char const* q, ::uint64_t& offset) const;
-
- void
+
+ void
sanityCheck() const;
-
- void
+
+ void
save_as_mm_tsa(string fname) const;
-
+
/// add a sentence to the database
- // shared_ptr<imTSA<TOKEN> > add(vector<TOKEN> const& snt) const;
+ // shared_ptr<imTSA<TOKEN> > add(vector<TOKEN> const& snt) const;
};
@@ -108,12 +108,12 @@ namespace ugdiss
tree_iterator(imTSA<TOKEN> const* s)
: TSA<TOKEN>::tree_iterator::tree_iterator(reinterpret_cast<TSA<TOKEN> const*>(s))
{};
-
+
/** jump to the point 1/ratio in a tightly packed index
* assumes that keys are flagged with '1', values with '0'
*/
template<typename TOKEN>
- char const*
+ char const*
imTSA<TOKEN>::
index_jump(char const* a, char const* z, float ratio) const
{
@@ -123,10 +123,10 @@ namespace ugdiss
cpos const* xz = reinterpret_cast<cpos const*>(z);
return reinterpret_cast<char const*>(xa+int(ratio*(xz-xa)));
}
-
+
template<typename TOKEN>
imTSA<TOKEN>::
- imTSA()
+ imTSA()
{
this->indexSize = 0;
// this->data = NULL;
@@ -135,7 +135,7 @@ namespace ugdiss
this->corpusSize = 0;
this->BitSetCachingThreshold=4096;
};
-
+
// build an array from all the tokens in the sentences in *c that are
// specified in filter
template<typename TOKEN>
@@ -153,12 +153,12 @@ namespace ugdiss
}
assert(filter);
// In the first iteration over the corpus, we obtain word counts.
- // They allows us to
+ // They allows us to
// a. allocate the exact amount of memory we need
- // b. place tokens into the right 'section' in the array, based on
+ // b. place tokens into the right 'section' in the array, based on
// the ID of the first token in the sequence. We can then sort
// each section separately.
-
+
if (log) *log << "counting tokens ... ";
int slimit = 65536;
// slimit=65536 is the upper bound of what we can fit into a ushort which
@@ -176,7 +176,7 @@ namespace ugdiss
vector<count_type> tmp(wcnt.size(),0);
for (size_t i = 1; i < wcnt.size(); ++i)
tmp[i] = tmp[i-1] + wcnt[i-1];
-
+
// Now dump all token positions into the right place in sufa
this->corpusSize = 0;
for (id_type sid = filter->find_first();
@@ -204,7 +204,7 @@ namespace ugdiss
for (size_t i = 0; i < wcnt.size(); i++)
{
if (log && wcnt[i] > 5000)
- *log << "sorting " << wcnt[i]
+ *log << "sorting " << wcnt[i]
<< " entries starting with id " << i << "." << endl;
index[i+1] = index[i]+wcnt[i];
assert(index[i+1]==tmp[i]); // sanity check
@@ -247,7 +247,7 @@ namespace ugdiss
imTSA<TOKEN>::
getUpperBound(id_type id) const
{
- if (++id >= this->index.size())
+ if (++id >= this->index.size())
return NULL;
assert(index[id] <= this->sufa.size());
return reinterpret_cast<char const*>(&(this->sufa.front()) + index[id]);
@@ -263,7 +263,7 @@ namespace ugdiss
sid = reinterpret_cast<cpos const*>(p)->sid;
return p;
}
-
+
template<typename TOKEN>
char const*
imTSA<TOKEN>::
@@ -306,11 +306,11 @@ namespace ugdiss
cpos const* xq = reinterpret_cast<cpos const*>(q);
return xq-xp;
}
-
+
template<typename TOKEN>
- void
+ void
imTSA<TOKEN>::
- getCounts(char const* p, char const* const q,
+ getCounts(char const* p, char const* const q,
count_type& sids, count_type& raw) const
{
id_type sid; // uint16_t off;
@@ -328,7 +328,7 @@ namespace ugdiss
}
template<typename TOKEN>
- void
+ void
imTSA<TOKEN>::
save_as_mm_tsa(string fname) const
{
@@ -352,34 +352,34 @@ namespace ugdiss
for (size_t i = 0; i < mmIndex.size(); i++)
numwrite(out,mmIndex[i]-mmIndex[0]);
out.seekp(0);
- numwrite(out,idxStart);
+ numwrite(out,idxStart);
out.close();
}
template<typename TOKEN>
imTSA<TOKEN>::
- imTSA(imTSA<TOKEN> const& prior,
+ imTSA(imTSA<TOKEN> const& prior,
boost::shared_ptr<imTtrack<TOKEN> const> const& crp,
vector<id_type> const& newsids, size_t const vsize)
{
typename ttrack::Position::LESS<Ttrack<TOKEN> > sorter(crp.get());
-
+
// count how many tokens will be added to the TSA
// and index the new additions to the corpus
size_t newToks = 0;
- BOOST_FOREACH(id_type sid, newsids)
+ BOOST_FOREACH(id_type sid, newsids)
newToks += crp->sntLen(sid);
vector<cpos> nidx(newToks); // new array entries
-
+
size_t n = 0;
- BOOST_FOREACH(id_type sid, newsids)
+ BOOST_FOREACH(id_type sid, newsids)
{
assert(sid < crp->size());
for (size_t o = 0; o < (*crp)[sid].size(); ++o, ++n)
{ nidx[n].offset = o; nidx[n].sid = sid; }
}
sort(nidx.begin(),nidx.end(),sorter);
-
+
// create the new suffix array
this->numTokens = newToks + prior.sufa.size();
this->sufa.resize(this->numTokens);
@@ -388,10 +388,10 @@ namespace ugdiss
this->corpusSize = crp->size();
this->corpus = crp;
this->index.resize(vsize+1);
-
+
size_t i = 0;
typename vector<cpos>::iterator k = this->sufa.begin();
- // cerr << newToks << " new items at "
+ // cerr << newToks << " new items at "
// << __FILE__ << ":" << __LINE__ << endl;
for (size_t n = 0; n < nidx.size();)
{
@@ -402,7 +402,7 @@ namespace ugdiss
this->index[i] = k - this->sufa.begin();
if (++i < prior.index.size() && prior.index[i-1] < prior.index[i])
{
- k = copy(prior.sufa.begin() + prior.index[i-1],
+ k = copy(prior.sufa.begin() + prior.index[i-1],
prior.sufa.begin() + prior.index[i], k);
}
}
@@ -410,13 +410,13 @@ namespace ugdiss
if (++i < prior.index.size() && prior.index[i] > prior.index[i-1])
{
size_t j = prior.index[i-1];
- while (j < prior.index[i] && n < nidx.size()
+ while (j < prior.index[i] && n < nidx.size()
&& crp->getToken(nidx[n])->id() < i)
{
assert(k < this->sufa.end());
if (sorter(prior.sufa[j],nidx[n]))
*k++ = prior.sufa[j++];
- else
+ else
*k++ = nidx[n++];
}
while (j < prior.index[i])
@@ -436,7 +436,7 @@ namespace ugdiss
while (++i < this->index.size())
{
if (i < prior.index.size() && prior.index[i-1] < prior.index[i])
- k = copy(prior.sufa.begin() + prior.index[i-1],
+ k = copy(prior.sufa.begin() + prior.index[i-1],
prior.sufa.begin() + prior.index[i], k);
this->index[i] = k - this->sufa.begin();
}
@@ -462,5 +462,5 @@ namespace ugdiss
}
}
-
+
#endif
diff --git a/moses/TranslationModel/UG/mm/ug_im_ttrack.h b/moses/TranslationModel/UG/mm/ug_im_ttrack.h
index ac49ebcd4..20ab653f4 100644
--- a/moses/TranslationModel/UG/mm/ug_im_ttrack.h
+++ b/moses/TranslationModel/UG/mm/ug_im_ttrack.h
@@ -1,6 +1,6 @@
// -*- c++ -*-
// In-memory corpus track
-// (c) 2006-2012 Ulrich Germann.
+// (c) 2006-2012 Ulrich Germann.
#ifndef __ug_im_ttrack
#define __ug_im_ttrack
@@ -36,20 +36,20 @@ namespace ugdiss
template<typename Token> class imTtrack;
template<typename TOKEN>
- typename boost::shared_ptr<imTtrack<TOKEN> >
+ typename boost::shared_ptr<imTtrack<TOKEN> >
append(typename boost::shared_ptr<imTtrack<TOKEN> > const & crp, vector<TOKEN> const & snt);
template<typename Token>
class imTtrack : public Ttrack<Token>
{
-
+
private:
size_t numToks;
boost::shared_ptr<vector<vector<Token> > > myData; // pointer to corpus data
friend class imTSA<Token>;
- friend
- typename boost::shared_ptr<imTtrack<Token> >
+ friend
+ typename boost::shared_ptr<imTtrack<Token> >
append<Token>(typename boost::shared_ptr<imTtrack<Token> > const & crp, vector<Token> const & snt);
void m_check_token_count(); // debugging function
@@ -60,14 +60,14 @@ namespace ugdiss
imTtrack(istream& in, TokenIndex const& V, ostream* log = NULL);
imTtrack(size_t reserve = 0);
// imTtrack(istream& in, Vocab& V);
-
+
/** return pointer to beginning of sentence */
- Token const* sntStart(size_t sid) const;
+ Token const* sntStart(size_t sid) const;
/** return pointer to beginning of sentence */
- Token const* sntEnd(size_t sid) const;
+ Token const* sntEnd(size_t sid) const;
- size_t size() const;
+ size_t size() const;
size_t numTokens() const;
id_type findSid(Token const* t) const;
@@ -82,16 +82,16 @@ namespace ugdiss
size_t check = 0;
BOOST_FOREACH(vector<Token> const& s, *myData)
check += s.size();
- UTIL_THROW_IF2(check != this->numToks, "[" << HERE << "]"
+ UTIL_THROW_IF2(check != this->numToks, "[" << HERE << "]"
<< " Wrong token count after appending sentence!"
- << " Counted " << check << " but expected "
- << this->numToks << " in a total of " << myData->size()
+ << " Counted " << check << " but expected "
+ << this->numToks << " in a total of " << myData->size()
<< " sentences.");
-
+
}
template<typename Token>
- Token const*
+ Token const*
imTtrack<Token>::
sntStart(size_t sid) const // return pointer to beginning of sentence
{
@@ -99,9 +99,9 @@ namespace ugdiss
if ((*myData)[sid].size() == 0) return NULL;
return &((*myData)[sid].front());
}
-
+
template<typename Token>
- Token const*
+ Token const*
imTtrack<Token>::
sntEnd(size_t sid) const // return pointer to end of sentence
{
@@ -109,9 +109,9 @@ namespace ugdiss
if ((*myData)[sid].size() == 0) return NULL;
return &(*myData)[sid].back()+1;
}
-
+
template<typename Token>
- size_t
+ size_t
imTtrack<Token>::
size() const // return size of corpus (in number of sentences)
{
@@ -120,15 +120,15 @@ namespace ugdiss
// offset in the myIndex than there are sentences
return myData->size();
}
-
+
template<typename Token>
- size_t
+ size_t
imTtrack<Token>::
numTokens() const // return size of corpus (in number of words)
{
return numToks;
}
-
+
template<typename Token>
imTtrack<Token>::
imTtrack(istream& in, TokenIndex const& V, ostream* log)
@@ -140,19 +140,19 @@ namespace ugdiss
boost::unordered_map<string,id_type> H;
for (id_type i = 0; i < V.knownVocabSize(); ++i)
H[V[i]] = i;
- while (getline(in,line))
+ while (getline(in,line))
{
myData->push_back(vector<Token>());
- if (log && ++linectr%1000000==0)
+ if (log && ++linectr%1000000==0)
*log << linectr/1000000 << "M lines of input processed" << endl;
istringstream buf(line);
- while (buf>>w)
+ while (buf>>w)
myData->back().push_back(Token(H[w]));
myData->back().resize(myData.back().size());
numToks += myData->back().size();
}
}
-
+
template<typename Token>
imTtrack<Token>::
imTtrack(size_t reserve)
@@ -171,7 +171,7 @@ namespace ugdiss
BOOST_FOREACH(vector<Token> const& v, *d)
numToks += v.size();
}
-
+
template<typename Token>
id_type
imTtrack<Token>::
@@ -182,7 +182,7 @@ namespace ugdiss
{
vector<Token> const& v = (*myData)[i];
if (v.size() == 0) continue;
- if (&v.front() <= t && &v.back() >= t)
+ if (&v.front() <= t && &v.back() >= t)
break;
}
return i;
@@ -190,7 +190,7 @@ namespace ugdiss
/// add a sentence to the database
template<typename TOKEN>
- boost::shared_ptr<imTtrack<TOKEN> >
+ boost::shared_ptr<imTtrack<TOKEN> >
append(boost::shared_ptr<imTtrack<TOKEN> > const& crp, vector<TOKEN> const & snt)
{
#if 1
diff --git a/moses/TranslationModel/UG/mm/ug_lexical_phrase_scorer1.h b/moses/TranslationModel/UG/mm/ug_lexical_phrase_scorer1.h
index 53628e3b3..742e0dd4e 100644
--- a/moses/TranslationModel/UG/mm/ug_lexical_phrase_scorer1.h
+++ b/moses/TranslationModel/UG/mm/ug_lexical_phrase_scorer1.h
@@ -15,14 +15,14 @@ using namespace std;
namespace ugdiss
{
- template<typename TKN>
- class
+ template<typename TKN>
+ class
LexicalPhraseScorer1
{
typedef boost::unordered_map<id_type, float> inner_map_t;
vector<inner_map_t> L1_given_L2;
vector<inner_map_t> L2_given_L1;
- void load_lex (string const& fname, TokenIndex & V1, TokenIndex & V2,
+ void load_lex (string const& fname, TokenIndex & V1, TokenIndex & V2,
vector<inner_map_t> & lex);
public:
void open(string const& bname, string const& L1, string const& L2,
@@ -34,14 +34,14 @@ namespace ugdiss
TKN const* snt2, size_t const s2, size_t const e2,
char const* const aln_start, char const* const aln_end,
float & fwd_score, float& bwd_score);
- float permissive_lookup(vector<inner_map_t> const& lex,
+ float permissive_lookup(vector<inner_map_t> const& lex,
id_type const s, id_type const t) const;
};
-
+
template<typename TKN>
void
LexicalPhraseScorer1<TKN>::
- load_lex (string const& fname, TokenIndex & V1, TokenIndex & V2,
+ load_lex (string const& fname, TokenIndex & V1, TokenIndex & V2,
vector<inner_map_t> & lex)
{
boost::iostreams::filtering_istream in;
@@ -52,20 +52,20 @@ namespace ugdiss
while (in >> w1 >> w2 >> p)
{
id_type id1 = V1[w1];
- while (lex.size() <= id1)
+ while (lex.size() <= id1)
lex.push_back(inner_map_t());
lex[id1][V2[w2]] = p;
}
}
-
+
template<typename TKN>
void
LexicalPhraseScorer1<TKN>::
open(string const& bname, string const& L1, string const& L2,
TokenIndex & V1, TokenIndex & V2)
{
- string lex1 = bname+L1+"-"+L2+"."+L1+"-given-"+L2+".lex.gz";
- string lex2 = bname+L1+"-"+L2+"."+L2+"-given-"+L1+".lex.gz";
+ string lex1 = bname+L1+"-"+L2+"."+L1+"-given-"+L2+".lex.gz";
+ string lex2 = bname+L1+"-"+L2+"."+L2+"-given-"+L1+".lex.gz";
cout << lex1 << endl;
cout << lex2 << endl;
load_lex(lex1,V1,V2,L1_given_L2);
@@ -86,9 +86,9 @@ namespace ugdiss
{
i1 = aln[k]; i2 = aln[++k];
if (i1 < s1 || i1 >= e1 || i2 < s2 || i2 >= e2) continue;
- p1[i1] += permissive_lookup(L2_given_L1, snt2[i2].id(), snt1[i1].id());
+ p1[i1] += permissive_lookup(L2_given_L1, snt2[i2].id(), snt1[i1].id());
++c1[i1];
- p2[i2] += permissive_lookup(L1_given_L2, snt1[i1].id(), snt2[i2].id());
+ p2[i2] += permissive_lookup(L1_given_L2, snt1[i1].id(), snt2[i2].id());
++c2[i2];
}
fwd_score = 0;
@@ -110,7 +110,7 @@ namespace ugdiss
template<typename TKN>
float
LexicalPhraseScorer1<TKN>::
- permissive_lookup(vector<inner_map_t> const& lex,
+ permissive_lookup(vector<inner_map_t> const& lex,
id_type const s, id_type const t) const
{
if (s >= lex.size()) return 1.0;
@@ -135,9 +135,9 @@ namespace ugdiss
// assert(snt1[i2].id() < L1_given_L2.size());
// assert(snt2[i2].id() < L2_given_L1.size());
if (i1 < s1 || i1 >= e1 || i2 < s2 || i2 >= e2) continue;
- p1[i1] += permissive_lookup(L1_given_L2, snt1[i1].id(), snt2[i2].id());
+ p1[i1] += permissive_lookup(L1_given_L2, snt1[i1].id(), snt2[i2].id());
++c1[i1];
- p2[i2] += permissive_lookup(L2_given_L1, snt2[i2].id(), snt1[i1].id());
+ p2[i2] += permissive_lookup(L2_given_L1, snt2[i2].id(), snt1[i1].id());
++c2[i2];
}
fwd_score = 0;
diff --git a/moses/TranslationModel/UG/mm/ug_lexical_phrase_scorer2.h b/moses/TranslationModel/UG/mm/ug_lexical_phrase_scorer2.h
index b7e359223..fdd0366df 100644
--- a/moses/TranslationModel/UG/mm/ug_lexical_phrase_scorer2.h
+++ b/moses/TranslationModel/UG/mm/ug_lexical_phrase_scorer2.h
@@ -18,8 +18,8 @@ using namespace std;
namespace ugdiss
{
- template<typename TKN>
- class
+ template<typename TKN>
+ class
LexicalPhraseScorer2
{
vector<string> ftag;
@@ -28,28 +28,28 @@ namespace ugdiss
table_t COOC;
void open(string const& fname);
template<typename someint>
- void
+ void
score(TKN const* snt1, size_t const s1, size_t const e1,
TKN const* snt2, size_t const s2, size_t const e2,
vector<someint> const & aln, float const alpha,
float & fwd_score, float& bwd_score) const;
- void
+ void
score(TKN const* snt1, size_t const s1, size_t const e1,
TKN const* snt2, size_t const s2, size_t const e2,
char const* const aln_start, char const* const aln_end,
float const alpha, float & fwd_score, float& bwd_score) const;
// plup: permissive lookup
- float plup_fwd(id_type const s,id_type const t, float const alpha) const;
+ float plup_fwd(id_type const s,id_type const t, float const alpha) const;
float plup_bwd(id_type const s,id_type const t, float const alpha) const;
- // to be done:
- // - on-the-fly smoothing ?
- // - better (than permissive-lookup) treatment of unknown combinations
+ // to be done:
+ // - on-the-fly smoothing ?
+ // - better (than permissive-lookup) treatment of unknown combinations
// permissive lookup is currently used for compatibility reasons
// - zens-ney smoothed scoring via noisy-or combination
};
-
+
template<typename TKN>
void
LexicalPhraseScorer2<TKN>::
@@ -64,7 +64,7 @@ namespace ugdiss
LexicalPhraseScorer2<TKN>::
score(TKN const* snt1, size_t const s1, size_t const e1,
TKN const* snt2, size_t const s2, size_t const e2,
- vector<someint> const & aln, float const alpha,
+ vector<someint> const & aln, float const alpha,
float & fwd_score, float& bwd_score) const
{
vector<float> p1(e1,0), p2(e2,0);
@@ -74,9 +74,9 @@ namespace ugdiss
{
i1 = aln[k]; i2 = aln[++k];
if (i1 < s1 || i1 >= e1 || i2 < s2 || i2 >= e2) continue;
- p1[i1] += plup_fwd(snt1[i1].id(),snt2[i2].id(),alpha);
+ p1[i1] += plup_fwd(snt1[i1].id(),snt2[i2].id(),alpha);
++c1[i1];
- p2[i2] += plup_bwd(snt1[i1].id(),snt2[i2].id(),alpha);
+ p2[i2] += plup_bwd(snt1[i1].id(),snt2[i2].id(),alpha);
++c2[i2];
}
fwd_score = 0;
@@ -105,19 +105,19 @@ namespace ugdiss
<< ": alpha parameter must be >= 0");
float ret = COOC[s][t]+alpha;
ret = (ret?ret:1.)/(COOC.m1(s)+alpha);
- UTIL_THROW_IF2(ret <= 0 || ret > 1, "At " << __FILE__ << ":" << __LINE__
+ UTIL_THROW_IF2(ret <= 0 || ret > 1, "At " << __FILE__ << ":" << __LINE__
<< ": result not > 0 and <= 1. alpha = " << alpha << "; "
<< COOC[s][t] << "/" << COOC.m1(s));
#if 0
- cerr << "[" << s << "," << t << "] "
- << COOC.m1(s) << "/"
- << COOC[s][t] << "/"
+ cerr << "[" << s << "," << t << "] "
+ << COOC.m1(s) << "/"
+ << COOC[s][t] << "/"
<< COOC.m2(t) << endl;
#endif
return ret;
}
-
+
template<typename TKN>
float
LexicalPhraseScorer2<TKN>::
@@ -128,11 +128,11 @@ namespace ugdiss
<< ": alpha parameter must be >= 0");
float ret = float(COOC[s][t]+alpha);
ret = (ret?ret:1.)/(COOC.m2(t)+alpha);
- UTIL_THROW_IF2(ret <= 0 || ret > 1, "At " << __FILE__ << ":" << __LINE__
+ UTIL_THROW_IF2(ret <= 0 || ret > 1, "At " << __FILE__ << ":" << __LINE__
<< ": result not > 0 and <= 1.");
return ret;
}
-
+
template<typename TKN>
void
LexicalPhraseScorer2<TKN>::
@@ -148,9 +148,9 @@ namespace ugdiss
{
x = binread(binread(x,i1),i2);
if (i1 < s1 || i1 >= e1 || i2 < s2 || i2 >= e2) continue;
- p1[i1] += plup_fwd(snt1[i1].id(), snt2[i2].id(),alpha);
+ p1[i1] += plup_fwd(snt1[i1].id(), snt2[i2].id(),alpha);
++c1[i1];
- p2[i2] += plup_bwd(snt1[i1].id(), snt2[i2].id(),alpha);
+ p2[i2] += plup_bwd(snt1[i1].id(), snt2[i2].id(),alpha);
++c2[i2];
}
fwd_score = 0;
diff --git a/moses/TranslationModel/UG/mm/ug_lexical_reordering.cc b/moses/TranslationModel/UG/mm/ug_lexical_reordering.cc
index 495501bd6..d0522c528 100644
--- a/moses/TranslationModel/UG/mm/ug_lexical_reordering.cc
+++ b/moses/TranslationModel/UG/mm/ug_lexical_reordering.cc
@@ -10,26 +10,26 @@ namespace Moses
// bounds LFT and RGT and update the actual bounds L and R; update
// the total count of alignment links in the underlying phrase
// pair
- bool
+ bool
check(vector<ushort> const& v, // alignment row/column
size_t const LFT, size_t const RGT, // hard limits
ushort& L, ushort& R, size_t& count) // current bounds, count
{
if (v.size() == 0) return 0;
- if (L > v.front() && (L=v.front()) < LFT) return false;
+ if (L > v.front() && (L=v.front()) < LFT) return false;
if (R < v.back() && (R=v.back()) > RGT) return false;
count += v.size();
return true;
}
-
+
/// return number of alignment points in box, -1 on failure
- int
+ int
expand_block(vector<vector<ushort> > const& row2col,
vector<vector<ushort> > const& col2row,
size_t row, size_t col, // seed coordinates
- size_t const TOP, size_t const LFT, // hard limits
- size_t const BOT, size_t const RGT, // hard limits
- ushort* top = NULL, ushort* lft = NULL,
+ size_t const TOP, size_t const LFT, // hard limits
+ size_t const BOT, size_t const RGT, // hard limits
+ ushort* top = NULL, ushort* lft = NULL,
ushort* bot = NULL, ushort* rgt = NULL) // store results
{
if (row < TOP || row > BOT || col < LFT || col > RGT) return -1;
@@ -37,7 +37,7 @@ namespace Moses
UTIL_THROW_IF2(col >= col2row.size(), "out of bounds");
// ====================================================
- // tables grow downwards, so TOP is smaller than BOT!
+ // tables grow downwards, so TOP is smaller than BOT!
// ====================================================
ushort T, L, B, R; // box dimensions
@@ -45,7 +45,7 @@ namespace Moses
// if we start on an empty cell, search for the first alignment point
if (row2col[row].size() == 0 && col2row[col].size() == 0)
{
- if (row == TOP) while (row < BOT && !row2col[++row].size());
+ if (row == TOP) while (row < BOT && !row2col[++row].size());
else if (row == BOT) while (row > TOP && !row2col[--row].size());
if (col == LFT) while (col < RGT && !col2row[++col].size());
@@ -54,7 +54,7 @@ namespace Moses
if (row2col[row].size() == 0 && col2row[col].size() == 0)
return 0;
}
- if (row2col[row].size() == 0)
+ if (row2col[row].size() == 0)
row = col2row[col].front();
if (col2row[col].size() == 0)
col = row2col[row].front();
@@ -65,9 +65,9 @@ namespace Moses
if ((R = row2col[row].back()) > RGT) return -1;
if (B == T && R == L) return 1;
-
+
// start/end of row / column coverage:
- ushort rs = row, re = row, cs = col, ce = col;
+ ushort rs = row, re = row, cs = col, ce = col;
int ret = row2col[row].size();
for (size_t tmp = 1; tmp; ret += tmp)
{
@@ -127,7 +127,7 @@ namespace Moses
if (expand_block(a1,a2,x,y,T,L,B,R) >= 0)
return Moses::LRModel::S;
while (s2-- && a2[s2].size() == 0);
-
+
Moses::LRModel::ReorderingType ret;
ret = (a2[s2].size() == 0 ? po_other :
a2[s2].back() < s1 ? Moses::LRModel::DR :
diff --git a/moses/TranslationModel/UG/mm/ug_lexical_reordering.h b/moses/TranslationModel/UG/mm/ug_lexical_reordering.h
index d432ea37e..9004b757e 100644
--- a/moses/TranslationModel/UG/mm/ug_lexical_reordering.h
+++ b/moses/TranslationModel/UG/mm/ug_lexical_reordering.h
@@ -7,13 +7,13 @@ namespace Moses { namespace bitext {
typedef Moses::LRModel::ReorderingType PhraseOrientation;
-PhraseOrientation
+PhraseOrientation
find_po_fwd(std::vector<std::vector<ushort> >& a1,
std::vector<std::vector<ushort> >& a2,
size_t b1, size_t e1,
size_t b2, size_t e2);
-PhraseOrientation
+PhraseOrientation
find_po_bwd(std::vector<std::vector<ushort> >& a1,
std::vector<std::vector<ushort> >& a2,
size_t b1, size_t e1,
@@ -21,5 +21,5 @@ find_po_bwd(std::vector<std::vector<ushort> >& a1,
-
+
}} // close namespaces
diff --git a/moses/TranslationModel/UG/mm/ug_load_primer.h b/moses/TranslationModel/UG/mm/ug_load_primer.h
index 1cd167a68..961c45da1 100644
--- a/moses/TranslationModel/UG/mm/ug_load_primer.h
+++ b/moses/TranslationModel/UG/mm/ug_load_primer.h
@@ -1,7 +1,7 @@
//-*- c++ -*-
#pragma once
#include <boost/iostreams/device/mapped_file.hpp>
-//
+//
namespace Moses
{
class FastLoader
@@ -14,5 +14,5 @@ namespace Moses
void prime(boost::iostreams::mapped_file_source const& f);
-
+
};
diff --git a/moses/TranslationModel/UG/mm/ug_lru_cache.h b/moses/TranslationModel/UG/mm/ug_lru_cache.h
index d1c9a9767..0000b194f 100644
--- a/moses/TranslationModel/UG/mm/ug_lru_cache.h
+++ b/moses/TranslationModel/UG/mm/ug_lru_cache.h
@@ -30,25 +30,25 @@ namespace lru_cache
// timeval tstamp; // time stamp
typename boost::shared_ptr<VAL> ptr; // cached shared ptr
};
-
+
mutable boost::shared_mutex m_lock;
uint32_t m_qfront, m_qback;
- vector<Record> m_recs;
+ vector<Record> m_recs;
map_t m_idx;
- void
+ void
update_queue(KEY const& key, uint32_t const p)
{
// CALLER MUST LOCK!
- // "remove" item in slot p from it's current position of the
- // queue (which is different from the slot position) and move it
+ // "remove" item in slot p from it's current position of the
+ // queue (which is different from the slot position) and move it
// to the end
Record& r = m_recs[p];
if (m_recs.size() == 1)
r.next = r.prev = m_qback = m_qfront = 0;
-
+
if (r.key != key || p == m_qback) return;
-
+
if (m_qfront == p)
m_qfront = m_recs[r.next].prev = r.next;
else
@@ -65,8 +65,8 @@ namespace lru_cache
size_t capacity() const { return m_recs.capacity(); }
void reserve(size_t s) { m_recs.reserve(s); }
- sptr<VAL>
- get(KEY const& key)
+ sptr<VAL>
+ get(KEY const& key)
{
uint32_t p;
{ // brackets needed for lock scoping
@@ -86,13 +86,13 @@ namespace lru_cache
boost::lock_guard<boost::shared_mutex> lock(m_lock);
pair<typename map_t::iterator,bool> foo;
foo = m_idx.insert(make_pair(key,m_recs.size()));
-
+
uint32_t p = foo.first->second;
if (foo.second) // was not in the cache
{
if (m_recs.size() < m_recs.capacity())
m_recs.push_back(Record());
- else
+ else
{
foo.first->second = p = m_qfront;
m_idx.erase(m_recs[p].key);
diff --git a/moses/TranslationModel/UG/mm/ug_mm_2d_table.h b/moses/TranslationModel/UG/mm/ug_mm_2d_table.h
index cfc86b8fc..2455ca603 100644
--- a/moses/TranslationModel/UG/mm/ug_mm_2d_table.h
+++ b/moses/TranslationModel/UG/mm/ug_mm_2d_table.h
@@ -24,12 +24,12 @@ namespace ugdiss
ID id;
VAL val;
- bool
+ bool
operator<(ID const otherId) const
{
return id < otherId;
}
-
+
bool
operator<(Cell const& other) const
{
@@ -60,14 +60,14 @@ namespace ugdiss
ID numCols;
boost::shared_ptr<bio::mapped_file_source> file;
- VAL m1(ID key) const
- {
- return (key < numRows) ? M1[key] : INIT(0);
+ VAL m1(ID key) const
+ {
+ return (key < numRows) ? M1[key] : INIT(0);
}
VAL m2(ID key) const
{
- return (key < numCols) ? M2[key] : INIT(0);
+ return (key < numCols) ? M2[key] : INIT(0);
}
@@ -106,7 +106,7 @@ namespace ugdiss
Cell const* c = lower_bound(start,stop,key);
return (c != stop && c->id == key ? c->val : INIT(0));
}
-
+
template<typename OFFSET, typename ID, typename VAL, typename INIT>
void
mm2dTable<OFFSET,ID,VAL,INIT>::
@@ -140,10 +140,10 @@ namespace ugdiss
// cout << numRows << " rows; " << numCols << " columns " << endl;
M1 = reinterpret_cast<VAL const*>(index+numRows+1);
M2 = M1+numRows;
- // cout << "Table " << fname << " has " << numRows << " rows and "
+ // cout << "Table " << fname << " has " << numRows << " rows and "
// << numCols << " columns." << endl;
- // cout << "File size is " << file.size()*1024 << " bytes; ";
- // cout << "M2 starts " << (reinterpret_cast<char const*>(M2) - file.data())
+ // cout << "File size is " << file.size()*1024 << " bytes; ";
+ // cout << "M2 starts " << (reinterpret_cast<char const*>(M2) - file.data())
// << " bytes into the file" << endl;
// cout << M2[0] << endl;
}
@@ -156,8 +156,8 @@ namespace ugdiss
typename ICONT // inner container type
>
void
- write_mm_2d_table(ostream& out, vector<ICONT> const& T,
- vector<VAL> const* m1 = NULL,
+ write_mm_2d_table(ostream& out, vector<ICONT> const& T,
+ vector<VAL> const* m1 = NULL,
vector<VAL> const* m2 = NULL)
{
assert(T.size());
@@ -223,7 +223,7 @@ namespace ugdiss
OFFSET o = index[i]; // (index[i]-index[0])/sizeof(VAL);
out.write(reinterpret_cast<char*>(&o),sizeof(OFFSET));
}
-
+
// write marginals
out.write(reinterpret_cast<char const*>(&(*m1)[0]),m1->size()*sizeof(VAL));
out.write(reinterpret_cast<char const*>(&(*m2)[0]),m2->size()*sizeof(VAL));
diff --git a/moses/TranslationModel/UG/mm/ug_mm_bitext.h b/moses/TranslationModel/UG/mm/ug_mm_bitext.h
index 5b18ff1fa..be3fdfce8 100644
--- a/moses/TranslationModel/UG/mm/ug_mm_bitext.h
+++ b/moses/TranslationModel/UG/mm/ug_mm_bitext.h
@@ -3,7 +3,7 @@
namespace Moses
{
- namespace bitext
+ namespace bitext
{
template<typename TKN>
class mmBitext : public Bitext<TKN>
@@ -17,18 +17,18 @@ namespace Moses
template<typename TKN>
mmBitext<TKN>::
mmBitext()
- : Bitext<TKN>(new mmTtrack<TKN>(), new mmTtrack<TKN>(), new mmTtrack<char>(),
- new TokenIndex(), new TokenIndex(),
+ : Bitext<TKN>(new mmTtrack<TKN>(), new mmTtrack<TKN>(), new mmTtrack<char>(),
+ new TokenIndex(), new TokenIndex(),
new mmTSA<TKN>(), new mmTSA<TKN>())
{};
-
+
template<typename TKN>
void
mmBitext<TKN>::
load_document_map(string const& fname)
{
ifstream docmap(fname.c_str());
- // the docmap file should list the documents in the corpus
+ // the docmap file should list the documents in the corpus
// in the order in which they appear with one line per document:
// <docname> <number of lines / sentences>
//
@@ -38,22 +38,22 @@ namespace Moses
this->m_sid2docid.reset(new vector<id_type>(this->T1->size()));
while(getline(docmap,buffer))
{
- istringstream line(buffer);
+ istringstream line(buffer);
if (!(line>>docname)) continue; // empty line
if (docname.size() && docname[0] == '#') continue; // comment
size_t docid = this->m_docname2docid.size();
this->m_docname2docid[docname] = docid;
this->m_docname.push_back(docname);
line >> b;
- VERBOSE(1, "DOCUMENT MAP " << docname
+ VERBOSE(1, "DOCUMENT MAP " << docname
<< " " << a << "-" << b+a << endl);
for (b += a; a < b; ++a)
(*this->m_sid2docid)[a] = docid;
}
- UTIL_THROW_IF2(b != this->T1->size(),
+ UTIL_THROW_IF2(b != this->T1->size(),
"Document map doesn't match corpus!");
}
-
+
template<typename TKN>
void
mmBitext<TKN>::
@@ -77,6 +77,6 @@ namespace Moses
if (!access(docmapfile.c_str(),F_OK))
load_document_map(docmapfile);
}
-
+
}
}
diff --git a/moses/TranslationModel/UG/mm/ug_mm_tsa.h b/moses/TranslationModel/UG/mm/ug_mm_tsa.h
index 9d5038e26..ff2d4c693 100644
--- a/moses/TranslationModel/UG/mm/ug_mm_tsa.h
+++ b/moses/TranslationModel/UG/mm/ug_mm_tsa.h
@@ -40,7 +40,7 @@ namespace ugdiss
char const* index_jump(char const* a, char const* z, float ratio) const;
char const* getLowerBound(id_type t) const;
char const* getUpperBound(id_type t) const;
-
+
public:
mmTSA();
mmTSA(string fname, Ttrack<TOKEN> const* c);
@@ -53,24 +53,24 @@ namespace ugdiss
rawCnt(char const* p, char const * const q) const;
void
- getCounts(char const* p, char const * const q,
+ getCounts(char const* p, char const * const q,
count_type& sids, count_type& raw) const;
- char const*
+ char const*
readSid(char const* p, char const* q, id_type& sid) const;
- char const*
+ char const*
readSid(char const* p, char const* q, ::uint64_t& sid) const;
- char const*
+ char const*
readOffset(char const* p, char const* q, uint16_t& offset) const;
- char const*
+ char const*
readOffset(char const* p, char const* q, ::uint64_t& offset) const;
void sanityCheck() const;
- };
+ };
// ======================================================================
@@ -78,13 +78,13 @@ namespace ugdiss
* assumes that keys are flagged with '1', values with '0'
*/
template<typename TOKEN>
- char const*
+ char const*
mmTSA<TOKEN>::
index_jump(char const* a, char const* z, float ratio) const
{
assert(ratio >= 0 && ratio < 1);
char const* m = a+int(ratio*(z-a));
- if (m > a)
+ if (m > a)
{
while (m > a && *m < 0) --m;
while (m > a && *m >= 0) --m;
@@ -98,7 +98,7 @@ namespace ugdiss
template<typename TOKEN>
mmTSA<TOKEN>::
- mmTSA()
+ mmTSA()
{
this->startArray = NULL;
this->endArray = NULL;
@@ -136,9 +136,9 @@ namespace ugdiss
filepos_type idxOffset;
p = numread(p,idxOffset);
p = numread(p,this->indexSize);
-
+
// cerr << fname << ": " << idxOffset << " " << this->indexSize << endl;
-
+
this->startArray = p;
this->index = reinterpret_cast<filepos_type const*>(file.data()+idxOffset);
this->endArray = reinterpret_cast<char const*>(index);
@@ -153,7 +153,7 @@ namespace ugdiss
mmTSA<TOKEN>::
getLowerBound(id_type id) const
{
- if (id >= this->indexSize)
+ if (id >= this->indexSize)
return NULL;
return this->startArray + this->index[id];
}
@@ -165,7 +165,7 @@ namespace ugdiss
mmTSA<TOKEN>::
getUpperBound(id_type id) const
{
- if (id >= this->indexSize)
+ if (id >= this->indexSize)
return NULL;
// if (index[id] == index[id+1])
// return NULL;
@@ -232,13 +232,13 @@ namespace ugdiss
}
return ret;
}
-
+
// ======================================================================
template<typename TOKEN>
- void
+ void
mmTSA<TOKEN>::
- getCounts(char const* p, char const* const q,
+ getCounts(char const* p, char const* const q,
count_type& sids, count_type& raw) const
{
raw = 0;
diff --git a/moses/TranslationModel/UG/mm/ug_mm_ttrack.h b/moses/TranslationModel/UG/mm/ug_mm_ttrack.h
index 51ba21778..bfee14e3e 100644
--- a/moses/TranslationModel/UG/mm/ug_mm_ttrack.h
+++ b/moses/TranslationModel/UG/mm/ug_mm_ttrack.h
@@ -26,7 +26,7 @@ namespace ugdiss
{
using namespace std;
namespace bio=boost::iostreams;
-
+
template<typename TKN=id_type>
class mmTtrack : public Ttrack<TKN>
{
@@ -38,21 +38,21 @@ namespace ugdiss
id_type numWords;
bio::mapped_file_source file;
Token const* data; // pointer to first word of first sentence
- id_type const* index; /* pointer to index (change data type for corpora
+ id_type const* index; /* pointer to index (change data type for corpora
* of more than four billion words)
*/
public:
mmTtrack(string fname);
mmTtrack();
- // return pointer to beginning of sentence
- Token const* sntStart(size_t sid) const;
+ // return pointer to beginning of sentence
+ Token const* sntStart(size_t sid) const;
- // return pointer to end of sentence
- Token const* sntEnd(size_t sid) const;
+ // return pointer to end of sentence
+ Token const* sntEnd(size_t sid) const;
// return size of corpus (in number of sentences)
- size_t size() const;
+ size_t size() const;
// return size of corpus (in number of sentences)
size_t numTokens() const;
@@ -60,23 +60,23 @@ namespace ugdiss
// open an mmTtrack file
void open(string fname);
- // FUNCTIONS FOR BUILDING CORPUS TRACKS
- // write a blank file header at the beginning of a new ttrack file
+ // FUNCTIONS FOR BUILDING CORPUS TRACKS
+ // write a blank file header at the beginning of a new ttrack file
void write_blank_file_header(ostream& out) const;
// write the sentence index /idx/ and fill the file header
- void write_index_and_finalize(ostream& out,
+ void write_index_and_finalize(ostream& out,
vector<id_type> const& idx,
count_type tokenCount) const;
// copy a contiguous sequence of sentences to another stream
// return the number of tokens copied
id_type copySentences(ostream& trg, id_type start, id_type stop) const;
-
+
/** find the sentence id of a given token */
- id_type findSid(TKN const* t) const;
+ id_type findSid(TKN const* t) const;
- id_type findSid(id_type tokenOffset) const;
+ id_type findSid(id_type tokenOffset) const;
/// re-assign ids based on the id maps in /f/
void remap(string const fname, vector<id_type const*> const & f) const;
@@ -88,7 +88,7 @@ namespace ugdiss
void
mmTtrack<TKN>::
remap(string const fname, vector<id_type const*> const & f) const
- {
+ {
bio::mapped_file myfile(fname);
assert(myfile.is_open());
Moses::prime(myfile);
@@ -110,7 +110,7 @@ namespace ugdiss
mmTtrack<TKN>::
size() const
{
- return this->numSent;
+ return this->numSent;
}
template<typename TKN>
@@ -118,17 +118,17 @@ namespace ugdiss
mmTtrack<TKN>::
numTokens() const
{
- return this->numWords;
+ return this->numWords;
}
template<typename TKN>
- TKN const*
+ TKN const*
mmTtrack<TKN>::
sntStart(size_t sid) const // return pointer to beginning of sentence
{
if (sid >= this->numSent)
{
- cerr << "Fatal error: requested sentence #"<<sid<<" is beyond corpus size ("
+ cerr << "Fatal error: requested sentence #"<<sid<<" is beyond corpus size ("
<< this->numSent <<")" << endl;
}
assert(sid < this->numSent);
@@ -136,14 +136,14 @@ namespace ugdiss
}
template<typename TKN>
- TKN const*
+ TKN const*
mmTtrack<TKN>::
sntEnd(size_t sid) const // return pointer to end of sentence
{
assert(sid < this->numSent);
return data+index[sid+1];
}
-
+
template<typename TKN>
mmTtrack<TKN>::
mmTtrack()
@@ -161,7 +161,7 @@ namespace ugdiss
}
template<typename TKN>
- void
+ void
mmTtrack<TKN>::
open(string fname)
{
@@ -235,7 +235,7 @@ namespace ugdiss
}
template<typename TKN>
- id_type
+ id_type
mmTtrack<TKN>::
copySentences(ostream& trg, id_type start, id_type stop) const
{
diff --git a/moses/TranslationModel/UG/mm/ug_mmbitext.cc b/moses/TranslationModel/UG/mm/ug_mmbitext.cc
index 2c00665bb..34e3f1b1e 100644
--- a/moses/TranslationModel/UG/mm/ug_mmbitext.cc
+++ b/moses/TranslationModel/UG/mm/ug_mmbitext.cc
@@ -21,7 +21,7 @@
// ++this->in_progress;
// this->lock.unlock();
// }
-
+
// void
// pstats::
// release()
@@ -52,7 +52,7 @@
// mmbitext()
// : ag(NULL)
// {
-
+
// }
// bool
@@ -78,13 +78,13 @@
// {
// if (flip) { p = binread(p,trg); assert(p<x); p = binread(p,src); }
// else { p = binread(p,src); assert(p<x); p = binread(p,trg); }
-// if (src < start || src >= stop)
+// if (src < start || src >= stop)
// forbidden.set(trg);
// else
// {
// lft = min(lft,trg);
// rgt = max(rgt,trg);
-// if (core_alignment)
+// if (core_alignment)
// {
// if (flip) aln[trg].push_back(src);
// else aln[src].push_back(trg);
@@ -101,16 +101,16 @@
// }
// cout << endl;
// #endif
-
+
// for (size_t i = lft; i <= rgt; ++i)
-// if (forbidden[i])
+// if (forbidden[i])
// return false;
-
+
// s2 = lft; for (s1 = s2; s1 && !forbidden[s1-1]; --s1);
// e1 = rgt+1; for (e2 = e1; e2 < forbidden.size() && !forbidden[e2]; ++e2);
-
+
// if (lft > rgt) return false;
-// if (core_alignment)
+// if (core_alignment)
// {
// core_alignment->clear();
// if (flip)
@@ -147,11 +147,11 @@
// prep2(phrase);
// }
-// sptr<mmbitext::pstats>
+// sptr<mmbitext::pstats>
// mmbitext::
// prep2(iter const& phrase)
// {
-// if (!ag)
+// if (!ag)
// {
// ag = new agenda(*this);
// ag->add_workers(20);
@@ -197,11 +197,11 @@
// continue;
// }
-// stats->lock.lock();
-// stats->good += 1;
+// stats->lock.lock();
+// stats->good += 1;
// stats->lock.unlock();
-// for (size_t k = 0; k < aln.size(); k += 2)
+// for (size_t k = 0; k < aln.size(); k += 2)
// aln[k] += s2 - s1;
// Token const* o = (fwd ? ag.bitext.T2 : ag.bitext.T1).sntStart(sid);
// float sample_weight = 1./((s2-s1+1)*(e2-e1+1));
@@ -215,14 +215,14 @@
// stats->add(b,sample_weight,aln);
// if (i < e2) assert(b.extend(o[i].id()));
// }
-// if (fwd && s < s2)
-// for (size_t k = 0; k < aln.size(); k += 2)
+// if (fwd && s < s2)
+// for (size_t k = 0; k < aln.size(); k += 2)
// --aln[k];
// }
// stats->release();
// }
// }
-
+
// void
// mmbitext::
// pstats::
@@ -239,7 +239,7 @@
// agenda(mmbitext const& thebitext)
// : shutdown(false), doomed(0), bitext(thebitext)
// {
-
+
// }
// mmbitext::
@@ -259,13 +259,13 @@
// {
// if (ag) delete ag;
// }
-
+
// sptr<mmbitext::pstats>
// mmbitext::
// agenda::
// add_job(mmbitext::iter const& phrase, size_t const max_samples)
// {
-// static boost::posix_time::time_duration nodelay(0,0,0,0);
+// static boost::posix_time::time_duration nodelay(0,0,0,0);
// job j;
// j.stats.reset(new mmbitext::pstats());
@@ -296,11 +296,11 @@
// bool
// mmbitext::
// agenda::
-// get_task(uint64_t & sid, uint64_t & offset, uint64_t & len,
+// get_task(uint64_t & sid, uint64_t & offset, uint64_t & len,
// bool & fwd, sptr<mmbitext::pstats> & stats)
// {
// boost::unique_lock<boost::mutex> lock(this->lock);
-// if (this->doomed || this->shutdown)
+// if (this->doomed || this->shutdown)
// {
// if (this->doomed) --this->doomed;
// return false;
@@ -309,7 +309,7 @@
// // {
// // cerr << "no jobs" << endl;
// // this->ready.wait(lock);
-// // if (this->doomed || this->shutdown)
+// // if (this->doomed || this->shutdown)
// // {
// // if (this->doomed) --this->doomed;
// // return false;
@@ -346,7 +346,7 @@
// boost::lock_guard<boost::mutex> lock(stats->lock);
// if (stats->raw_cnt == ctr) ++stats->raw_cnt;
// size_t rnum = util::rand_excl(stats->raw_cnt - ctr++);
-// // cout << stats->raw_cnt << " " << ctr-1 << " "
+// // cout << stats->raw_cnt << " " << ctr-1 << " "
// // << rnum << " " << max_samples - stats->good << endl;
// if (rnum < max_samples - stats->good)
// {
@@ -364,7 +364,7 @@
// agenda::
// add_workers(int n)
// {
-// static boost::posix_time::time_duration nodelay(0,0,0,0);
+// static boost::posix_time::time_duration nodelay(0,0,0,0);
// boost::lock_guard<boost::mutex> lock(this->lock);
// // house keeping: remove all workers that have finished
// for (size_t i = 0; i < workers.size(); )
@@ -377,7 +377,7 @@
// }
// else ++i;
// }
-// if (n < 0)
+// if (n < 0)
// {
// this->doomed -= n;
// }
@@ -394,8 +394,8 @@
// mmbitext::
// jstats::
// jstats()
-// {
-// my_aln.reserve(1);
+// {
+// my_aln.reserve(1);
// }
// mmbitext::
@@ -406,8 +406,8 @@
// my_wcnt = other.wcnt();
// my_aln = other.aln();
// }
-
-// void
+
+// void
// mmbitext::
// jstats::
// add(float w, vector<uchar> const& a)
@@ -419,7 +419,7 @@
// {
// size_t i = 0;
// while (i < my_aln.size() && my_aln[i].second != a) ++i;
-// if (i == my_aln.size())
+// if (i == my_aln.size())
// my_aln.push_back(pair<size_t,vector<uchar> >(1,a));
// else
// my_aln[i].first++;
@@ -431,7 +431,7 @@
// uint32_t
// mmbitext::
// jstats::
-// rcnt() const
+// rcnt() const
// { return my_rcnt; }
// float
@@ -443,7 +443,7 @@
// vector<pair<size_t, vector<uchar> > > const&
// mmbitext::
// jstats::
-// aln() const
+// aln() const
// { return my_aln; }
// }
diff --git a/moses/TranslationModel/UG/mm/ug_mmbitext.h b/moses/TranslationModel/UG/mm/ug_mmbitext.h
index e7378e7f6..3837abc59 100644
--- a/moses/TranslationModel/UG/mm/ug_mmbitext.h
+++ b/moses/TranslationModel/UG/mm/ug_mmbitext.h
@@ -4,10 +4,10 @@
// Written by Ulrich Germann
// things we can do to speed up things:
-// - set up threads at startup time that force the
+// - set up threads at startup time that force the
// data in to memory sequentially
//
-// - use multiple agendas for better load balancing and to avoid
+// - use multiple agendas for better load balancing and to avoid
// competition for locks
#include <string>
@@ -46,8 +46,8 @@ namespace Moses {
class jstats; // phrase pair ("joint") statistics
class agenda
{
- boost::mutex lock;
- boost::condition_variable ready;
+ boost::mutex lock;
+ boost::condition_variable ready;
class job;
class worker;
list<job> joblist;
@@ -59,9 +59,9 @@ namespace Moses {
agenda(mmbitext const& bitext);
~agenda();
void add_workers(int n);
- sptr<pstats> add_job(mmbitext::iter const& phrase,
+ sptr<pstats> add_job(mmbitext::iter const& phrase,
size_t const max_samples);
- bool get_task(uint64_t & sid, uint64_t & offset, uint64_t & len,
+ bool get_task(uint64_t & sid, uint64_t & offset, uint64_t & len,
bool & fwd, sptr<mmbitext::pstats> & stats);
};
@@ -72,22 +72,22 @@ namespace Moses {
mmTtrack<char> Tx; // word alignments
mmTtrack<Token> T1,T2; // token tracks
TokenIndex V1,V2; // vocabs
- mmTSA<Token> I1,I2; // suffix arrays
+ mmTSA<Token> I1,I2; // suffix arrays
/// given the source phrase sid[start:stop]
- // find the possible start (s1 .. s2) and end (e1 .. e2)
+ // find the possible start (s1 .. s2) and end (e1 .. e2)
// points of the target phrase; if non-NULL, store word
- // alignments in *core_alignment. If /flip/, source phrase is
+ // alignments in *core_alignment. If /flip/, source phrase is
// L2.
- bool
+ bool
find_trg_phr_bounds
- (size_t const sid, size_t const start, size_t const stop,
- size_t & s1, size_t & s2, size_t & e1, size_t & e2,
+ (size_t const sid, size_t const start, size_t const stop,
+ size_t & s1, size_t & s2, size_t & e1, size_t & e2,
vector<uchar> * core_alignment, bool const flip) const;
boost::unordered_map<uint64_t,sptr<pstats> > cache1,cache2;
private:
- sptr<pstats>
+ sptr<pstats>
prep2(iter const& phrase);
public:
mmbitext();
@@ -105,8 +105,8 @@ namespace Moses {
jstats
{
uint32_t my_rcnt; // unweighted count
- float my_wcnt; // weighted count
- vector<pair<size_t, vector<uchar> > > my_aln;
+ float my_wcnt; // weighted count
+ vector<pair<size_t, vector<uchar> > > my_aln;
boost::mutex lock;
public:
jstats();
@@ -117,7 +117,7 @@ namespace Moses {
void add(float w, vector<uchar> const& a);
};
- // struct
+ // struct
// mmbitext:
// phrasepair
// {
@@ -125,32 +125,32 @@ namespace Moses {
// size_t len;
// size_t cnt;
// float fwd, bwd;
-
+
// map<uint32_t,uint32_t> aln;
// string toString(TokenIndex const& V) const;
// bool operator<(phrase const& other) const;
// bool operator>(phrase const& other) const;
// phrase(pair<pair<Token const*, size_t>,jstats> const & foo);
-
+
// };
- struct
+ struct
mmbitext::
pstats
{
boost::mutex lock; // for parallel gathering of stats
boost::condition_variable ready; // consumers can wait for this data structure to be ready.
- size_t raw_cnt; // (approximate) raw occurrence count
+ size_t raw_cnt; // (approximate) raw occurrence count
size_t sample_cnt; // number of instances selected during sampling
size_t good; // number of selected instances with valid word alignments
size_t sum_pairs;
- // size_t snt_cnt;
+ // size_t snt_cnt;
// size_t sample_snt;
size_t in_progress; // keeps track of how many threads are currently working on this
boost::unordered_map<uint64_t, jstats> trg;
- pstats();
+ pstats();
// vector<phrase> nbest;
// void select_nbest(size_t const N=10);
void release();
@@ -167,7 +167,7 @@ namespace Moses {
public:
worker(agenda& a);
void operator()();
-
+
};
class
diff --git a/moses/TranslationModel/UG/mm/ug_phrasepair.cc b/moses/TranslationModel/UG/mm/ug_phrasepair.cc
index ec3423fdc..d533dafa3 100644
--- a/moses/TranslationModel/UG/mm/ug_phrasepair.cc
+++ b/moses/TranslationModel/UG/mm/ug_phrasepair.cc
@@ -3,10 +3,10 @@
namespace Moses {
namespace bitext {
-void
+void
fill_lr_vec2
-( LRModel::ModelType mdl, float const* const cnt,
- float const total, float* v)
+( LRModel::ModelType mdl, float const* const cnt,
+ float const total, float* v)
{
if (mdl == LRModel::Monotonic)
{
@@ -23,17 +23,17 @@ fill_lr_vec2
else if (mdl == LRModel::MSD)
{
float denom = log(total + 3);
- v[LRModel::M] = log(cnt[LRModel::M] + 1) - denom;
- v[LRModel::S] = log(cnt[LRModel::S] + 1) - denom;
- v[LRModel::D] = log(cnt[LRModel::DR] +
+ v[LRModel::M] = log(cnt[LRModel::M] + 1) - denom;
+ v[LRModel::S] = log(cnt[LRModel::S] + 1) - denom;
+ v[LRModel::D] = log(cnt[LRModel::DR] +
cnt[LRModel::DL] + 1) - denom;
}
else if (mdl == LRModel::MSLR)
{
float denom = log(total + 4);
- v[LRModel::M] = log(cnt[LRModel::M] + 1) - denom;
+ v[LRModel::M] = log(cnt[LRModel::M] + 1) - denom;
v[LRModel::S] = log(cnt[LRModel::S] + 1) - denom;
- v[LRModel::DL] = log(cnt[LRModel::DL] + 1) - denom;
+ v[LRModel::DL] = log(cnt[LRModel::DL] + 1) - denom;
v[LRModel::DR] = log(cnt[LRModel::DR] + 1) - denom;
}
else UTIL_THROW2("Reordering type not recognized!");
diff --git a/moses/TranslationModel/UG/mm/ug_phrasepair.h b/moses/TranslationModel/UG/mm/ug_phrasepair.h
index 70d4b0d82..53a9f761c 100644
--- a/moses/TranslationModel/UG/mm/ug_phrasepair.h
+++ b/moses/TranslationModel/UG/mm/ug_phrasepair.h
@@ -11,7 +11,7 @@ namespace Moses
namespace bitext
{
template<typename Token>
- class
+ class
PhrasePair
{
public:
@@ -36,24 +36,24 @@ namespace Moses
bool operator<(PhrasePair const& other) const;
bool operator>(PhrasePair const& other) const;
- bool operator<=(PhrasePair const& other) const;
+ bool operator<=(PhrasePair const& other) const;
bool operator>=(PhrasePair const& other) const;
void init();
- void init(uint64_t const pid1, bool is_inverse,
+ void init(uint64_t const pid1, bool is_inverse,
Token const* x, uint32_t const len,
pstats const* ps = NULL, size_t const numfeats=0);
- PhrasePair const&
- update(uint64_t const pid2, Token const* x,
+ PhrasePair const&
+ update(uint64_t const pid2, Token const* x,
uint32_t const len, jstats const& js);
void
- fill_lr_vec(LRModel::Direction const& dir,
- LRModel::ModelType const& mdl,
+ fill_lr_vec(LRModel::Direction const& dir,
+ LRModel::ModelType const& mdl,
vector<float>& v) const;
void
- print(ostream& out, TokenIndex const& V1, TokenIndex const& V2,
+ print(ostream& out, TokenIndex const& V1, TokenIndex const& V2,
LRModel const& LR) const;
class SortByTargetIdSeq
@@ -62,7 +62,7 @@ namespace Moses
int cmp(PhrasePair const& a, PhrasePair const& b) const;
bool operator()(PhrasePair const& a, PhrasePair const& b) const;
};
-
+
class SortDescendingByJointCount
{
public:
@@ -73,8 +73,8 @@ namespace Moses
template<typename Token>
void PhrasePair<Token>
- ::init(uint64_t const pid1, bool is_inverse,
- Token const* x, uint32_t const len,
+ ::init(uint64_t const pid1, bool is_inverse,
+ Token const* x, uint32_t const len,
pstats const* ps, size_t const numfeats)
{
inverse = is_inverse;
@@ -98,15 +98,15 @@ namespace Moses
template<typename Token>
PhrasePair<Token> const&
PhrasePair<Token>
- ::update(uint64_t const pid2,
- Token const* x, uint32_t const len, jstats const& js)
+ ::update(uint64_t const pid2,
+ Token const* x, uint32_t const len, jstats const& js)
{
p2 = pid2;
start2 = x; len2 = len;
raw2 = js.cnt2();
joint = js.rcnt();
assert(js.aln().size());
- if (js.aln().size())
+ if (js.aln().size())
aln = js.aln()[0].second;
// float total_fwd = 0, total_bwd = 0;
// for (int i = 0; i <= Moses::LRModel::NONE; i++)
@@ -123,48 +123,48 @@ namespace Moses
dfwd[i] = js.dcnt_fwd(po);
dbwd[i] = js.dcnt_bwd(po);
}
-
+
indoc = js.indoc;
return *this;
}
template<typename Token>
- bool
+ bool
PhrasePair<Token>
- ::operator<(PhrasePair const& other) const
- {
- return this->score < other.score;
+ ::operator<(PhrasePair const& other) const
+ {
+ return this->score < other.score;
}
-
+
template<typename Token>
- bool
+ bool
PhrasePair<Token>
::operator>(PhrasePair const& other) const
- {
- return this->score > other.score;
+ {
+ return this->score > other.score;
}
template<typename Token>
- bool
+ bool
PhrasePair<Token>
- ::operator<=(PhrasePair const& other) const
- {
- return this->score <= other.score;
+ ::operator<=(PhrasePair const& other) const
+ {
+ return this->score <= other.score;
}
-
+
template<typename Token>
- bool
+ bool
PhrasePair<Token>
::operator>=(PhrasePair const& other) const
- {
- return this->score >= other.score;
+ {
+ return this->score >= other.score;
}
template<typename Token>
PhrasePair<Token> const&
PhrasePair<Token>
- ::operator+=(PhrasePair const& o)
- {
+ ::operator+=(PhrasePair const& o)
+ {
raw1 += o.raw1;
raw2 += o.raw2;
good1 += o.good1;
@@ -178,16 +178,16 @@ namespace Moses
template<typename Token>
PhrasePair<Token>
- ::PhrasePair(PhrasePair<Token> const& o)
+ ::PhrasePair(PhrasePair<Token> const& o)
: start1(o.start1) , start2(o.start2)
, len1(o.len1) , len2(o.len2)
, p1(o.p1) , p2(o.p2)
- , raw1(o.raw1) , raw2(o.raw2)
+ , raw1(o.raw1) , raw2(o.raw2)
, sample1(o.sample1) , sample2(o.sample2)
, good1(o.good1) , good2(o.good2)
- , joint(o.joint)
+ , joint(o.joint)
, fvals(o.fvals)
- , aln(o.aln)
+ , aln(o.aln)
, score(o.score)
, inverse(o.inverse)
, indoc(o.indoc)
@@ -198,7 +198,7 @@ namespace Moses
dbwd[i] = o.dbwd[i];
}
}
-
+
template<typename Token>
int PhrasePair<Token>
::SortByTargetIdSeq
@@ -207,7 +207,7 @@ namespace Moses
size_t i = 0;
Token const* x = a.start2;
Token const* y = b.start2;
- while (i < a.len2 && i < b.len2 && x->id() == y->id())
+ while (i < a.len2 && i < b.len2 && x->id() == y->id())
{
x = x->next();
y = y->next();
@@ -218,7 +218,7 @@ namespace Moses
if (i == b.len2) return 1;
return x->id() < y->id() ? -1 : 1;
}
-
+
template<typename Token>
bool PhrasePair<Token>
::SortByTargetIdSeq
@@ -237,16 +237,16 @@ namespace Moses
}
template<typename Token>
- bool
+ bool
PhrasePair<Token>
::SortDescendingByJointCount
::operator()(PhrasePair const& a, PhrasePair const& b) const
{
return this->cmp(a,b) < 0;
}
-
+
template<typename Token>
- void
+ void
PhrasePair<Token>
::init()
{
@@ -257,21 +257,21 @@ namespace Moses
}
- void
- fill_lr_vec2(LRModel::ModelType mdl, float const* const cnt,
+ void
+ fill_lr_vec2(LRModel::ModelType mdl, float const* const cnt,
float const total, float* v);
-
+
template<typename Token>
void
PhrasePair<Token>
- ::fill_lr_vec(LRModel::Direction const& dir,
- LRModel::ModelType const& mdl,
+ ::fill_lr_vec(LRModel::Direction const& dir,
+ LRModel::ModelType const& mdl,
vector<float>& v) const
{
// how many distinct scores do we have?
size_t num_scores = (mdl == LRModel::MSLR ? 4 : mdl == LRModel::MSD ? 3 : 2);
size_t offset;
- if (dir == LRModel::Bidirectional)
+ if (dir == LRModel::Bidirectional)
{
offset = num_scores;
num_scores *= 2;
@@ -281,32 +281,32 @@ namespace Moses
v.resize(num_scores);
// determine the denominator
- float total = 0;
- for (size_t i = 0; i <= LRModel::NONE; ++i)
+ float total = 0;
+ for (size_t i = 0; i <= LRModel::NONE; ++i)
total += dfwd[i];
if (dir != LRModel::Forward) // i.e., Backward or Bidirectional
fill_lr_vec2(mdl, dbwd, total, &v[0]);
if (dir != LRModel::Backward) // i.e., Forward or Bidirectional
fill_lr_vec2(mdl, dfwd, total, &v[offset]);
- }
-
+ }
+
template<typename Token>
void
PhrasePair<Token>
- ::print(ostream& out, TokenIndex const& V1, TokenIndex const& V2,
+ ::print(ostream& out, TokenIndex const& V1, TokenIndex const& V2,
LRModel const& LR) const
{
- out << toString (V1, this->start1, this->len1) << " ::: "
- << toString (V2, this->start2, this->len2) << " "
+ out << toString (V1, this->start1, this->len1) << " ::: "
+ << toString (V2, this->start2, this->len2) << " "
<< this->joint << " [";
for (size_t i = 0; i < this->indoc.size(); ++i)
- {
- if (i) out << " ";
- out << this->indoc[i];
+ {
+ if (i) out << " ";
+ out << this->indoc[i];
}
- out << "] [";
+ out << "] [";
vector<float> lrscores;
this->fill_lr_vec(LR.GetDirection(), LR.GetModelType(), lrscores);
for (size_t i = 0; i < lrscores.size(); ++i)
@@ -322,7 +322,7 @@ namespace Moses
if (i) *log << " ";
*log << p.dfwd[i];
}
- *log << "] [";
+ *log << "] [";
for (int i = 0; i <= Moses::LRModel::NONE; i++)
{
// PhraseOrientation po = static_cast<PhraseOrientation>(i);
diff --git a/moses/TranslationModel/UG/mm/ug_sampling_bias.cc b/moses/TranslationModel/UG/mm/ug_sampling_bias.cc
index fea57e719..95b93ec7b 100644
--- a/moses/TranslationModel/UG/mm/ug_sampling_bias.cc
+++ b/moses/TranslationModel/UG/mm/ug_sampling_bias.cc
@@ -1,7 +1,7 @@
#include "ug_sampling_bias.h"
#include <iostream>
#include <boost/foreach.hpp>
-
+
#ifdef HAVE_CURLPP
#include <curlpp/Options.hpp>
#include <curlpp/cURLpp.hpp>
@@ -15,11 +15,11 @@ namespace Moses
using ugdiss::id_type;
#ifdef HAVE_CURLPP
- std::string
+ std::string
query_bias_server(std::string const& url, std::string const& text)
{
// communicate with the bias server; resuts will be in ...
- std::ostringstream os;
+ std::ostringstream os;
curlpp::Easy myRequest;
std::string query = url+curlpp::escape(text);
myRequest.setOpt(new curlpp::options::Url(query));
@@ -32,7 +32,7 @@ namespace Moses
DocumentBias
::DocumentBias
- ( std::vector<id_type> const& sid2doc,
+ ( std::vector<id_type> const& sid2doc,
std::map<std::string,id_type> const& docname2docid,
std::string const& server_url, std::string const& text,
std::ostream* log)
@@ -45,15 +45,15 @@ namespace Moses
#endif
}
- void
+ void
DocumentBias
::init_from_json
( std::string const& json, std::map<std::string,id_type> const& docname2docid,
std::ostream* log)
- { // poor man's special purpose json parser for responses from the
+ { // poor man's special purpose json parser for responses from the
// MMT bias server
-
- std::string d; float total = 0; std::map<std::string,float> bias;
+
+ std::string d; float total = 0; std::map<std::string,float> bias;
size_t i = 0; while (i < json.size() && json[i] != '"') ++i;
while (++i < json.size())
{
@@ -61,34 +61,34 @@ namespace Moses
if (i >= json.size()) break;
float& f = bias[json.substr(k,i-k)];
while (++i < json.size() && json[i] != ':');
- k = ++i;
+ k = ++i;
while (++i < json.size() && json[i] != ',' && json[i] != '}');
total += (f = atof(json.substr(k, i-k).c_str()));
k = ++i; while (i < json.size() && json[i] != '"') ++i;
}
-
+
typedef std::pair<std::string const,float> item;
- if (total) { BOOST_FOREACH(item& x, bias) { x.second /= total; } }
+ if (total) { BOOST_FOREACH(item& x, bias) { x.second /= total; } }
if (log)
{
- BOOST_FOREACH(item& x, bias)
+ BOOST_FOREACH(item& x, bias)
{
std::map<std::string,id_type>::const_iterator m;
m = docname2docid.find(x.first);
int docid = m != docname2docid.end() ? m->second : -1;
- *log << "CONTEXT SERVER RESPONSE "
+ *log << "CONTEXT SERVER RESPONSE "
<< "[" << docid << "] "
- << x.first << " " << x.second << std::endl;
+ << x.first << " " << x.second << std::endl;
}
}
init(bias, docname2docid);
-
+
// using xmlrpc_parse_json didn't always work (parser errors)
// xmlrpc_value* b = xmlrpc_parse_json(env ,buf.str().c_str());
- // std::cerr << "|" << buf.str() << "|" << std::endl;
- // // if (b == NULL) std::cerr << "OOpS" << std::endl;
+ // std::cerr << "|" << buf.str() << "|" << std::endl;
+ // // if (b == NULL) std::cerr << "OOpS" << std::endl;
// xmlrpc_c::value_struct v(b); // = *b;
- // std::map<std::string, xmlrpc_c::value> const
+ // std::map<std::string, xmlrpc_c::value> const
// bmap = static_cast<map<std::string, xmlrpc_c::value> >(v);
// std::map<std::string, float> bias;
// typedef std::map<std::string, xmlrpc_c::value>::value_type item;
@@ -99,11 +99,11 @@ namespace Moses
// }
// typedef std::map<std::string, float>::value_type fitem;
// BOOST_FOREACH(fitem const& x, bias)
- // std::cerr << x.first << " " << x.second/total << std::endl;
+ // std::cerr << x.first << " " << x.second/total << std::endl;
// // delete b;
}
- void
+ void
DocumentBias
::init(std::map<std::string,float> const& biasmap,
std::map<std::string,id_type> const& docname2docid)
@@ -119,60 +119,60 @@ namespace Moses
BOOST_FOREACH(doc_record const& d, docname2docid)
std::cerr << "BIAS " << d.first << " " << m_bias[d.second] << std::endl;
}
-
- id_type
+
+ id_type
DocumentBias
::GetClass(id_type const idx) const
- {
- return m_sid2docid.at(idx);
+ {
+ return m_sid2docid.at(idx);
}
-
- float
+
+ float
DocumentBias
- ::operator[](id_type const idx) const
- {
- UTIL_THROW_IF2(idx >= m_sid2docid.size(),
+ ::operator[](id_type const idx) const
+ {
+ UTIL_THROW_IF2(idx >= m_sid2docid.size(),
"Out of bounds: " << idx << "/" << m_sid2docid.size());
return m_bias[m_sid2docid[idx]];
}
- size_t
+ size_t
DocumentBias
- ::size() const
+ ::size() const
{ return m_sid2docid.size(); }
SentenceBias
- ::SentenceBias(std::vector<float> const& bias)
+ ::SentenceBias(std::vector<float> const& bias)
: m_bias(bias) { }
SentenceBias
::SentenceBias(size_t const s) : m_bias(s) { }
- id_type
+ id_type
SentenceBias
::GetClass(id_type idx) const { return idx; }
- float&
+ float&
SentenceBias
- ::operator[](id_type const idx)
+ ::operator[](id_type const idx)
{
UTIL_THROW_IF2(idx >= m_bias.size(), "Out of bounds");
return m_bias[idx];
}
- float
+ float
SentenceBias
- ::operator[](id_type const idx) const
- {
+ ::operator[](id_type const idx) const
+ {
UTIL_THROW_IF2(idx >= m_bias.size(), "Out of bounds");
return m_bias[idx];
}
-
- size_t
+
+ size_t
SentenceBias
::size() const { return m_bias.size(); }
-
+
}
}
diff --git a/moses/TranslationModel/UG/mm/ug_sampling_bias.h b/moses/TranslationModel/UG/mm/ug_sampling_bias.h
index faed69e63..f540ddc76 100644
--- a/moses/TranslationModel/UG/mm/ug_sampling_bias.h
+++ b/moses/TranslationModel/UG/mm/ug_sampling_bias.h
@@ -15,54 +15,54 @@ namespace Moses
std::string query_bias_server(std::string const& url, std::string const& text);
- class SamplingBias
+ class SamplingBias
{
public:
int loglevel;
std::ostream* log;
- virtual float
+ virtual float
operator[](id_type const ID) const = 0;
// returns (unnormalized bias) for the class of item ID
- virtual size_t size() const = 0;
+ virtual size_t size() const = 0;
// number of classes
-
- virtual id_type
+
+ virtual id_type
GetClass(id_type const ID) const = 0;
// returns class of item ID
};
-
+
class
DocumentBias : public SamplingBias
{
std::vector<id_type> const& m_sid2docid;
std::vector<float> m_bias;
-
+
public:
-
+
DocumentBias(std::vector<id_type> const& sid2doc,
std::map<std::string,id_type> const& docname2docid,
std::string const& server_url, std::string const& text,
std::ostream* log);
- void
- init_from_json
- ( std::string const& json,
+ void
+ init_from_json
+ ( std::string const& json,
std::map<std::string,id_type> const& docname2docid,
std::ostream* log );
-
- void
+
+ void
init
( std::map<std::string,float> const& biasmap,
std::map<std::string,id_type> const& docname2docid);
-
- id_type
+
+ id_type
GetClass(id_type const idx) const;
- float
+ float
operator[](id_type const idx) const;
- size_t
+ size_t
size() const;
};
@@ -76,10 +76,10 @@ namespace Moses
id_type GetClass(id_type idx) const;
- float& operator[](id_type const idx);
- float operator[](id_type const idx) const;
+ float& operator[](id_type const idx);
+ float operator[](id_type const idx) const;
size_t size() const;
-
+
};
}
diff --git a/moses/TranslationModel/UG/mm/ug_tsa_array_entry.h b/moses/TranslationModel/UG/mm/ug_tsa_array_entry.h
index 034a74bd9..3af929644 100644
--- a/moses/TranslationModel/UG/mm/ug_tsa_array_entry.h
+++ b/moses/TranslationModel/UG/mm/ug_tsa_array_entry.h
@@ -1,13 +1,13 @@
// -*- c++ -*-
// (c) 2007-2010 Ulrich Germann
// implementation of stuff related to ArrayEntries
-// this file should only be included via ug_tsa_base.h,
+// this file should only be included via ug_tsa_base.h,
// never by itself
#ifndef __ug_tsa_array_entry_h
#define __ug_tsa_array_entry_h
#include "ug_ttrack_position.h"
-namespace ugdiss
+namespace ugdiss
{
namespace tsa
{
@@ -20,7 +20,7 @@ namespace ugdiss
ArrayEntry();
ArrayEntry(char const* p);
-
+
template<typename TSA_TYPE>
ArrayEntry(TSA_TYPE const* S, char const* p);
@@ -34,7 +34,7 @@ namespace ugdiss
}
// template<typename TSA_TYPE>
- // class SamplingArrayEntryIterator
+ // class SamplingArrayEntryIterator
// : public tsa::ArrayEntry
// {
// size_t const N; // (approximate) total number of occurrences
@@ -46,7 +46,7 @@ namespace ugdiss
// public:
// SamplingArrayEntryIterator(TSA_TYPE::tree_iterator const& m, size_t const s);
// bool step(); // returns false when at end of range
- // bool done(); //
+ // bool done(); //
// };
// template<typename TSA_TYPE>
@@ -60,7 +60,7 @@ namespace ugdiss
// , root(m.root)
// , stop(m.upper_bound(-1))
// { }
-
+
// template<typename TSA_TYPE>
// bool
// SamplingArrayEntryIterator::
diff --git a/moses/TranslationModel/UG/mm/ug_tsa_base.h b/moses/TranslationModel/UG/mm/ug_tsa_base.h
index 83593c79c..8a4117910 100644
--- a/moses/TranslationModel/UG/mm/ug_tsa_base.h
+++ b/moses/TranslationModel/UG/mm/ug_tsa_base.h
@@ -26,7 +26,7 @@ namespace ugdiss
namespace bio=boost::iostreams;
template<typename TKN>
- TKN const*
+ TKN const*
next(TKN const* x)
{
return static_cast<TKN const*>(x ? x->next() : NULL);
@@ -42,20 +42,20 @@ namespace ugdiss
* ordering of sequences. Both are decleared/defined in
* ug_corpus_token.{h|cc}
*/
- template<typename TKN>
- class TSA
+ template<typename TKN>
+ class TSA
{
public:
virtual ~TSA() {};
- typedef TSA_tree_iterator<TKN> tree_iterator;
+ typedef TSA_tree_iterator<TKN> tree_iterator;
// allows iteration over the array as if it were a trie
- typedef tsa::ArrayEntry ArrayEntry;
+ typedef tsa::ArrayEntry ArrayEntry;
/* an entry in the array, for iteration over all occurrences of a
* particular sequence */
- // typedef boost::dynamic_bitset<uint64_t> bitset;
+ // typedef boost::dynamic_bitset<uint64_t> bitset;
typedef boost::shared_ptr<bitvector> bitset_pointer;
typedef TKN Token;
- typedef BitSetCache<TSA<TKN> > BSC_t;
+ typedef BitSetCache<TSA<TKN> > BSC_t;
/* to allow caching of bit vectors that are expensive to create on
* the fly */
@@ -67,7 +67,7 @@ namespace ugdiss
char const* endArray; // ... and end ...
// of memory block storing the actual TSA
- size_t corpusSize;
+ size_t corpusSize;
/** size of the corpus (in number of sentences) of the corpus
* underlying the sequence array.
*
@@ -76,37 +76,37 @@ namespace ugdiss
* suffix array is based on a subset
* of the sentences of /corpus/.
*/
-
- id_type numTokens;
+
+ id_type numTokens;
/** size of the corpus (in number of tokens) of the corpus underlying the
- * sequence array.
+ * sequence array.
*
* ATTENTION: This number may differ from corpus->numTokens(), namely when
- * the suffix array is based on a subset of the sentences of
+ * the suffix array is based on a subset of the sentences of
* /corpus/.
*/
- id_type indexSize;
- // (number of entries +1) in the index of root-level nodes
+ id_type indexSize;
+ // (number of entries +1) in the index of root-level nodes
size_t BitSetCachingThreshold;
-
+
////////////////////////////////////////////////////////////////
// private member functions:
- /** @return an index position approximately /fraction/ between
+ /** @return an index position approximately /fraction/ between
* /startRange/ and /endRange/.
- */
- virtual
- char const*
- index_jump(char const* startRange,
- char const* stopRange,
+ */
+ virtual
+ char const*
+ index_jump(char const* startRange,
+ char const* stopRange,
float fraction) const = 0;
-
- /** return the index position of the first item that
+
+ /** return the index position of the first item that
* is equal to or includes [refStart,refStart+refLen) as a prefix
*/
- char const*
+ char const*
find_start(char const* lo, char const* const upX,
TKN const* const refStart, int refLen,
size_t d) const;
@@ -114,19 +114,19 @@ namespace ugdiss
/** return the index position of the first item that is greater than
* [refStart,refStart+refLen) and does not include it as a prefix
*/
- char const*
+ char const*
find_end(char const* lo, char const* const upX,
TKN const* const refStart, int refLen,
size_t d) const;
-
+
/** return the index position of the first item that is longer than
* [refStart,refStart+refLen) and includes it as a prefix
*/
- char const*
+ char const*
find_longer(char const* lo, char const* const upX,
TKN const* const refStart, int refLen,
size_t d) const;
-
+
/** Returns a char const* pointing to the position in the data block
* where the first item starting with token /id/ is located.
*/
@@ -140,37 +140,37 @@ namespace ugdiss
public:
boost::shared_ptr<BSC_t> bsc;
-
+
char const* arrayStart() const { return startArray; }
char const* arrayEnd() const { return endArray; }
- /** @return a pointer to the beginning of the index entry range covering
+ /** @return a pointer to the beginning of the index entry range covering
* [keyStart,keyStop)
*/
- char const*
+ char const*
lower_bound(typename vector<TKN>::const_iterator const& keyStart,
typename vector<TKN>::const_iterator const& keyStop) const;
- char const*
+ char const*
lower_bound(TKN const* keyStart, TKN const* keyStop) const;
- char const*
+ char const*
lower_bound(TKN const* keyStart, int keyLen) const;
- /** @return a pointer to the end point of the index entry range covering
+ /** @return a pointer to the end point of the index entry range covering
* [keyStart,keyStop)
*/
- char const*
- upper_bound(typename vector<TKN>::const_iterator const& keyStart,
+ char const*
+ upper_bound(typename vector<TKN>::const_iterator const& keyStart,
typename vector<TKN>::const_iterator const& keyStop) const;
- char const*
+ char const*
upper_bound(TKN const* keyStart, int keyLength) const;
/** dump all suffixes in order to /out/ */
void dump(ostream& out, TokenIndex const& T) const;
-
- /** fill the dynamic bit set with true for all sentences that contain
+
+ /** fill the dynamic bit set with true for all sentences that contain
* /phrase/.
* @return the raw number of occurrences.
*/
@@ -188,70 +188,70 @@ namespace ugdiss
setTokenBits(char const* startRange, char const* endRange, size_t len,
bitvector& bs) const;
- /** read the sentence ID into /sid/
- * @return position of associated offset.
+ /** read the sentence ID into /sid/
+ * @return position of associated offset.
*
* The function provides an abstraction that uses the right
* interpretation of the position based on the subclass
* (memory-mapped or in-memory).
*/
virtual
- char const*
+ char const*
readSid(char const* p, char const* q, id_type& sid) const = 0;
virtual
- char const*
+ char const*
readSid(char const* p, char const* q, ::uint64_t& sid) const = 0;
- /** read the offset part of the index entry into /offset/
- * @return position of the next entry in the index.
+ /** read the offset part of the index entry into /offset/
+ * @return position of the next entry in the index.
*
* The function provides an abstraction that uses the right
* interpretation of the position based on the subclass
* (memory-mapped or in-memory).
*/
virtual
- char const*
+ char const*
readOffset(char const* p, char const* q, uint16_t& offset) const = 0;
virtual
- char const*
+ char const*
readOffset(char const* p, char const* q, ::uint64_t& offset) const = 0;
- /** @return sentence count
+ /** @return sentence count
*/
count_type
- sntCnt(char const* p, char const* const q) const;
-
+ sntCnt(char const* p, char const* const q) const;
+
count_type
- rawCnt2(TKN const* keyStart, size_t keyLen) const;
+ rawCnt2(TKN const* keyStart, size_t keyLen) const;
/** @return raw occurrence count
- *
+ *
* depending on the subclass, this is constant time (imTSA) or
* linear in in the number of occurrences (mmTSA).
*/
virtual
count_type
- rawCnt(char const* p, char const* const q) const = 0;
+ rawCnt(char const* p, char const* const q) const = 0;
- /** get both sentence and word counts.
+ /** get both sentence and word counts.
*
* Avoids having to go over the byte range representing the range
* of suffixes in question twice when dealing with memory-mapped
* suffix arrays.
- */
+ */
virtual
- void
- getCounts(char const* p, char const* const q,
- count_type& sids, count_type& raw) const = 0;
+ void
+ getCounts(char const* p, char const* const q,
+ count_type& sids, count_type& raw) const = 0;
- string
- suffixAt(char const* p, TokenIndex const* V=NULL, size_t maxlen=0)
+ string
+ suffixAt(char const* p, TokenIndex const* V=NULL, size_t maxlen=0)
const;
- string
- suffixAt(ArrayEntry const& I, TokenIndex const* V=NULL, size_t maxlen=0)
+ string
+ suffixAt(ArrayEntry const& I, TokenIndex const* V=NULL, size_t maxlen=0)
const;
tsa::ArrayEntry& readEntry(char const* p, tsa::ArrayEntry& I) const;
@@ -260,36 +260,36 @@ namespace ugdiss
char const* dataEnd() const;
bool sanityCheck1() const;
-
- /** Return an ID that represents a given phrase;
+
+ /** Return an ID that represents a given phrase;
This should NEVER be 0!
- Structure of a phrase ID:
+ Structure of a phrase ID:
leftmost 32 bits: sentence ID in the corpus
next 16 bits: offset from the start of the sentence
next 16 bits: length of the phrase
*/
- ::uint64_t
+ ::uint64_t
getSequenceId(typename vector<TKN>::const_iterator const& pstart,
typename vector<TKN>::const_iterator const& pstop) const;
-
- ::uint64_t
+
+ ::uint64_t
getSequenceId(TKN const* t, ushort plen) const;
-
+
/** Return the phrase represented by phrase ID pid_ */
string
getSequence(::uint64_t pid, TokenIndex const& V) const;
-
+
/** Return the phrase represented by phrase ID pid_ */
vector<TKN>
getSequence(::uint64_t pid) const;
- TKN const*
+ TKN const*
getSequenceStart(::uint64_t) const;
ushort
getSequenceLength(::uint64_t) const;
- size_t
+ size_t
getCorpusSize() const;
Ttrack<TKN> const*
@@ -297,13 +297,13 @@ namespace ugdiss
bitset_pointer
getBitSet(TKN const* startKey, size_t keyLen) const;
-
+
boost::shared_ptr<bitvector>
- findTree(TKN const* treeStart, TKN const* treeEnd,
+ findTree(TKN const* treeStart, TKN const* treeEnd,
bitvector const* filter) const;
-
+
size_t markOccurrences(char const* lo, char const* up, size_t len,
- bitvector& bitset,
+ bitvector& bitset,
bool markOnlyStartPosition) const;
bool
@@ -311,13 +311,13 @@ namespace ugdiss
vector<tree_iterator>& dest) const;
double aveIndexEntrySize() const
- {
- return (endArray-startArray)/double(numTokens);
+ {
+ return (endArray-startArray)/double(numTokens);
}
public:
- // virtual
- sptr<TSA_tree_iterator<TKN> >
+ // virtual
+ sptr<TSA_tree_iterator<TKN> >
find(TKN const* start, size_t len) const
{
typedef TSA_tree_iterator<TKN> iter;
@@ -333,7 +333,7 @@ namespace ugdiss
// ======================================================================
// template<typename TOKEN>
- // sptr<TSA_tree_iterator<TOKEN> >
+ // sptr<TSA_tree_iterator<TOKEN> >
// TSA<TOKEN>::
// find(TOKEN const* start, size_t len) const
// {
@@ -354,7 +354,7 @@ namespace ugdiss
* @return number of total occurrences of the phrase in the corpus
*/
template<typename TKN>
- count_type
+ count_type
TSA<TKN>::
fillBitSet(vector<TKN> const& key,
bitvector& bitset) const
@@ -362,7 +362,7 @@ namespace ugdiss
if (!key.size()) return 0;
return fillBitset(&(key[0]),key.size(),bitset);
}
-
+
// ---------------------------------------------------------------------------
/** fill the dynamic bitset with information as to which sentences
@@ -370,7 +370,7 @@ namespace ugdiss
* @return number of total occurrences of the phrase in the corpus
*/
template<typename TKN>
- count_type
+ count_type
TSA<TKN>::
fillBitSet(TKN const* key, size_t keyLen,
bitvector& bitset) const
@@ -385,7 +385,7 @@ namespace ugdiss
// ---------------------------------------------------------------------------
template<typename TKN>
- count_type
+ count_type
TSA<TKN>::
setBits(char const* startRange, char const* endRange,
bitvector& bs) const
@@ -452,7 +452,7 @@ namespace ugdiss
* of the token range matching [startKey,endKey)
*/
template<typename TKN>
- char const*
+ char const*
TSA<TKN>::
find_start(char const* lo, char const* const upX,
TKN const* const refStart, int refLen,
@@ -485,12 +485,12 @@ namespace ugdiss
* of the token range matching [startKey,endKey)
*/
template<typename TKN>
- char const*
+ char const*
TSA<TKN>::
find_end(char const* lo, char const* const upX,
TKN const* const refStart, int refLen,
size_t d) const
-
+
{
char const* up = upX;
if (lo >= up) return NULL;
@@ -520,7 +520,7 @@ namespace ugdiss
* but continues on
*/
template<typename TKN>
- char const*
+ char const*
TSA<TKN>::
find_longer(char const* lo, char const* const upX,
TKN const* const refStart, int refLen,
@@ -553,7 +553,7 @@ namespace ugdiss
* given search phrase
*/
template<typename TKN>
- char const*
+ char const*
TSA<TKN>::
lower_bound(typename vector<TKN>::const_iterator const& keyStart,
typename vector<TKN>::const_iterator const& keyStop) const
@@ -570,7 +570,7 @@ namespace ugdiss
* given search phrase
*/
template<typename TKN>
- char const*
+ char const*
TSA<TKN>::
lower_bound(TKN const* const keyStart,
TKN const* const keyStop) const
@@ -579,7 +579,7 @@ namespace ugdiss
}
template<typename TKN>
- char const*
+ char const*
TSA<TKN>::
lower_bound(TKN const* const keyStart, int keyLen) const
{
@@ -595,7 +595,7 @@ namespace ugdiss
* given search phrase (i.e., points just beyond the range)
*/
template<typename TKN>
- char const*
+ char const*
TSA<TKN>::
upper_bound(typename vector<TKN>::const_iterator const& keyStart,
typename vector<TKN>::const_iterator const& keyStop) const
@@ -612,7 +612,7 @@ namespace ugdiss
* given search phrase (i.e., points just beyond the range)
*/
template<typename TKN>
- char const*
+ char const*
TSA<TKN>::
upper_bound(TKN const* keyStart, int keyLength) const
{
@@ -645,7 +645,7 @@ namespace ugdiss
{
return getSequenceId(&(*pstart),pstop-pstart);
}
-
+
//---------------------------------------------------------------------------
template<typename TKN>
@@ -667,14 +667,14 @@ namespace ugdiss
//---------------------------------------------------------------------------
- template<typename TKN>
+ template<typename TKN>
vector<TKN>
TSA<TKN>::
getSequence(::uint64_t pid) const
{
size_t plen = pid % 65536;
size_t offset = (pid >> 16) % 65536;
- TKN const* w = corpus->sntStart(pid >> 32)+offset;
+ TKN const* w = corpus->sntStart(pid >> 32)+offset;
vector<TKN> ret(plen);
for (size_t i = 0; i < plen; i++, w = w->next())
{
@@ -684,7 +684,7 @@ namespace ugdiss
return ret;
}
- template<typename TKN>
+ template<typename TKN>
string
TSA<TKN>::
getSequence(::uint64_t pid, TokenIndex const& V) const
@@ -698,21 +698,21 @@ namespace ugdiss
return buf.str();
}
-
+
//---------------------------------------------------------------------------
- template<typename TKN>
+ template<typename TKN>
TKN const*
TSA<TKN>::
getSequenceStart(::uint64_t pid) const
{
size_t offset = (pid >> 16) % 65536;
- return corpus->sntStart(pid >> 32)+offset;
+ return corpus->sntStart(pid >> 32)+offset;
}
-
+
//---------------------------------------------------------------------------
- template<typename TKN>
+ template<typename TKN>
ushort
TSA<TKN>::
getSequenceLength(::uint64_t pid) const
@@ -729,7 +729,7 @@ namespace ugdiss
{
return corpusSize;
}
-
+
//---------------------------------------------------------------------------
template<typename TKN>
@@ -756,7 +756,7 @@ namespace ugdiss
};
//---------------------------------------------------------------------------
-
+
/// find all instances of the tree described by [treeStart, treeEnd)
template<typename TKN>
typename TSA<TKN>::bitset_pointer
@@ -764,7 +764,7 @@ namespace ugdiss
getBitSet(TKN const* startKey, size_t keyLen) const
{
bitset_pointer ret;
- if (bsc != NULL)
+ if (bsc != NULL)
ret = bsc->get(startKey,keyLen);
else
{
@@ -773,7 +773,7 @@ namespace ugdiss
}
return ret;
}
-
+
//---------------------------------------------------------------------------
template<typename TKN>
@@ -809,12 +809,12 @@ namespace ugdiss
vector<tree_iterator>& dest) const
{
dest.assign(terminals.count(),tree_iterator(this));
- for (size_t i = terminals.find_first(), k = 0;
- i < terminals.size();
+ for (size_t i = terminals.find_first(), k = 0;
+ i < terminals.size();
i = terminals.find_next(i),++k)
{
for (TKN const* x = base+i; x && x->id(); x = x->next())
- if (!dest[k].extend(x->id()))
+ if (!dest[k].extend(x->id()))
return false;
}
typename tree_iterator::SortByApproximateCount sorter;
diff --git a/moses/TranslationModel/UG/mm/ug_tsa_bitset_cache.h b/moses/TranslationModel/UG/mm/ug_tsa_bitset_cache.h
index 3111f1c1d..d13449e36 100644
--- a/moses/TranslationModel/UG/mm/ug_tsa_bitset_cache.h
+++ b/moses/TranslationModel/UG/mm/ug_tsa_bitset_cache.h
@@ -20,7 +20,7 @@ namespace ugdiss
{
using namespace std;
template<typename TSA>
- class
+ class
BitSetCache
{
public:
@@ -33,15 +33,15 @@ namespace ugdiss
myMap cached1,cached2;
int threshold;
public:
-
+
BitSetCache() : tsa(NULL), threshold(0) {};
- BitSetCache(TSA const* t, size_t th=4194304)
+ BitSetCache(TSA const* t, size_t th=4194304)
{
init(t,th);
};
- void
- init(TSA const* t, size_t th=4194304)
+ void
+ init(TSA const* t, size_t th=4194304)
{
tsa = t;
threshold = th;
@@ -84,7 +84,7 @@ namespace ugdiss
if (up-lo > threshold)
{
pair<char const*,ushort> k(lo,keyLen);
- // cout << "bla " << keyStart->id() << " "
+ // cout << "bla " << keyStart->id() << " "
// << cached2.size() << " " << up-lo << " " << k.second << endl;
myMapIter m = cached2.find(k);
if (m != cached2.end())
diff --git a/moses/TranslationModel/UG/mm/ug_tsa_tree_iterator.h b/moses/TranslationModel/UG/mm/ug_tsa_tree_iterator.h
index 508f09304..053ff2445 100644
--- a/moses/TranslationModel/UG/mm/ug_tsa_tree_iterator.h
+++ b/moses/TranslationModel/UG/mm/ug_tsa_tree_iterator.h
@@ -23,24 +23,24 @@ namespace ugdiss
template<typename T>
void display(T const* x, string label)
{
- cout << label << ":";
- for (;x;x=next(x)) cout << " " << x->lemma;
- cout << endl;
+ cout << label << ":";
+ for (;x;x=next(x)) cout << " " << x->lemma;
+ cout << endl;
}
#endif
template<typename T> class TSA;
// CLASS DEFINITION
- // The TSA_tree_iterator allows traversal of a Token Sequence Array
+ // The TSA_tree_iterator allows traversal of a Token Sequence Array
// as if it was a trie.
//
// down(): go to first child
- // over(): go to next sibling
+ // over(): go to next sibling
// up(): go to parent
// extend(id): go to a specific child node
// all four functions return true if successful, false otherwise
- // lower_bound() and upper_bound() give the range of entries in the
+ // lower_bound() and upper_bound() give the range of entries in the
// array covered by the "virtual trie node".
template<typename TKN>
class
@@ -49,7 +49,7 @@ namespace ugdiss
protected:
vector<char const*> lower;
vector<char const*> upper;
-
+
// for debugging ...
void showBounds(ostream& out) const;
public:
@@ -57,7 +57,7 @@ namespace ugdiss
virtual ~TSA_tree_iterator() {};
- TSA<Token> const* root;
+ TSA<Token> const* root;
// TO BE DONE: make the pointer private and add a const function
// to return the pointer
@@ -66,16 +66,16 @@ namespace ugdiss
TSA_tree_iterator(TSA<Token> const* s, TSA_tree_iterator<Token> const& other);
TSA_tree_iterator(TSA<Token> const* r, id_type const* s, size_t const len);
// TSA_tree_iterator(TSA<Token> const* s, Token const& t);
- TSA_tree_iterator(TSA<Token> const* s,
- Token const* kstart,
- size_t const len,
+ TSA_tree_iterator(TSA<Token> const* s,
+ Token const* kstart,
+ size_t const len,
bool full_match_only=true);
- TSA_tree_iterator(TSA<Token> const* s,
- Token const* kstart,
- Token const* kend,
+ TSA_tree_iterator(TSA<Token> const* s,
+ Token const* kstart,
+ Token const* kend,
bool full_match_only=true);
- TSA_tree_iterator(TSA<Token> const* s,
- TokenIndex const& V,
+ TSA_tree_iterator(TSA<Token> const* s,
+ TokenIndex const& V,
string const& key);
char const* lower_bound(int p) const;
@@ -104,49 +104,49 @@ namespace ugdiss
bool match(id_type sid) const;
// fillBitSet: deprecated; use markSentences() instead
- count_type
+ count_type
fillBitSet(boost::dynamic_bitset<typename ::uint64_t>& bitset) const;
-
- count_type
+
+ count_type
markEndOfSequence(Token const* start, Token const* stop,
boost::dynamic_bitset<typename ::uint64_t>& dest) const;
- count_type
+ count_type
markSequence(Token const* start, Token const* stop, bitvector& dest) const;
-
- count_type
+
+ count_type
markSentences(boost::dynamic_bitset<typename ::uint64_t>& bitset) const;
-
- count_type
+
+ count_type
markOccurrences(boost::dynamic_bitset<typename ::uint64_t>& bitset,
bool markOnlyStartPosition=false) const;
-
- count_type
+
+ count_type
markOccurrences(vector<ushort>& dest) const;
-
- ::uint64_t
+
+ ::uint64_t
getSequenceId() const;
-
- // equivalent but more efficient than
+
+ // equivalent but more efficient than
// bitvector tmp; markSentences(tmp); foo &= tmp;
bitvector& filterSentences(bitvector& foo) const;
-
+
/// a special auxiliary function for finding trees
- void
- tfAndRoot(bitvector const& ref, // reference root positions
+ void
+ tfAndRoot(bitvector const& ref, // reference root positions
bitvector const& snt, // relevant sentences
bitvector& dest) const;
-
+
size_t arrayByteSpanSize(int p = -1) const
- {
+ {
if (lower.size()==0) return 0; // or endArray-startArray???
if (p < 0) p = lower.size()+p;
assert(p >=0 && p < int(lower.size()));
return lower.size() ? upper[p]-lower[p] : 0;
}
-
+
struct SortByApproximateCount
{
- bool operator()(TSA_tree_iterator const& a,
+ bool operator()(TSA_tree_iterator const& a,
TSA_tree_iterator const& b) const
{
if (a.size()==0) return b.size() ? true : false;
@@ -175,7 +175,7 @@ namespace ugdiss
size_t grow(Token const* snt, bitvector const& cov)
{
- size_t x = cov.find_first();
+ size_t x = cov.find_first();
while (x < cov.size() && extend(snt[x]))
x = cov.find_next(x);
return this->size();
@@ -183,7 +183,7 @@ namespace ugdiss
sptr<vector<typename ttrack::Position> >
randomSample(int level, size_t N) const;
-
+
};
//---------------------------------------------------------------------------
@@ -205,7 +205,7 @@ namespace ugdiss
assert(root->corpus->getToken(A));
assert(lo < root->getUpperBound(root->corpus->getToken(A)->id()));
lower.push_back(lo);
- Token const* foo = this->getToken(0);
+ Token const* foo = this->getToken(0);
upper.push_back(root->upper_bound(foo,lower.size()));
return lower.size();
}
@@ -217,7 +217,7 @@ namespace ugdiss
Token const* z = next(a);
for (size_t i = 1; i < size(); ++i) z = next(z);
if (z < root->corpus->sntStart(A.sid) || z >= root->corpus->sntEnd(A.sid))
- {
+ {
char const* up = upper.back();
lo = root->find_longer(lo,up,a,lower.size(),0);
if (!lo) return false;
@@ -244,7 +244,7 @@ namespace ugdiss
TSA_tree_iterator<Token>::
over()
{
- if (lower.size() == 0)
+ if (lower.size() == 0)
return false;
if (lower.size() == 1)
{
@@ -254,7 +254,7 @@ namespace ugdiss
if (upper[0] < hi)
{
lower[0] = upper[0];
- Token const* foo = this->getToken(0);
+ Token const* foo = this->getToken(0);
upper.back() = root->upper_bound(foo,lower.size());
}
else
@@ -264,11 +264,11 @@ namespace ugdiss
char const* lo = root->getLowerBound(wid);
if (lo == root->endArray) return false;
char const* hi = root->getUpperBound(wid);
- if (!hi) return false;
+ if (!hi) return false;
if (lo == hi) continue;
assert(lo);
lower[0] = lo;
- Token const* foo = this->getToken(0);
+ Token const* foo = this->getToken(0);
upper.back() = root->upper_bound(foo,lower.size());
break;
}
@@ -293,7 +293,7 @@ namespace ugdiss
// display(root->corpus->getToken(U),"L2");
- Token const* foo = this->getToken(0);
+ Token const* foo = this->getToken(0);
// display(foo,"F!");
upper.back() = root->upper_bound(foo,lower.size());
return true;
@@ -326,17 +326,17 @@ namespace ugdiss
template<typename Token>
TSA_tree_iterator<Token>::
TSA_tree_iterator(TSA<Token> const* s)
- : root(s)
+ : root(s)
{};
template<typename Token>
TSA_tree_iterator<Token>::
TSA_tree_iterator(TSA<Token> const* s, TSA_tree_iterator<Token> const& other)
- : root(s)
+ : root(s)
{
Token const* x = other.getToken(0);
for (size_t i = 0; i < other.size() && this->extend(x->id()); ++i)
- x = x->next();
+ x = x->next();
};
@@ -345,9 +345,9 @@ namespace ugdiss
TSA_tree_iterator<Token>::
TSA_tree_iterator
(TSA<Token> const* r,
- id_type const* s,
+ id_type const* s,
size_t const len)
- : root(r)
+ : root(r)
{
for (id_type const* e = s + len; s < e && extend(*s); ++s);
};
@@ -357,16 +357,16 @@ namespace ugdiss
#if 1
template<typename Token>
TSA_tree_iterator<Token>::
- TSA_tree_iterator(TSA<Token> const* s,
- TokenIndex const& V,
+ TSA_tree_iterator(TSA<Token> const* s,
+ TokenIndex const& V,
string const& key)
: root(s)
{
istringstream buf(key); string w;
while (buf >> w)
{
- if (this->extend(V[w]))
- continue;
+ if (this->extend(V[w]))
+ continue;
else
{
lower.clear();
@@ -377,7 +377,7 @@ namespace ugdiss
};
#endif
-#if 0
+#if 0
// ---------------------------------------------------------------------------
template<typename Token>
@@ -394,7 +394,7 @@ namespace ugdiss
template<typename Token>
TSA_tree_iterator<Token>::
TSA_tree_iterator(TSA<Token> const* s, Token const& t)
- : root(s)
+ : root(s)
{
if (!root) return;
char const* up = root->getUpperBound(t.id());
@@ -409,33 +409,33 @@ namespace ugdiss
template<typename Token>
TSA_tree_iterator<Token>::
- TSA_tree_iterator(TSA<Token> const* s, Token const* kstart,
+ TSA_tree_iterator(TSA<Token> const* s, Token const* kstart,
size_t const len, bool full_match_only)
- : root(s)
+ : root(s)
{
if (!root) return;
size_t i = 0;
for (; i < len && kstart && extend(*kstart); ++i)
kstart = kstart->next();
- if (full_match_only && i != len)
+ if (full_match_only && i != len)
{
lower.clear();
upper.clear();
}
};
- // DEPRECATED: DO NOT USE. Use the one that takes the length
+ // DEPRECATED: DO NOT USE. Use the one that takes the length
// instead of kend.
template<typename Token>
TSA_tree_iterator<Token>::
- TSA_tree_iterator(TSA<Token> const* s, Token const* kstart,
+ TSA_tree_iterator(TSA<Token> const* s, Token const* kstart,
Token const* kend, bool full_match_only)
- : root(s)
+ : root(s)
{
- for (;kstart != kend; kstart = kstart->next())
- if (!extend(*kstart))
+ for (;kstart != kend; kstart = kstart->next())
+ if (!extend(*kstart))
break;
- if (full_match_only && kstart != kend)
+ if (full_match_only && kstart != kend)
{
lower.clear();
upper.clear();
@@ -445,7 +445,7 @@ namespace ugdiss
// ---------------------------------------------------------------------------
// EXTEND
// ---------------------------------------------------------------------------
-
+
template<typename Token>
bool
TSA_tree_iterator<Token>::
@@ -496,9 +496,9 @@ namespace ugdiss
template<typename Token>
size_t
TSA_tree_iterator<Token>::
- size() const
- {
- return lower.size();
+ size() const
+ {
+ return lower.size();
}
// ---------------------------------------------------------------------------
@@ -506,8 +506,8 @@ namespace ugdiss
template<typename Token>
id_type
TSA_tree_iterator<Token>::
- getSid() const
- {
+ getSid() const
+ {
char const* p = (lower.size() ? lower.back() : root->startArray);
char const* q = (upper.size() ? upper.back() : root->endArray);
id_type sid;
@@ -520,8 +520,8 @@ namespace ugdiss
template<typename Token>
::uint64_t
TSA_tree_iterator<Token>::
- getPid(int p) const
- {
+ getPid(int p) const
+ {
if (this->size() == 0) return 0;
if (p < 0) p += upper.size();
char const* lb = lower_bound(p);
@@ -531,7 +531,7 @@ namespace ugdiss
::uint64_t ret = (sid<<32) + (off<<16) + ::uint64_t(p+1);
return ret;
}
-
+
// ---------------------------------------------------------------------------
template<typename Token>
@@ -614,7 +614,7 @@ namespace ugdiss
//---------------------------------------------------------------------------
template<typename Token>
- count_type
+ count_type
TSA_tree_iterator<Token>::
fillBitSet(boost::dynamic_bitset<typename ::uint64_t>& bitset) const
{
@@ -624,7 +624,7 @@ namespace ugdiss
//---------------------------------------------------------------------------
template<typename Token>
- count_type
+ count_type
TSA_tree_iterator<Token>::
markSentences(boost::dynamic_bitset<typename ::uint64_t>& bitset) const
{
@@ -651,7 +651,7 @@ namespace ugdiss
//---------------------------------------------------------------------------
template<typename Token>
- count_type
+ count_type
TSA_tree_iterator<Token>::
markOccurrences(boost::dynamic_bitset<typename ::uint64_t>& bitset, bool markOnlyStartPosition) const
{
@@ -667,7 +667,7 @@ namespace ugdiss
//---------------------------------------------------------------------------
template<typename Token>
- count_type
+ count_type
TSA_tree_iterator<Token>::
markOccurrences(vector<ushort>& dest) const
{
@@ -694,10 +694,10 @@ namespace ugdiss
}
//---------------------------------------------------------------------------
- // mark all endpoints of instances of the path represented by this
+ // mark all endpoints of instances of the path represented by this
// iterator in the sentence [start,stop)
template<typename Token>
- count_type
+ count_type
TSA_tree_iterator<Token>::
markEndOfSequence(Token const* start, Token const* stop,
boost::dynamic_bitset<typename ::uint64_t>& dest) const
@@ -726,10 +726,10 @@ namespace ugdiss
}
//---------------------------------------------------------------------------
- // mark all occurrences of the sequence represented by this
+ // mark all occurrences of the sequence represented by this
// iterator in the sentence [start,stop)
template<typename Token>
- count_type
+ count_type
TSA_tree_iterator<Token>::
markSequence(Token const* start,
Token const* stop,
@@ -784,7 +784,7 @@ namespace ugdiss
{
assert(x);
buf << (i > start ? " " : "");
- if (V) buf << (*V)[x->id()];
+ if (V) buf << (*V)[x->id()];
else buf << x->id();
}
return buf.str();
@@ -807,13 +807,13 @@ namespace ugdiss
{
assert(x);
buf << (i > start ? " " : "");
- buf << V[x->id()].str;
+ buf << V[x->id()].str;
}
return buf.str();
}
#endif
- /// @return true if the sentence [start,stop) contains the sequence
+ /// @return true if the sentence [start,stop) contains the sequence
template<typename Token>
bool
TSA_tree_iterator<Token>::
@@ -823,7 +823,7 @@ namespace ugdiss
for (Token const* t = start; t < stop; ++t)
{
if (*t != *a) continue;
- Token const* b = a;
+ Token const* b = a;
Token const* y = t;
size_t i;
for (i = 1; i < lower.size(); ++i)
@@ -838,7 +838,7 @@ namespace ugdiss
return false;
}
- /// @return true if the sentence /sid/ contains the sequence
+ /// @return true if the sentence /sid/ contains the sequence
template<typename Token>
bool
TSA_tree_iterator<Token>::
@@ -851,9 +851,9 @@ namespace ugdiss
// @param sntcheck: number of roots in the respective sentence
// @param dest: bitvector to keep track of the exact root location
template<typename Token>
- void
+ void
TSA_tree_iterator<Token>::
- tfAndRoot(bitvector const& ref, // reference root positions
+ tfAndRoot(bitvector const& ref, // reference root positions
bitvector const& snt, // relevant sentences
bitvector& dest) const
{
@@ -880,12 +880,12 @@ namespace ugdiss
filterSentences(bitvector& bv) const
{
float aveSntLen = root->corpus->numTokens()/root->corpus->size();
- size_t ANDcost = bv.size()/8; // cost of dest&=ref;
+ size_t ANDcost = bv.size()/8; // cost of dest&=ref;
float aveEntrySize = ((root->endArray-root->startArray)
/root->corpus->numTokens());
if (arrayByteSpanSize()+ANDcost < aveEntrySize*aveSntLen*bv.count())
{
- bitvector tmp(bv.size());
+ bitvector tmp(bv.size());
markSentences(tmp);
bv &= tmp;
}
@@ -906,9 +906,9 @@ namespace ugdiss
if (level < 0) level += lower.size();
assert(level >=0);
- sptr<vector<typename ttrack::Position> >
+ sptr<vector<typename ttrack::Position> >
ret(new vector<typename ttrack::Position>(N));
-
+
size_t m=0; // number of samples selected so far
typename Token::ArrayEntry I(lower.at(level));
@@ -916,7 +916,7 @@ namespace ugdiss
while (m < N && (I.next) < stop)
{
root->readEntry(I.next,I);
-
+
// t: expected number of remaining samples
const double t = (stop - I.pos)/root->aveIndexEntrySize();
const double r = util::rand_excl(t);
@@ -930,6 +930,6 @@ namespace ugdiss
return ret;
}
-
+
} // end of namespace ugdiss
#endif
diff --git a/moses/TranslationModel/UG/mm/ug_ttrack_base.cc b/moses/TranslationModel/UG/mm/ug_ttrack_base.cc
index 644c53c3a..60d20a5f9 100644
--- a/moses/TranslationModel/UG/mm/ug_ttrack_base.cc
+++ b/moses/TranslationModel/UG/mm/ug_ttrack_base.cc
@@ -9,12 +9,12 @@
namespace ugdiss
{
using namespace std;
-
+
#if 0
template<>
id_type
Ttrack<id_type>::
- toID(id_type const& t)
+ toID(id_type const& t)
{
return t;
}
diff --git a/moses/TranslationModel/UG/mm/ug_ttrack_base.h b/moses/TranslationModel/UG/mm/ug_ttrack_base.h
index f9864bda6..d087a9e58 100644
--- a/moses/TranslationModel/UG/mm/ug_ttrack_base.h
+++ b/moses/TranslationModel/UG/mm/ug_ttrack_base.h
@@ -2,7 +2,7 @@
// Base class for corpus tracks. mmTtrack (memory-mapped Ttrack) and imTtrack (in-memory Ttrack)
// are derived from this class.
-// This code is part of a refactorization of the earlier Ttrack class as a template class for
+// This code is part of a refactorization of the earlier Ttrack class as a template class for
// tokens of arbitrary fixed-length size.
// (c) 2007-2009 Ulrich Germann. All rights reserved.
@@ -27,8 +27,8 @@ namespace ugdiss
typedef boost::dynamic_bitset<uint64_t> bdBitset;
template<typename sid_t, typename off_t, typename len_t>
- void
- parse_pid(uint64_t const pid, sid_t & sid,
+ void
+ parse_pid(uint64_t const pid, sid_t & sid,
off_t & off, len_t& len)
{
static uint64_t two32 = uint64_t(1)<<32;
@@ -39,12 +39,12 @@ namespace ugdiss
}
template<typename Token>
- string
+ string
toString(TokenIndex const& V, Token const* x, size_t const len)
{
if (!len) return "";
UTIL_THROW_IF2(!x, HERE << ": Unexpected end of phrase!");
- ostringstream buf;
+ ostringstream buf;
buf << V[x->id()];
size_t i = 1;
for (x = x->next(); x && i < len; ++i, x = x->next())
@@ -63,66 +63,66 @@ namespace ugdiss
typedef TKN Token;
/** @return a pointer to beginning of sentence /sid/ */
- virtual
- TKN const*
- sntStart(size_t sid) const = 0;
+ virtual
+ TKN const*
+ sntStart(size_t sid) const = 0;
/** @return end point of sentence /sid/ */
- virtual
- TKN const*
- sntEnd(size_t sid) const = 0;
+ virtual
+ TKN const*
+ sntEnd(size_t sid) const = 0;
TKN const*
getToken(Position const& p) const;
template<typename T>
- T const*
- getTokenAs(Position const& p) const
+ T const*
+ getTokenAs(Position const& p) const
{ return reinterpret_cast<T const*>(getToken(p)); }
template<typename T>
T const*
- sntStartAs(id_type sid) const
+ sntStartAs(id_type sid) const
{ return reinterpret_cast<T const*>(sntStart(sid)); }
template<typename T>
T const*
- sntEndAs(id_type sid) const
+ sntEndAs(id_type sid) const
{ return reinterpret_cast<T const*>(sntEnd(sid)); }
/** @return length of sentence /sid/ */
size_t sntLen(size_t sid) const { return sntEnd(sid) - sntStart(sid); }
- size_t
+ size_t
startPos(id_type sid) const { return sntStart(sid)-sntStart(0); }
-
- size_t
+
+ size_t
endPos(id_type sid) const { return sntEnd(sid)-sntStart(0); }
/** Don't use this unless you want a copy of the sentence */
- vector<TKN>
- operator[](id_type sid) const
- {
- return vector<TKN>(sntStart(sid),sntEnd(sid));
+ vector<TKN>
+ operator[](id_type sid) const
+ {
+ return vector<TKN>(sntStart(sid),sntEnd(sid));
}
/** @return size of corpus in number of sentences */
- virtual size_t size() const = 0;
+ virtual size_t size() const = 0;
/** @return size of corpus in number of words/tokens */
- virtual size_t numTokens() const = 0;
+ virtual size_t numTokens() const = 0;
- /** @return string representation of sentence /sid/
+ /** @return string representation of sentence /sid/
* Currently only defined for Ttrack<id_type> */
string str(id_type sid, TokenIndex const& T) const;
string pid2str(TokenIndex const* V, uint64_t pid) const;
- // /** @return string representation of sentence /sid/
+ // /** @return string representation of sentence /sid/
// * Currently only defined for Ttrack<id_type> */
// string str(id_type sid, Vocab const& V) const;
-
- /** counts the tokens in the corpus; used for example in the construction of
+
+ /** counts the tokens in the corpus; used for example in the construction of
* token sequence arrays */
count_type count_tokens(vector<count_type>& cnt, bdBitset const* filter,
int lengthCutoff=0, ostream* log=NULL) const;
@@ -130,7 +130,7 @@ namespace ugdiss
// static id_type toID(TKN const& t);
int cmp(Position const& A, Position const& B, int keyLength) const;
- int cmp(Position const& A, TKN const* keyStart, int keyLength=-1,
+ int cmp(Position const& A, TKN const* keyStart, int keyLength=-1,
int depth=0) const;
virtual id_type findSid(TKN const* t) const = 0; // find the sentence id of a given token
@@ -139,18 +139,18 @@ namespace ugdiss
// the following three functions are currently not used by any program ... (deprecate?)
TKN const*
- find_next_within_sentence(TKN const* startKey,
- int keyLength,
+ find_next_within_sentence(TKN const* startKey,
+ int keyLength,
Position startHere) const;
Position
- find_first(TKN const* startKey, int keyLength,
+ find_first(TKN const* startKey, int keyLength,
bdBitset const* filter=NULL) const;
Position
- find_next(TKN const* startKey, int keyLength, Position startAfter,
+ find_next(TKN const* startKey, int keyLength, Position startAfter,
bdBitset const* filter=NULL) const;
-
+
virtual size_t offset(TKN const* t) const { return t-sntStart(0); }
};
@@ -171,11 +171,11 @@ namespace ugdiss
template<typename TKN>
count_type
Ttrack<TKN>::
- count_tokens(vector<count_type>& cnt, bdBitset const* filter,
+ count_tokens(vector<count_type>& cnt, bdBitset const* filter,
int lengthCutoff, ostream* log) const
{
- bdBitset filter2;
- if (!filter)
+ bdBitset filter2;
+ if (!filter)
{
filter2.resize(this->size());
filter2.set();
@@ -184,21 +184,21 @@ namespace ugdiss
cnt.clear();
cnt.reserve(500000);
count_type totalCount=0;
-
+
int64_t expectedTotal=0;
for (size_t sid = 0; sid < this->size(); ++sid)
expectedTotal += this->sntLen(sid);
-
+
for (size_t sid = filter->find_first();
sid < filter->size();
sid = filter->find_next(sid))
{
TKN const* k = sntStart(sid);
TKN const* const stop = sntEnd(sid);
- if (lengthCutoff && stop-k >= lengthCutoff)
+ if (lengthCutoff && stop-k >= lengthCutoff)
{
- if (log)
- *log << "WARNING: skipping sentence #" << sid
+ if (log)
+ *log << "WARNING: skipping sentence #" << sid
<< " with more than 65536 tokens" << endl;
expectedTotal -= stop-k;
}
@@ -217,7 +217,7 @@ namespace ugdiss
if (this->size() == filter->count())
{
if (totalCount != expectedTotal)
- cerr << "OOPS: expected " << expectedTotal
+ cerr << "OOPS: expected " << expectedTotal
<< " tokens but counted " << totalCount << endl;
assert(totalCount == expectedTotal);
}
@@ -256,16 +256,16 @@ namespace ugdiss
a = next(a);
b = next(b);
// cerr << keyLength << "b. " << (a ? a->lemma : 0) << " " << (b ? b->lemma : 0) << endl;
- if (--keyLength==0 || b < bosB || b >= eosB)
- {
+ if (--keyLength==0 || b < bosB || b >= eosB)
+ {
ret = (a < bosA || a >= eosA) ? 0 : 1;
break;
}
}
// cerr << "RETURNING " << ret << endl;
- return ret;
+ return ret;
}
-
+
template<typename TKN>
int
Ttrack<TKN>::
@@ -287,17 +287,17 @@ namespace ugdiss
if (*x > *key) return 2;
key = key->next();
x = x->next();
- if (--keyLength==0) // || !key)
+ if (--keyLength==0) // || !key)
return (x == stopx) ? 0 : 1;
assert(key);
}
- return -1;
+ return -1;
}
template<typename TKN>
- TKN const*
+ TKN const*
Ttrack<TKN>::
- find_next_within_sentence(TKN const* startKey, int keyLength,
+ find_next_within_sentence(TKN const* startKey, int keyLength,
Position startHere) const
{
for (TKN const* t = getToken(startHere); t; t = getToken(startHere))
@@ -308,12 +308,12 @@ namespace ugdiss
{
TKN const* k = startKey->next();
TKN const* t2 = t->next();
- if (t2)
+ if (t2)
{
- cout << t2->lemma << "." << int(t2->minpos) << " "
+ cout << t2->lemma << "." << int(t2->minpos) << " "
<< k->lemma << "." << int(k->minpos) << " "
<< t2->cmp(*k) << endl;
- }
+ }
}
#endif
int x = cmp(startHere,startKey,keyLength,0);
@@ -330,8 +330,8 @@ namespace ugdiss
{
if (filter)
{
- for (size_t sid = filter->find_first();
- sid < filter->size();
+ for (size_t sid = filter->find_first();
+ sid < filter->size();
sid = filter->find_next(sid))
{
TKN const* x = find_next_within_sentence(startKey,keyLength,Position(sid,0));
@@ -348,7 +348,7 @@ namespace ugdiss
}
return Position(this->size(),0);
}
-
+
template<typename TKN>
typename Ttrack<TKN>::Position
Ttrack<TKN>::
@@ -411,6 +411,6 @@ namespace ugdiss
}
return buf.str();
}
-
+
}
#endif
diff --git a/moses/TranslationModel/UG/mm/ug_ttrack_position.h b/moses/TranslationModel/UG/mm/ug_ttrack_position.h
index 64fab3afb..6d473f263 100644
--- a/moses/TranslationModel/UG/mm/ug_ttrack_position.h
+++ b/moses/TranslationModel/UG/mm/ug_ttrack_position.h
@@ -6,7 +6,7 @@
#include "ug_typedefs.h"
// A token position in a Ttrack, with a LESS functor for comparing token
-// positions in whatever sorting order the underlying token type implies.
+// positions in whatever sorting order the underlying token type implies.
//
// (c) 2007-2010 Ulrich Germann. All rights reserved.
@@ -26,19 +26,19 @@ namespace ugdiss
Position(id_type _sid, ushort _off);
template<typename TTRACK_TYPE> class LESS; // probably abandoned
}; // end of deklaration of Position
-
-#if 1
+
+#if 1
template<typename TTRACK_TYPE>
- class
+ class
Position::
LESS
{
TTRACK_TYPE const* c;
public:
typedef typename TTRACK_TYPE::Token Token;
-
+
LESS(TTRACK_TYPE const* crp) : c(crp) {};
-
+
bool operator()(Position const& A, Position const& B) const
{
Token const* a = c->getToken(A); assert(a);
@@ -48,30 +48,30 @@ namespace ugdiss
Token const* bosA = c->sntStart(A.sid);
Token const* eosA = c->sntEnd(A.sid);
-
+
Token const* bosB = c->sntStart(B.sid);
Token const* eosB = c->sntEnd(B.sid);
-
+
#if 0
- Token const* z = a;
+ Token const* z = a;
cout << "A: " << z->id();
for (z = next(z); z >= bosA && z < eosA; z = next(z))
- cout << "-" << z->id();
+ cout << "-" << z->id();
cout << endl;
-
- z = b;
+
+ z = b;
cout << "B: " << z->id();
for (z = next(z); z >= bosB && z < eosB; z = next(z))
- cout << "-" << z->id();
+ cout << "-" << z->id();
cout << endl;
#endif
while (*a == *b)
{
a = next(a);
b = next(b);
- if (a < bosA || a >= eosA)
+ if (a < bosA || a >= eosA)
return (b >= bosB && b < eosB);
- if (b < bosB || b >= eosB)
+ if (b < bosB || b >= eosB)
return false;
}
int x = a->cmp(*b);
@@ -86,4 +86,4 @@ namespace ugdiss
} // end of namespace ttrack
} // end of namespace ugdiss
#endif
-
+
diff --git a/moses/TranslationModel/UG/mm/ug_typedefs.h b/moses/TranslationModel/UG/mm/ug_typedefs.h
index 83c8684e0..0181bef9e 100644
--- a/moses/TranslationModel/UG/mm/ug_typedefs.h
+++ b/moses/TranslationModel/UG/mm/ug_typedefs.h
@@ -24,7 +24,7 @@ namespace ugdiss
typedef vector<vector<short> > short_2d_table;
typedef vector<short_2d_table> short_3d_table;
typedef vector<short_3d_table> short_4d_table;
-
+
typedef vector<vector<int> > int_2d_table;
typedef vector<int_2d_table> int_3d_table;
typedef vector<int_3d_table> int_4d_table;
diff --git a/moses/TranslationModel/UG/mmsapt.cpp b/moses/TranslationModel/UG/mmsapt.cpp
index 4e9e97766..6e680bbc5 100644
--- a/moses/TranslationModel/UG/mmsapt.cpp
+++ b/moses/TranslationModel/UG/mmsapt.cpp
@@ -19,7 +19,7 @@ namespace Moses
using namespace std;
using namespace boost;
- void
+ void
fillIdSeq(Phrase const& mophrase, size_t const ifactor,
TokenIndex const& V, vector<id_type>& dest)
{
@@ -30,8 +30,8 @@ namespace Moses
dest[i] = V[f->ToString()];
}
}
-
- void
+
+ void
parseLine(string const& line, map<string,string> & param)
{
char_separator<char> sep("; ");
@@ -79,13 +79,13 @@ namespace Moses
, context_key(((char*)this)+1)
// , m_tpc_ctr(0)
, ofactor(1,0)
- {
- init(line);
+ {
+ init(line);
setup_local_feature_functions();
Register();
}
- void
+ void
Mmsapt::
read_config_file(string fname, map<string,string>& param)
{
@@ -99,9 +99,9 @@ namespace Moses
tokenizer<char_separator<char> >::const_iterator t = tokens.begin();
if (t == tokens.end()) continue;
string& foo = param[*t++];
- if (t == tokens.end() || foo.size()) continue;
+ if (t == tokens.end() || foo.size()) continue;
// second condition: do not overwrite settings from the line in moses.ini
- UTIL_THROW_IF2(*t++ != "=" || t == tokens.end(),
+ UTIL_THROW_IF2(*t++ != "=" || t == tokens.end(),
"Syntax error in Mmsapt config file '" << fname << "'.");
for (foo = *t++; t != tokens.end(); foo += " " + *t++);
}
@@ -120,7 +120,7 @@ namespace Moses
m_is_integer.push_back(ff->isIntegerValued(i));
}
}
-
+
bool Mmsapt::isLogVal(int i) const { return m_is_logval.at(i); }
bool Mmsapt::isInteger(int i) const { return m_is_integer.at(i); }
@@ -130,7 +130,7 @@ namespace Moses
parseLine(line,this->param);
this->m_numScoreComponents = atoi(param["num-features"].c_str());
-
+
m = param.find("config");
if (m != param.end())
read_config_file(m->second,param);
@@ -138,17 +138,17 @@ namespace Moses
m = param.find("base");
if (m != param.end())
{
- m_bname = m->second;
+ m_bname = m->second;
m = param.find("path");
UTIL_THROW_IF2((m != param.end() && m->second != m_bname),
- "Conflicting aliases for path:\n"
+ "Conflicting aliases for path:\n"
<< "path=" << string(m->second) << "\n"
<< "base=" << m_bname.c_str() );
}
else m_bname = param["path"];
L1 = param["L1"];
L2 = param["L2"];
-
+
UTIL_THROW_IF2(m_bname.size() == 0, "Missing corpus base name at " << HERE);
UTIL_THROW_IF2(L1.size() == 0, "Missing L1 tag at " << HERE);
UTIL_THROW_IF2(L2.size() == 0, "Missing L2 tag at " << HERE);
@@ -157,11 +157,11 @@ namespace Moses
pair<string,string> dflt("input-factor","0");
input_factor = atoi(param.insert(dflt).first->second.c_str());
// shouldn't that be a string?
-
+
dflt = pair<string,string> ("output-factor","0");
output_factor = atoi(param.insert(dflt).first->second.c_str());
ofactor.assign(1,output_factor);
-
+
dflt = pair<string,string> ("smooth",".01");
m_lbop_conf = atof(param.insert(dflt).first->second.c_str());
@@ -177,7 +177,7 @@ namespace Moses
dflt = pair<string,string>("bias-loglevel","0");
m_bias_loglevel = atoi(param.insert(dflt).first->second.c_str());
-
+
dflt = pair<string,string>("table-limit","20");
m_tableLimit = atoi(param.insert(dflt).first->second.c_str());
@@ -188,25 +188,25 @@ namespace Moses
// in plain language: cache size is at least 1000, and 10,000 by default
// this cache keeps track of the most frequently used target
// phrase collections even when not actively in use
-
+
// Feature functions are initialized in function Load();
- param.insert(pair<string,string>("pfwd", "g"));
- param.insert(pair<string,string>("pbwd", "g"));
- param.insert(pair<string,string>("logcnt", "0"));
- param.insert(pair<string,string>("coh", "0"));
- param.insert(pair<string,string>("rare", "1"));
- param.insert(pair<string,string>("prov", "1"));
-
+ param.insert(pair<string,string>("pfwd", "g"));
+ param.insert(pair<string,string>("pbwd", "g"));
+ param.insert(pair<string,string>("logcnt", "0"));
+ param.insert(pair<string,string>("coh", "0"));
+ param.insert(pair<string,string>("rare", "1"));
+ param.insert(pair<string,string>("prov", "1"));
+
poolCounts = true;
-
+
// this is for pre-comuted sentence-level bias; DEPRECATED!
- if ((m = param.find("bias")) != param.end())
+ if ((m = param.find("bias")) != param.end())
m_bias_file = m->second;
- if ((m = param.find("bias-server")) != param.end())
+ if ((m = param.find("bias-server")) != param.end())
m_bias_server = m->second;
- if ((m = param.find("bias-logfile")) != param.end())
+ if ((m = param.find("bias-logfile")) != param.end())
{
m_bias_logfile = m->second;
if (m_bias_logfile == "/dev/stderr")
@@ -220,10 +220,10 @@ namespace Moses
}
}
- if ((m = param.find("lr-func")) != param.end())
+ if ((m = param.find("lr-func")) != param.end())
m_lr_func_name = m->second;
- if ((m = param.find("extra")) != param.end())
+ if ((m = param.find("extra")) != param.end())
m_extra_data = m->second;
dflt = pair<string,string>("tuneable","true");
@@ -239,7 +239,7 @@ namespace Moses
known_parameters.push_back("L1");
known_parameters.push_back("L2");
known_parameters.push_back("Mmsapt");
- known_parameters.push_back("PhraseDictionaryBitextSampling");
+ known_parameters.push_back("PhraseDictionaryBitextSampling");
// alias for Mmsapt
known_parameters.push_back("base"); // alias for path
known_parameters.push_back("bias");
@@ -259,7 +259,7 @@ namespace Moses
known_parameters.push_back("name");
known_parameters.push_back("num-features");
known_parameters.push_back("output-factor");
- known_parameters.push_back("path");
+ known_parameters.push_back("path");
known_parameters.push_back("pbwd");
known_parameters.push_back("pfwd");
known_parameters.push_back("prov");
@@ -275,12 +275,12 @@ namespace Moses
{
UTIL_THROW_IF2(!binary_search(known_parameters.begin(),
known_parameters.end(), m->first),
- HERE << ": Unknown parameter specification for Mmsapt: "
+ HERE << ": Unknown parameter specification for Mmsapt: "
<< m->first);
}
}
- void
+ void
Mmsapt::
load_bias(string const fname)
{
@@ -298,7 +298,7 @@ namespace Moses
// - sane word alignment?
vector<string> text1,text2,symal;
string line;
- filtering_istream in1,in2,ina;
+ filtering_istream in1,in2,ina;
open_input_stream(bname+L1+".txt.gz",in1);
open_input_stream(bname+L2+".txt.gz",in2);
@@ -314,7 +314,7 @@ namespace Moses
assert(btdyn);
cerr << "Loaded " << btdyn->T1->size() << " sentence pairs" << endl;
}
-
+
template<typename fftype>
void
Mmsapt::
@@ -334,7 +334,7 @@ namespace Moses
ff.reset(new fftype(spec));
register_ff(ff, m_active_ff_dyn);
}
- else
+ else
{
sptr<fftype> ff(new fftype(spec));
register_ff(ff, m_active_ff_common);
@@ -344,7 +344,7 @@ namespace Moses
template<typename fftype>
void
Mmsapt::
- check_ff(string const ffname, float const xtra,
+ check_ff(string const ffname, float const xtra,
vector<sptr<pscorer> >* registry)
{
string const& spec = param[ffname];
@@ -361,7 +361,7 @@ namespace Moses
ff.reset(new fftype(xtra,spec));
register_ff(ff, m_active_ff_dyn);
}
- else
+ else
{
sptr<fftype> ff(new fftype(xtra,spec));
register_ff(ff, m_active_ff_common);
@@ -394,28 +394,28 @@ namespace Moses
// standard (default) feature set
if (fsname == "standard")
{
- // lexical scores
+ // lexical scores
string lexfile = m_bname + L1 + "-" + L2 + ".lex";
- sptr<PScoreLex1<Token> >
+ sptr<PScoreLex1<Token> >
ff(new PScoreLex1<Token>(param["lex_alpha"],lexfile));
register_ff(ff,m_active_ff_common);
-
+
// these are always computed on pooled data
check_ff<PScoreRareness<Token> > ("rare", &m_active_ff_common);
check_ff<PScoreUnaligned<Token> >("unal", &m_active_ff_common);
check_ff<PScoreCoherence<Token> >("coh", &m_active_ff_common);
-
- // for these ones either way is possible (specification ends with '+'
- // if corpus-specific
+
+ // for these ones either way is possible (specification ends with '+'
+ // if corpus-specific
check_ff<PScorePfwd<Token> >("pfwd", m_lbop_conf);
check_ff<PScorePbwd<Token> >("pbwd", m_lbop_conf);
check_ff<PScoreLogCnt<Token> >("logcnt");
-
+
// These are always corpus-specific
check_ff<PScoreProvenance<Token> >("prov", &m_active_ff_fix);
check_ff<PScoreProvenance<Token> >("prov", &m_active_ff_dyn);
}
-
+
// data source features (copies of phrase and word count specific to
// this translation model)
else if (fsname == "datasource")
@@ -456,14 +456,14 @@ namespace Moses
btfix.m_num_workers = this->m_workers;
btfix.open(m_bname, L1, L2);
btfix.setDefaultSampleSize(m_default_sample_size);
-
+
btdyn.reset(new imbitext(btfix.V1, btfix.V2, m_default_sample_size, m_workers));
if (m_bias_file.size())
load_bias(m_bias_file);
-
- if (m_extra_data.size())
+
+ if (m_extra_data.size())
load_extra_data(m_extra_data, false);
-
+
#if 0
// currently not used
LexicalPhraseScorer2<Token>::table_t & COOC = calc_lex.scorer.COOC;
@@ -490,18 +490,18 @@ namespace Moses
}
- TargetPhrase*
+ TargetPhrase*
Mmsapt::
mkTPhrase(Phrase const& src,
- PhrasePair<Token>* fix,
- PhrasePair<Token>* dyn,
+ PhrasePair<Token>* fix,
+ PhrasePair<Token>* dyn,
sptr<Bitext<Token> > const& dynbt) const
{
- UTIL_THROW_IF2(!fix && !dyn, HERE <<
+ UTIL_THROW_IF2(!fix && !dyn, HERE <<
": Can't create target phrase from nothing.");
vector<float> fvals(this->m_numScoreComponents);
PhrasePair<Token> pool = fix ? *fix : *dyn;
- if (fix)
+ if (fix)
{
BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_fix)
(*ff)(btfix, *fix, &fvals);
@@ -511,7 +511,7 @@ namespace Moses
BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_dyn)
(*ff)(*dynbt, *dyn, &fvals);
}
-
+
if (fix && dyn) { pool += *dyn; }
else if (fix)
{
@@ -533,7 +533,7 @@ namespace Moses
BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_fix)
(*ff)(*dynbt, ff->allowPooling() ? pool : zilch, &fvals);
}
- if (fix)
+ if (fix)
{
BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_common)
(*ff)(btfix, pool, &fvals);
@@ -574,39 +574,39 @@ namespace Moses
const InputPathList &inputPathQueue) const
{
InputPathList::const_iterator iter;
- for (iter = inputPathQueue.begin(); iter != inputPathQueue.end(); ++iter)
+ for (iter = inputPathQueue.begin(); iter != inputPathQueue.end(); ++iter)
{
InputPath &inputPath = **iter;
const Phrase &phrase = inputPath.GetPhrase();
PrefixExists(ttask, phrase); // launches parallel lookup
}
- for (iter = inputPathQueue.begin(); iter != inputPathQueue.end(); ++iter)
+ for (iter = inputPathQueue.begin(); iter != inputPathQueue.end(); ++iter)
{
InputPath &inputPath = **iter;
const Phrase &phrase = inputPath.GetPhrase();
- const TargetPhraseCollection *targetPhrases
+ const TargetPhraseCollection *targetPhrases
= this->GetTargetPhraseCollectionLEGACY(ttask,phrase);
inputPath.SetTargetPhrases(*this, targetPhrases, NULL);
}
}
-
- TargetPhraseCollection const*
+
+ TargetPhraseCollection const*
Mmsapt::
GetTargetPhraseCollectionLEGACY(const Phrase& src) const
{
UTIL_THROW2("Don't call me without the translation task.");
}
- // This is not the most efficient way of phrase lookup!
- TargetPhraseCollection const*
+ // This is not the most efficient way of phrase lookup!
+ TargetPhraseCollection const*
Mmsapt::
GetTargetPhraseCollectionLEGACY(ttasksptr const& ttask, const Phrase& src) const
{
// map from Moses Phrase to internal id sequence
- vector<id_type> sphrase;
+ vector<id_type> sphrase;
fillIdSeq(src,input_factor,*(btfix.V1),sphrase);
if (sphrase.size() == 0) return NULL;
-
+
// Reserve a local copy of the dynamic bitext in its current form. /btdyn/
// is set to a new copy of the dynamic bitext every time a sentence pair
// is added. /dyn/ keeps the old bitext around as long as we need it.
@@ -631,11 +631,11 @@ namespace Moses
<< mdyn.size() << " " << mdyn.getPid() << endl;
#endif
- if (mdyn.size() != sphrase.size() && mfix.size() != sphrase.size())
+ if (mdyn.size() != sphrase.size() && mfix.size() != sphrase.size())
return NULL; // phrase not found in either bitext
// do we have cached results for this phrase?
- uint64_t phrasekey = (mfix.size() == sphrase.size()
+ uint64_t phrasekey = (mfix.size() == sphrase.size()
? (mfix.getPid()<<1) : (mdyn.getPid()<<1)+1);
// get context-specific cache of items previously looked up
@@ -647,25 +647,25 @@ namespace Moses
// was stored as the time stamp. For each word in the
// vocabulary, we also store its most recent occurrence in the
// bitext. Only if the timestamp of each word in the phrase is
- // newer than the timestamp of the phrase itself we must update
- // the entry.
+ // newer than the timestamp of the phrase itself we must update
+ // the entry.
if (ret) return ret; // yes, was cached => DONE
-
+
// OK: pt entry NOT found or NOT up to date
- // lookup and expansion could be done in parallel threads,
+ // lookup and expansion could be done in parallel threads,
// but ppdyn is probably small anyway
// TO DO: have Bitexts return lists of PhrasePairs instead of pstats
- // no need to expand pstats at every single lookup again, especially
+ // no need to expand pstats at every single lookup again, especially
// for btfix.
sptr<pstats> sfix,sdyn;
-
+
if (mfix.size() == sphrase.size()) sfix = btfix.lookup(ttask, mfix);
if (mdyn.size() == sphrase.size()) sdyn = dyn->lookup(ttask, mdyn);
vector<PhrasePair<Token> > ppfix,ppdyn;
PhrasePair<Token>::SortByTargetIdSeq sort_by_tgt_id;
- if (sfix)
+ if (sfix)
{
expand(mfix, btfix, *sfix, ppfix, m_bias_log);
sort(ppfix.begin(), ppfix.end(),sort_by_tgt_id);
@@ -706,8 +706,8 @@ namespace Moses
#if 0
- if (combine_pstats(src,
- mfix.getPid(), sfix.get(), btfix,
+ if (combine_pstats(src,
+ mfix.getPid(), sfix.get(), btfix,
mdyn.getPid(), sdyn.get(), *dyn, ret))
{
#if 0
@@ -733,7 +733,7 @@ namespace Moses
return ret;
}
- size_t
+ size_t
Mmsapt::
SetTableLimit(size_t limit)
{
@@ -762,14 +762,14 @@ namespace Moses
throw "CreateRuleLookupManager is currently not supported in Mmsapt!";
}
- void
+ void
Mmsapt::
InitializeForInput(ttasksptr const& ttask)
{
sptr<ContextScope> const& scope = ttask->GetScope();
- sptr<ContextForQuery> context
+ sptr<ContextForQuery> context
= scope->get<ContextForQuery>(&btfix, true);
- if (m_bias_server.size() && context->bias == NULL)
+ if (m_bias_server.size() && context->bias == NULL)
{ // we need to create the bias
boost::unique_lock<boost::shared_mutex> lock(context->lock);
string const& context_words = ttask->GetContextString();
@@ -778,18 +778,18 @@ namespace Moses
if (m_bias_log)
{
*m_bias_log << HERE << endl
- << "BIAS LOOKUP CONTEXT: "
- << context_words << endl;
+ << "BIAS LOOKUP CONTEXT: "
+ << context_words << endl;
context->bias_log = m_bias_log;
}
- context->bias
+ context->bias
= btfix.SetupDocumentBias(m_bias_server, context_words, m_bias_log);
context->bias->loglevel = m_bias_loglevel;
context->bias->log = m_bias_log;
}
if (!context->cache1) context->cache1.reset(new pstats::cache_t);
if (!context->cache2) context->cache2.reset(new pstats::cache_t);
- }
+ }
boost::unique_lock<boost::shared_mutex> mylock(m_lock);
sptr<TPCollCache> localcache = scope->get<TPCollCache>(cache_key);
if (!localcache)
@@ -798,12 +798,12 @@ namespace Moses
else localcache = m_cache;
scope->set<TPCollCache>(cache_key, localcache);
}
-
+
if (m_lr_func_name.size() && m_lr_func == NULL)
{
FeatureFunction* lr = &FeatureFunction::FindFeatureFunction(m_lr_func_name);
m_lr_func = dynamic_cast<LexicalReordering*>(lr);
- UTIL_THROW_IF2(lr == NULL, "FF " << m_lr_func_name
+ UTIL_THROW_IF2(lr == NULL, "FF " << m_lr_func_name
<< " does not seem to be a lexical reordering function!");
// todo: verify that lr_func implements a hierarchical reordering model
}
@@ -813,7 +813,7 @@ namespace Moses
// Mmsapt::
// PrefixExists(Moses::Phrase const& phrase) const
// {
- // return PrefixExists(phrase,NULL);
+ // return PrefixExists(phrase,NULL);
// }
bool
@@ -821,11 +821,11 @@ namespace Moses
PrefixExists(ttasksptr const& ttask, Moses::Phrase const& phrase) const
{
if (phrase.GetSize() == 0) return false;
- vector<id_type> myphrase;
+ vector<id_type> myphrase;
fillIdSeq(phrase,input_factor,*btfix.V1,myphrase);
-
+
TSA<Token>::tree_iterator mfix(btfix.I1.get(),&myphrase[0],myphrase.size());
- if (mfix.size() == myphrase.size())
+ if (mfix.size() == myphrase.size())
{
btfix.prep(ttask, mfix);
// cerr << phrase << " " << mfix.approxOccurrenceCount() << endl;
@@ -872,7 +872,7 @@ namespace Moses
// return btfix.SetupDocumentBias(bias);
// }
- vector<float>
+ vector<float>
Mmsapt
::DefaultWeights() const
{ return vector<float>(this->GetNumScoreComponents(), 1.); }
diff --git a/moses/TranslationModel/UG/mmsapt.h b/moses/TranslationModel/UG/mmsapt.h
index 4552ea8d2..5f688cfd8 100644
--- a/moses/TranslationModel/UG/mmsapt.h
+++ b/moses/TranslationModel/UG/mmsapt.h
@@ -38,13 +38,13 @@
// TO DO:
// - make lexical phrase scorer take addition to the "dynamic overlay" into account
// - switch to pool of sapts, where each sapt has its own provenance feature
-// RESEARCH QUESTION: is this more effective than having multiple phrase tables,
+// RESEARCH QUESTION: is this more effective than having multiple phrase tables,
// each with its own set of features?
namespace Moses
{
using namespace bitext;
- class Mmsapt
+ class Mmsapt
#ifndef NO_MOSES
: public PhraseDictionary
#endif
@@ -54,7 +54,7 @@ namespace Moses
friend class Alignment;
std::map<std::string,std::string> param;
std::string m_name;
- public:
+ public:
typedef L2R_Token<SimpleWordId> Token;
typedef mmBitext<Token> mmbitext;
typedef imBitext<Token> imbitext;
@@ -63,21 +63,21 @@ namespace Moses
typedef PhraseScorer<Token> pscorer;
private:
// vector<sptr<bitext> > shards;
- mmbitext btfix;
- sptr<imbitext> btdyn;
+ mmbitext btfix;
+ sptr<imbitext> btdyn;
std::string m_bname, m_extra_data, m_bias_file,m_bias_server;
std::string L1;
std::string L2;
float m_lbop_conf; // confidence level for lbop smoothing
float m_lex_alpha; // alpha paramter (j+a)/(m+a) for lexical smoothing
// alpha parameter for lexical smoothing (joint+alpha)/(marg + alpha)
- // must be > 0 if dynamic
+ // must be > 0 if dynamic
size_t m_default_sample_size;
size_t m_workers; // number of worker threads for sampling the bitexts
std::vector<std::string> m_feature_set_names; // one or more of: standard, datasource
std::string m_bias_logfile;
boost::scoped_ptr<ofstream> m_bias_logger; // for logging to a file
- ostream* m_bias_log;
+ ostream* m_bias_log;
int m_bias_loglevel;
LexicalReordering* m_lr_func; // associated lexical reordering function
string m_lr_func_name; // name of associated lexical reordering function
@@ -88,47 +88,47 @@ namespace Moses
boost::shared_ptr<SamplingBias> m_bias; // for global default bias
boost::shared_ptr<TPCollCache> m_cache; // for global default bias
size_t m_cache_size; //
- size_t input_factor; //
+ size_t input_factor; //
size_t output_factor; // we can actually return entire Tokens!
// for display for human inspection (ttable dumps):
std::vector<std::string> m_feature_names; // names of features activated
- std::vector<bool> m_is_logval; // keeps track of which features are log valued
- std::vector<bool> m_is_integer; // keeps track of which features are integer valued
+ std::vector<bool> m_is_logval; // keeps track of which features are log valued
+ std::vector<bool> m_is_integer; // keeps track of which features are integer valued
std::vector<sptr<pscorer > > m_active_ff_fix; // activated feature functions (fix)
std::vector<sptr<pscorer > > m_active_ff_dyn; // activated feature functions (dyn)
- std::vector<sptr<pscorer > > m_active_ff_common;
+ std::vector<sptr<pscorer > > m_active_ff_common;
// activated feature functions (dyn)
- void
+ void
register_ff(sptr<pscorer> const& ff, std::vector<sptr<pscorer> > & registry);
template<typename fftype>
- void
+ void
check_ff(std::string const ffname,std::vector<sptr<pscorer> >* registry = NULL);
- // add feature function if specified
-
+ // add feature function if specified
+
template<typename fftype>
- void
- check_ff(std::string const ffname, float const xtra,
+ void
+ check_ff(std::string const ffname, float const xtra,
std::vector<sptr<pscorer> >* registry = NULL);
// add feature function if specified
void
add_corpus_specific_features(std::vector<sptr<pscorer > >& ffvec);
-
+
// built-in feature functions
// PScorePfwd<Token> calc_pfwd_fix, calc_pfwd_dyn;
// PScorePbwd<Token> calc_pbwd_fix, calc_pbwd_dyn;
- // PScoreLex<Token> calc_lex;
+ // PScoreLex<Token> calc_lex;
// this one I'd like to see as an external ff eventually
- // PScorePC<Token> apply_pp; // apply phrase penalty
+ // PScorePC<Token> apply_pp; // apply phrase penalty
// PScoreLogCounts<Token> add_logcounts_fix;
// PScoreLogCounts<Token> add_logcounts_dyn;
void init(std::string const& line);
mutable boost::shared_mutex m_lock;
- // mutable boost::shared_mutex m_cache_lock;
+ // mutable boost::shared_mutex m_cache_lock;
// for more complex operations on the cache
bool withPbwd;
bool poolCounts;
@@ -141,25 +141,25 @@ namespace Moses
void read_config_file(std::string fname, std::map<std::string,std::string>& param);
// phrase table feature weights for alignment:
- std::vector<float> feature_weights;
+ std::vector<float> feature_weights;
- std::vector<std::vector<id_type> > wlex21;
+ std::vector<std::vector<id_type> > wlex21;
// word translation lexicon (without counts, get these from calc_lex.COOC)
typedef mm2dTable<id_type,id_type,uint32_t,uint32_t> mm2dtable_t;
mm2dtable_t COOCraw;
- TargetPhrase*
- mkTPhrase(Phrase const& src,
- Moses::bitext::PhrasePair<Token>* fix,
- Moses::bitext::PhrasePair<Token>* dyn,
+ TargetPhrase*
+ mkTPhrase(Phrase const& src,
+ Moses::bitext::PhrasePair<Token>* fix,
+ Moses::bitext::PhrasePair<Token>* dyn,
sptr<Bitext<Token> > const& dynbt) const;
void
process_pstats
(Phrase const& src,
- uint64_t const pid1,
- pstats const& stats,
- Bitext<Token> const & bt,
+ uint64_t const pid1,
+ pstats const& stats,
+ Bitext<Token> const & bt,
TargetPhraseCollection* tpcoll
) const;
@@ -169,16 +169,16 @@ namespace Moses
uint64_t const pid1a, pstats * statsa, Bitext<Token> const & bta,
uint64_t const pid1b, pstats const* statsb, Bitext<Token> const & btb,
TargetPhraseCollection* tpcoll) const;
-
+
bool
combine_pstats
- (Phrase const& src,
+ (Phrase const& src,
uint64_t const pid1a, pstats* statsa, Bitext<Token> const & bta,
- uint64_t const pid1b, pstats const* statsb, Bitext<Token> const & btb,
+ uint64_t const pid1b, pstats const* statsb, Bitext<Token> const & btb,
TargetPhraseCollection* tpcoll) const;
void load_extra_data(std::string bname, bool locking);
- void load_bias(std::string bname);
+ void load_bias(std::string bname);
public:
// Mmsapt(std::string const& description, std::string const& line);
@@ -190,22 +190,22 @@ namespace Moses
std::string const& GetName() const;
#ifndef NO_MOSES
- TargetPhraseCollection const*
+ TargetPhraseCollection const*
GetTargetPhraseCollectionLEGACY(ttasksptr const& ttask, const Phrase& src) const;
- TargetPhraseCollection const*
+ TargetPhraseCollection const*
GetTargetPhraseCollectionLEGACY(const Phrase& src) const;
- void
+ void
GetTargetPhraseCollectionBatch(ttasksptr const& ttask,
const InputPathList &inputPathQueue) const;
-
+
//! Create a sentence-specific manager for SCFG rule lookup.
ChartRuleLookupManager*
CreateRuleLookupManager(const ChartParser &, const ChartCellCollectionBase &);
-
+
ChartRuleLookupManager*
- CreateRuleLookupManager(const ChartParser &, const ChartCellCollectionBase &,
+ CreateRuleLookupManager(const ChartParser &, const ChartCellCollectionBase &,
std::size_t);
#endif
@@ -222,7 +222,7 @@ namespace Moses
bool ProvidesPrefixCheck() const; // return true if prefix /phrase/ check exists
// bool PrefixExists(Phrase const& phrase, SamplingBias const* const bias) const;
bool PrefixExists(ttasksptr const& ttask, Phrase const& phrase) const;
-
+
bool isLogVal(int i) const;
bool isInteger(int i) const;
@@ -232,7 +232,7 @@ namespace Moses
void CleanUpAfterSentenceProcessing(ttasksptr const& ttask);
// align two new sentences
- sptr<std::vector<int> >
+ sptr<std::vector<int> >
align(std::string const& src, std::string const& trg) const;
std::vector<std::string> const&
diff --git a/moses/TranslationModel/UG/mmsapt_align.cc b/moses/TranslationModel/UG/mmsapt_align.cc
index 65cf979e1..13d8387d2 100644
--- a/moses/TranslationModel/UG/mmsapt_align.cc
+++ b/moses/TranslationModel/UG/mmsapt_align.cc
@@ -6,7 +6,7 @@
// using namespace bitext;
// using namespace std;
// using namespace boost;
-
+
// struct PPgreater
// {
// bool operator()(PhrasePair const& a, PhrasePair const& b)
@@ -28,7 +28,7 @@
// PhrasePair pp;
// ushort s1,e1,s2,e2; // start and end positions
// int prev; // preceding alignment hypothesis
-// float score;
+// float score;
// bitvector scov; // source coverage
// PhraseAlnHyp(PhrasePair const& ppx, int slen,
// pair<uint32_t,uint32_t> const& sspan,
@@ -37,7 +37,7 @@
// {
// s1 = sspan.first; e1 = sspan.second;
// s2 = tspan.first; e2 = tspan.second;
-// for (size_t i = s1; i < e1; ++i)
+// for (size_t i = s1; i < e1; ++i)
// scov.set(i);
// }
@@ -78,13 +78,13 @@
// return po_other;
// }
-// float
+// float
// dprob_fwd(PhraseAlnHyp const& next)
// {
// return pp.dfwd[po_fwd(&next)];
// }
-// float
+// float
// dprob_bwd(PhraseAlnHyp const& prev)
// {
// return pp.dbwd[po_bwd(&prev)];
@@ -102,15 +102,15 @@
// typedef pstats::trg_map_t jStatsTable;
// Mmsapt const& PT;
-// vector<id_type> s,t;
+// vector<id_type> s,t;
// pidmap_t sspan2pid, tspan2pid; // span -> phrase ID
// pid2span_t spid2span,tpid2span;
// vector<vector<sptr<pstats> > > spstats;
-// vector<PhrasePair> PP;
+// vector<PhrasePair> PP;
// // position-independent phrase pair info
// public:
-// vector<PhraseAlnHyp> PAH;
+// vector<PhraseAlnHyp> PAH;
// vector<vector<int> > tpos2ahyp;
// // maps from target start positions to PhraseAlnHyps starting at
// // that position
@@ -120,8 +120,8 @@
// void fill_sspan_maps();
// public:
// Alignment(Mmsapt const& pt, string const& src, string const& trg);
-// void show(ostream& out);
-// void show(ostream& out, PhraseAlnHyp const& ah);
+// void show(ostream& out);
+// void show(ostream& out, PhraseAlnHyp const& ah);
// };
// void
@@ -129,11 +129,11 @@
// show(ostream& out, PhraseAlnHyp const& ah)
// {
// #if 0
-// LexicalPhraseScorer2<Token>::table_t const&
+// LexicalPhraseScorer2<Token>::table_t const&
// COOCjnt = PT.calc_lex.scorer.COOC;
// out << setw(10) << exp(ah.score) << " "
-// << PT.btfix.T2->pid2str(PT.btfix.V2.get(), ah.pp.p2)
+// << PT.btfix.T2->pid2str(PT.btfix.V2.get(), ah.pp.p2)
// << " <=> "
// << PT.btfix.T1->pid2str(PT.btfix.V1.get(), ah.pp.p1);
// vector<uchar> const& a = ah.pp.aln;
@@ -168,7 +168,7 @@
// // << "]" << endl;
// #endif
// }
-
+
// void
// Alignment::
// show(ostream& out)
@@ -192,7 +192,7 @@
// return spstats[sspan.first][k];
// else return sptr<pstats>();
// }
-
+
// void
// Alignment::
// fill_tspan_maps()
@@ -207,7 +207,7 @@
// tpid2span[pid].push_back(pair<uint32_t,uint32_t>(i,k+1));
// tspan2pid[i][k] = pid;
// }
-// }
+// }
// }
// void
@@ -230,11 +230,11 @@
// int y = p->second[0].second-1;
// spstats[i].push_back(spstats[x][y-x]);
// }
-// else
+// else
// {
// spstats[i].push_back(PT.btfix.lookup(m));
// cout << PT.btfix.T1->pid2str(PT.btfix.V1.get(),pid) << " "
-// << spstats[i].back()->good << "/" << spstats[i].back()->sample_cnt
+// << spstats[i].back()->good << "/" << spstats[i].back()->sample_cnt
// << endl;
// }
// spid2span[pid].push_back(pair<uint32_t,uint32_t>(i,k+1));
@@ -262,14 +262,14 @@
// // size_t m2 = COOC.m2(i);
// // if (j*1000 > m1 && j*1000 > m2)
// // cout << " " << (*PT.btfix.V1)[k];
-// // }
+// // }
// // }
// // cout << endl;
// // }
-
+
// fill_tspan_maps();
// fill_sspan_maps();
-// tpos2ahyp.resize(t.size());
+// tpos2ahyp.resize(t.size());
// // now fill the association score table
// PAH.reserve(1000000);
// typedef pid2span_t::iterator psiter;
@@ -301,12 +301,12 @@
// }
// }
-
+
// int
// extend(vector<PhraseAlnHyp> & PAH, int edge, int next)
// {
-// if ((PAH[edge].scov & PAH[next].scov).count())
+// if ((PAH[edge].scov & PAH[next].scov).count())
// return -1;
// int ret = PAH.size();
// PAH.push_back(PAH[next]);
diff --git a/moses/TranslationModel/UG/ptable-describe-features.cc b/moses/TranslationModel/UG/ptable-describe-features.cc
index dbd5accb9..c9dd3abd1 100644
--- a/moses/TranslationModel/UG/ptable-describe-features.cc
+++ b/moses/TranslationModel/UG/ptable-describe-features.cc
@@ -19,7 +19,7 @@ int main()
{
if (line.empty()) continue;
size_t k = line.find_first_not_of(" ");
- if (line.find("Mmsapt") != k &&
+ if (line.find("Mmsapt") != k &&
line.find("PhraseDictionaryBitextSampling") != k)
continue;
Mmsapt PT(line);
@@ -32,6 +32,6 @@ int main()
}
exit(0);
}
-
-
+
+
diff --git a/moses/TranslationModel/UG/ptable-lookup.cc b/moses/TranslationModel/UG/ptable-lookup.cc
index e165011c7..94627a02c 100644
--- a/moses/TranslationModel/UG/ptable-lookup.cc
+++ b/moses/TranslationModel/UG/ptable-lookup.cc
@@ -19,13 +19,13 @@ class SimplePhrase : public Moses::Phrase
vector<FactorType> const m_fo; // factor order
public:
SimplePhrase(): m_fo(1,FactorType(0)) {}
-
- void init(string const& s)
+
+ void init(string const& s)
{
istringstream buf(s); string w;
- while (buf >> w)
+ while (buf >> w)
{
- Word wrd;
+ Word wrd;
this->AddWord().CreateFromString(Input,m_fo,StringPiece(w),false,false);
}
}
@@ -63,15 +63,15 @@ int main(int argc, char* argv[])
cerr << "Phrase table implementation not supported by this utility." << endl;
exit(1);
}
-
+
string line;
while (true)
{
Sentence phrase;
if (!phrase.Read(cin,ifo)) break;
- if (pdta)
+ if (pdta)
{
- pdta->InitializeForInput(phrase);
+ pdta->InitializeForInput(phrase);
// do we also need to call CleanupAfterSentenceProcessing at the end?
}
Phrase& p = phrase;
@@ -79,13 +79,13 @@ int main(int argc, char* argv[])
cout << p << endl;
TargetPhraseCollection const* trg = PT->GetTargetPhraseCollectionLEGACY(p);
if (!trg) continue;
- vector<size_t> order(trg->GetSize());
+ vector<size_t> order(trg->GetSize());
for (size_t i = 0; i < order.size(); ++i) order[i] = i;
sort(order.begin(),order.end(),TargetPhraseIndexSorter(*trg));
size_t k = 0;
- // size_t precision =
+ // size_t precision =
cout.precision(2);
-
+
vector<string> fname;
if (mmsapt)
{
@@ -119,6 +119,6 @@ int main(int argc, char* argv[])
}
exit(0);
}
-
-
+
+
diff --git a/moses/TranslationModel/UG/sapt_phrase_key.h b/moses/TranslationModel/UG/sapt_phrase_key.h
index e1ecf1573..0caf11e43 100644
--- a/moses/TranslationModel/UG/sapt_phrase_key.h
+++ b/moses/TranslationModel/UG/sapt_phrase_key.h
@@ -8,6 +8,6 @@ namespace sapt
using namespace Moses;
using namespace std;
-
+
}
diff --git a/moses/TranslationModel/UG/sapt_phrase_scorers.h b/moses/TranslationModel/UG/sapt_phrase_scorers.h
index 9870ed7f0..ace907d73 100644
--- a/moses/TranslationModel/UG/sapt_phrase_scorers.h
+++ b/moses/TranslationModel/UG/sapt_phrase_scorers.h
@@ -1,6 +1,6 @@
// -*- c++ -*-
// Phrase scoring functions for suffix array-based phrase tables
-// written by Ulrich Germann
+// written by Ulrich Germann
#pragma once
#include "sapt_pscore_unaligned.h" // count # of unaligned words
#include "sapt_pscore_provenance.h" // reward for joint phrase occ. per corpus
diff --git a/moses/TranslationModel/UG/sapt_pscore_base.h b/moses/TranslationModel/UG/sapt_pscore_base.h
index ff705f952..388c83d9b 100644
--- a/moses/TranslationModel/UG/sapt_pscore_base.h
+++ b/moses/TranslationModel/UG/sapt_pscore_base.h
@@ -1,6 +1,6 @@
// -*- c++ -*-
// Base classes for suffix array-based phrase scorers
-// written by Ulrich Germann
+// written by Ulrich Germann
#pragma once
#include "moses/TranslationModel/UG/mm/ug_bitext.h"
#include "util/exception.hh"
@@ -21,72 +21,72 @@ namespace Moses {
string m_tag;
vector<string> m_feature_names;
public:
-
- virtual
- void
- operator()(Bitext<Token> const& pt,
- PhrasePair<Token>& pp,
- vector<float> * dest=NULL)
+
+ virtual
+ void
+ operator()(Bitext<Token> const& pt,
+ PhrasePair<Token>& pp,
+ vector<float> * dest=NULL)
const = 0;
void
setIndex(int const i) { m_index = i; }
-
+
int
getIndex() const { return m_index; }
- int
+ int
fcnt() const { return m_num_feats; }
-
+
vector<string> const &
fnames() const { return m_feature_names; }
string const &
fname(int i) const
- {
+ {
if (i < 0) i += m_num_feats;
UTIL_THROW_IF2(i < 0 || i >= m_num_feats,
"Feature name index out of range at " << HERE);
- return m_feature_names.at(i);
+ return m_feature_names.at(i);
}
virtual
bool
- isLogVal(int i) const { return true; };
- // is this feature log valued?
-
+ isLogVal(int i) const { return true; };
+ // is this feature log valued?
+
virtual
bool
- isIntegerValued(int i) const { return false; };
- // is this feature integer valued (e.g., count features)?
+ isIntegerValued(int i) const { return false; };
+ // is this feature integer valued (e.g., count features)?
virtual
bool
allowPooling() const { return true; }
- // does this feature function allow pooling of counts if
+ // does this feature function allow pooling of counts if
// there are no occurrences in the respective corpus?
-
+
virtual
void
load() { }
};
- // base class for 'families' of phrase scorers that have a single
+ // base class for 'families' of phrase scorers that have a single
template<typename Token>
class
- SingleRealValuedParameterPhraseScorerFamily
+ SingleRealValuedParameterPhraseScorerFamily
: public PhraseScorer<Token>
{
protected:
vector<float> m_x;
- virtual
- void
- init(string const specs)
- {
+ virtual
+ void
+ init(string const specs)
+ {
using namespace boost;
- UTIL_THROW_IF2(this->m_tag.size() == 0,
+ UTIL_THROW_IF2(this->m_tag.size() == 0,
"m_tag must be initialized in constructor");
UTIL_THROW_IF2(specs.size() == 0,"empty specification string!");
UTIL_THROW_IF2(this->m_feature_names.size(),
diff --git a/moses/TranslationModel/UG/sapt_pscore_coherence.h b/moses/TranslationModel/UG/sapt_pscore_coherence.h
index a3211df54..c201c9651 100644
--- a/moses/TranslationModel/UG/sapt_pscore_coherence.h
+++ b/moses/TranslationModel/UG/sapt_pscore_coherence.h
@@ -1,5 +1,5 @@
// -*- c++ -*-
-// written by Ulrich Germann
+// written by Ulrich Germann
#pragma once
#include "moses/TranslationModel/UG/mm/ug_bitext.h"
#include "util/exception.hh"
@@ -13,16 +13,16 @@ namespace Moses {
PScoreCoherence : public PhraseScorer<Token>
{
public:
- PScoreCoherence(string const dummy)
- {
+ PScoreCoherence(string const dummy)
+ {
this->m_index = -1;
this->m_num_feats = 1;
this->m_feature_names.push_back(string("coherence"));
}
-
- void
- operator()(Bitext<Token> const& bt,
- PhrasePair<Token>& pp,
+
+ void
+ operator()(Bitext<Token> const& bt,
+ PhrasePair<Token>& pp,
vector<float> * dest = NULL) const
{
if (!dest) dest = &pp.fvals;
diff --git a/moses/TranslationModel/UG/sapt_pscore_lex1.h b/moses/TranslationModel/UG/sapt_pscore_lex1.h
index a8e83da51..76ca2a9a4 100644
--- a/moses/TranslationModel/UG/sapt_pscore_lex1.h
+++ b/moses/TranslationModel/UG/sapt_pscore_lex1.h
@@ -1,6 +1,6 @@
// -*- c++ -*-
// Phrase scorer that counts the number of unaligend words in the phrase
-// written by Ulrich Germann
+// written by Ulrich Germann
#include "moses/TranslationModel/UG/mm/ug_bitext.h"
#include "sapt_pscore_base.h"
@@ -17,11 +17,11 @@ namespace Moses {
string m_lexfile;
public:
LexicalPhraseScorer2<Token> scorer;
-
- PScoreLex1(string const& alphaspec, string const& lexfile)
- {
+
+ PScoreLex1(string const& alphaspec, string const& lexfile)
+ {
this->m_index = -1;
- this->m_num_feats = 2;
+ this->m_num_feats = 2;
this->m_feature_names.reserve(2);
this->m_feature_names.push_back("lexfwd");
this->m_feature_names.push_back("lexbwd");
@@ -31,13 +31,13 @@ namespace Moses {
void
load()
- {
- scorer.open(m_lexfile);
+ {
+ scorer.open(m_lexfile);
}
-
- void
- operator()(Bitext<Token> const& bt,
- PhrasePair<Token>& pp,
+
+ void
+ operator()(Bitext<Token> const& bt,
+ PhrasePair<Token>& pp,
vector<float> * dest = NULL) const
{
if (!dest) dest = &pp.fvals;
@@ -48,27 +48,27 @@ namespace Moses {
cout << len1 << " " << len2 << endl;
Token const* t1 = bt.T1->sntStart(sid1);
for (size_t i = off1; i < off1 + len1; ++i)
- cout << (*bt.V1)[t1[i].id()] << " ";
+ cout << (*bt.V1)[t1[i].id()] << " ";
cout << __FILE__ << ":" << __LINE__ << endl;
-
+
Token const* t2 = bt.T2->sntStart(sid2);
for (size_t i = off2; i < off2 + len2; ++i)
- cout << (*bt.V2)[t2[i].id()] << " ";
+ cout << (*bt.V2)[t2[i].id()] << " ";
cout << __FILE__ << ":" << __LINE__ << endl;
-
+
BOOST_FOREACH (int a, pp.aln)
cout << a << " " ;
cout << __FILE__ << ":" << __LINE__ << "\n" << endl;
-
+
scorer.score(bt.T1->sntStart(sid1)+off1,0,len1,
bt.T2->sntStart(sid2)+off2,0,len2,
pp.aln, m_alpha,
(*dest)[this->m_index],
(*dest)[this->m_index+1]);
#endif
- scorer.score(pp.start1,0, pp.len1,
- pp.start2,0, pp.len2, pp.aln, m_alpha,
- (*dest)[this->m_index],
+ scorer.score(pp.start1,0, pp.len1,
+ pp.start2,0, pp.len2, pp.aln, m_alpha,
+ (*dest)[this->m_index],
(*dest)[this->m_index+1]);
}
};
diff --git a/moses/TranslationModel/UG/sapt_pscore_logcnt.h b/moses/TranslationModel/UG/sapt_pscore_logcnt.h
index 2790323ed..9dc5ac7ba 100644
--- a/moses/TranslationModel/UG/sapt_pscore_logcnt.h
+++ b/moses/TranslationModel/UG/sapt_pscore_logcnt.h
@@ -2,7 +2,7 @@
// Phrase scorer that rewards the number of phrase pair occurrences in a bitext
// with the asymptotic function x/(j+x) where x > 0 is a function
// parameter that determines the steepness of the rewards curve
-// written by Ulrich Germann
+// written by Ulrich Germann
#include "sapt_pscore_base.h"
#include <boost/dynamic_bitset.hpp>
@@ -10,15 +10,15 @@
using namespace std;
namespace Moses {
namespace bitext {
-
+
template<typename Token>
class
PScoreLogCnt : public PhraseScorer<Token>
{
string m_specs;
public:
- PScoreLogCnt(string const specs)
- {
+ PScoreLogCnt(string const specs)
+ {
this->m_index = -1;
this->m_specs = specs;
if (specs.find("r1") != string::npos) // raw source phrase counts
@@ -35,11 +35,11 @@ namespace Moses {
}
bool
- isIntegerValued(int i) const { return true; }
+ isIntegerValued(int i) const { return true; }
- void
- operator()(Bitext<Token> const& bt,
- PhrasePair<Token>& pp,
+ void
+ operator()(Bitext<Token> const& bt,
+ PhrasePair<Token>& pp,
vector<float> * dest = NULL) const
{
if (!dest) dest = &pp.fvals;
@@ -49,15 +49,15 @@ namespace Moses {
assert(pp.joint);
assert(pp.raw2);
size_t i = this->m_index;
- if (m_specs.find("r1") != string::npos)
+ if (m_specs.find("r1") != string::npos)
(*dest)[i++] = log(pp.raw1);
- if (m_specs.find("s1") != string::npos)
+ if (m_specs.find("s1") != string::npos)
(*dest)[i++] = log(pp.sample1);
- if (m_specs.find("g1") != string::npos)
+ if (m_specs.find("g1") != string::npos)
(*dest)[i++] = log(pp.good1);
- if (m_specs.find("j") != string::npos)
+ if (m_specs.find("j") != string::npos)
(*dest)[i++] = log(pp.joint);
- if (m_specs.find("r2") != string::npos)
+ if (m_specs.find("r2") != string::npos)
(*dest)[++i] = log(pp.raw2);
}
};
diff --git a/moses/TranslationModel/UG/sapt_pscore_pbwd.h b/moses/TranslationModel/UG/sapt_pscore_pbwd.h
index f7b4686d7..9366777ef 100644
--- a/moses/TranslationModel/UG/sapt_pscore_pbwd.h
+++ b/moses/TranslationModel/UG/sapt_pscore_pbwd.h
@@ -1,5 +1,5 @@
//-*- c++ -*-
-// written by Ulrich Germann
+// written by Ulrich Germann
#pragma once
#include "moses/TranslationModel/UG/mm/ug_bitext.h"
#include "util/exception.hh"
@@ -15,12 +15,12 @@ namespace Moses {
{
float conf;
string denom;
-
+
public:
- PScorePbwd(float const c, string d)
- {
+ PScorePbwd(float const c, string d)
+ {
this->m_index = -1;
- conf = c;
+ conf = c;
denom = d;
size_t checksum = d.size();
BOOST_FOREACH(char const& x, denom)
@@ -36,13 +36,13 @@ namespace Moses {
<< d << "' for Pbwd phrase scorer at " << HERE);
}
- void
- operator()(Bitext<Token> const& bt,
- PhrasePair<Token>& pp,
+ void
+ operator()(Bitext<Token> const& bt,
+ PhrasePair<Token>& pp,
vector<float> * dest = NULL) const
{
if (!dest) dest = &pp.fvals;
- // we use the denominator specification to scale the raw counts on the
+ // we use the denominator specification to scale the raw counts on the
// target side; the clean way would be to counter-sample
size_t i = this->m_index;
BOOST_FOREACH(char const& x, denom)
diff --git a/moses/TranslationModel/UG/sapt_pscore_pfwd.h b/moses/TranslationModel/UG/sapt_pscore_pfwd.h
index ed48a93d2..c5de210a1 100644
--- a/moses/TranslationModel/UG/sapt_pscore_pfwd.h
+++ b/moses/TranslationModel/UG/sapt_pscore_pfwd.h
@@ -1,5 +1,5 @@
// -*- c++ -*-
-// written by Ulrich Germann
+// written by Ulrich Germann
#pragma once
#include "moses/TranslationModel/UG/mm/ug_bitext.h"
#include "util/exception.hh"
@@ -18,10 +18,10 @@ namespace Moses {
public:
- PScorePfwd(float const c, string d)
- {
+ PScorePfwd(float const c, string d)
+ {
this->m_index = -1;
- conf = c;
+ conf = c;
denom = d;
size_t checksum = d.size();
BOOST_FOREACH(char const& x, denom)
@@ -32,17 +32,17 @@ namespace Moses {
this->m_feature_names.push_back(s);
}
this->m_num_feats = this->m_feature_names.size();
- UTIL_THROW_IF2(this->m_feature_names.size() != checksum,
- "Unknown parameter in specification '"
+ UTIL_THROW_IF2(this->m_feature_names.size() != checksum,
+ "Unknown parameter in specification '"
<< d << "' for Pfwd phrase scorer at " << HERE);
}
-
- void
- operator()(Bitext<Token> const& bt, PhrasePair<Token> & pp,
+
+ void
+ operator()(Bitext<Token> const& bt, PhrasePair<Token> & pp,
vector<float> * dest = NULL) const
{
if (!dest) dest = &pp.fvals;
- if (pp.joint > pp.good1)
+ if (pp.joint > pp.good1)
{
pp.joint = pp.good1;
// cerr<<bt.toString(pp.p1,0)<<" ::: "<<bt.toString(pp.p2,1)<<endl;
@@ -53,18 +53,18 @@ namespace Moses {
{
switch (c)
{
- case 'g':
- (*dest)[i++] = log(lbop(pp.good1, pp.joint, conf));
+ case 'g':
+ (*dest)[i++] = log(lbop(pp.good1, pp.joint, conf));
break;
- case 's':
- (*dest)[i++] = log(lbop(pp.sample1, pp.joint, conf));
+ case 's':
+ (*dest)[i++] = log(lbop(pp.sample1, pp.joint, conf));
break;
case 'r':
- (*dest)[i++] = log(lbop(pp.raw1, pp.joint, conf));
+ (*dest)[i++] = log(lbop(pp.raw1, pp.joint, conf));
}
}
}
};
}
}
-
+
diff --git a/moses/TranslationModel/UG/sapt_pscore_phrasecount.h b/moses/TranslationModel/UG/sapt_pscore_phrasecount.h
index e0a6eb48b..e0ce40117 100644
--- a/moses/TranslationModel/UG/sapt_pscore_phrasecount.h
+++ b/moses/TranslationModel/UG/sapt_pscore_phrasecount.h
@@ -1,5 +1,5 @@
// -*- c++ -*-
-// written by Ulrich Germann
+// written by Ulrich Germann
#pragma once
#include "moses/TranslationModel/UG/mm/ug_bitext.h"
#include "util/exception.hh"
@@ -20,15 +20,15 @@ namespace Moses {
this->m_num_feats = 1;
this->m_feature_names.push_back(string("phrasecount"));
}
-
- void
+
+ void
operator()(Bitext<Token> const& bt,
- PhrasePair<Token>& pp,
+ PhrasePair<Token>& pp,
vector<float> * dest = NULL) const
{
if (!dest) dest = &pp.fvals;
(*dest)[this->m_index] = 1;
- }
+ }
};
}
}
diff --git a/moses/TranslationModel/UG/sapt_pscore_provenance.h b/moses/TranslationModel/UG/sapt_pscore_provenance.h
index c33b98fe7..ee7b08bda 100644
--- a/moses/TranslationModel/UG/sapt_pscore_provenance.h
+++ b/moses/TranslationModel/UG/sapt_pscore_provenance.h
@@ -2,7 +2,7 @@
// Phrase scorer that rewards the number of phrase pair occurrences in a bitext
// with the asymptotic function j/(j+x) where x > 0 is a function
// parameter that determines the steepness of the rewards curve
-// written by Ulrich Germann
+// written by Ulrich Germann
#include "sapt_pscore_base.h"
#include <boost/dynamic_bitset.hpp>
@@ -10,7 +10,7 @@
using namespace std;
namespace Moses {
namespace bitext {
-
+
// asymptotic provenance feature n/(n+x)
template<typename Token>
class
@@ -18,18 +18,18 @@ namespace Moses {
{
public:
- PScoreProvenance(string const& spec)
+ PScoreProvenance(string const& spec)
{
this->m_tag = "prov";
this->init(spec);
}
-
+
bool
- isLogVal(int i) const { return false; }
+ isLogVal(int i) const { return false; }
- void
- operator()(Bitext<Token> const& bt,
- PhrasePair<Token>& pp,
+ void
+ operator()(Bitext<Token> const& bt,
+ PhrasePair<Token>& pp,
vector<float> * dest = NULL) const
{
if (!dest) dest = &pp.fvals;
@@ -39,7 +39,7 @@ namespace Moses {
}
bool
- allowPooling() const
+ allowPooling() const
{ return false; }
};
diff --git a/moses/TranslationModel/UG/sapt_pscore_rareness.h b/moses/TranslationModel/UG/sapt_pscore_rareness.h
index 58f204c88..34979243c 100644
--- a/moses/TranslationModel/UG/sapt_pscore_rareness.h
+++ b/moses/TranslationModel/UG/sapt_pscore_rareness.h
@@ -2,7 +2,7 @@
// Phrase scorer that rewards the number of phrase pair occurrences in a bitext
// with the asymptotic function x/(j+x) where x > 0 is a function
// parameter that determines the steepness of the rewards curve
-// written by Ulrich Germann
+// written by Ulrich Germann
#include "sapt_pscore_base.h"
#include <boost/dynamic_bitset.hpp>
@@ -10,25 +10,25 @@
using namespace std;
namespace Moses {
namespace bitext {
-
+
// rareness penalty: x/(n+x)
template<typename Token>
class
PScoreRareness : public SingleRealValuedParameterPhraseScorerFamily<Token>
{
public:
- PScoreRareness(string const spec)
+ PScoreRareness(string const spec)
{
this->m_tag = "rare";
this->init(spec);
}
bool
- isLogVal(int i) const { return false; }
+ isLogVal(int i) const { return false; }
- void
- operator()(Bitext<Token> const& bt,
- PhrasePair<Token>& pp,
+ void
+ operator()(Bitext<Token> const& bt,
+ PhrasePair<Token>& pp,
vector<float> * dest = NULL) const
{
if (!dest) dest = &pp.fvals;
diff --git a/moses/TranslationModel/UG/sapt_pscore_unaligned.h b/moses/TranslationModel/UG/sapt_pscore_unaligned.h
index dafc1e129..8dceb1ad0 100644
--- a/moses/TranslationModel/UG/sapt_pscore_unaligned.h
+++ b/moses/TranslationModel/UG/sapt_pscore_unaligned.h
@@ -1,6 +1,6 @@
// -*- c++ -*-
// Phrase scorer that counts the number of unaligend words in the phrase
-// written by Ulrich Germann
+// written by Ulrich Germann
#include "sapt_pscore_base.h"
#include <boost/dynamic_bitset.hpp>
@@ -14,7 +14,7 @@ namespace Moses {
{
typedef boost::dynamic_bitset<typename ::uint64_t> bitvector;
public:
- PScoreUnaligned(string const spec)
+ PScoreUnaligned(string const spec)
{
this->m_index = -1;
int f = this->m_num_feats = atoi(spec.c_str());
@@ -28,16 +28,16 @@ namespace Moses {
this->m_feature_names[1] = "unal-t";
}
}
-
+
bool
- isLogVal(int i) const { return false; }
-
+ isLogVal(int i) const { return false; }
+
bool
- isIntegerValued(int i) const { return true; }
+ isIntegerValued(int i) const { return true; }
- void
- operator()(Bitext<Token> const& bt,
- PhrasePair<Token>& pp,
+ void
+ operator()(Bitext<Token> const& bt,
+ PhrasePair<Token>& pp,
vector<float> * dest = NULL) const
{
if (!dest) dest = &pp.fvals;
@@ -46,9 +46,9 @@ namespace Moses {
// parse_pid(pp.p2, sid2, off2, len2);
bitvector check1(pp.len1),check2(pp.len2);
for (size_t i = 0; i < pp.aln.size(); )
- {
- check1.set(pp.aln[i++]);
- check2.set(pp.aln.at(i++));
+ {
+ check1.set(pp.aln[i++]);
+ check2.set(pp.aln.at(i++));
}
if (this->m_num_feats == 1)
diff --git a/moses/TranslationModel/UG/sapt_pscore_wordcount.h b/moses/TranslationModel/UG/sapt_pscore_wordcount.h
index 3227bb6ba..a5000be37 100644
--- a/moses/TranslationModel/UG/sapt_pscore_wordcount.h
+++ b/moses/TranslationModel/UG/sapt_pscore_wordcount.h
@@ -1,5 +1,5 @@
// -*- c++ -*-
-// written by Ulrich Germann
+// written by Ulrich Germann
#pragma once
#include "moses/TranslationModel/UG/mm/ug_bitext.h"
#include "util/exception.hh"
@@ -13,7 +13,7 @@ namespace Moses {
class
PScoreWC : public PhraseScorer<Token>
{
- public:
+ public:
PScoreWC(string const dummy)
{
this->m_index = -1;
@@ -21,14 +21,14 @@ namespace Moses {
this->m_feature_names.push_back(string("wordcount"));
}
- void
+ void
operator()(Bitext<Token> const& bt,
- PhrasePair<Token>& pp,
+ PhrasePair<Token>& pp,
vector<float> * dest = NULL) const
{
if (!dest) dest = &pp.fvals;
(*dest)[this->m_index] = pp.len2;
- }
+ }
};
}
}
diff --git a/moses/TranslationModel/UG/sim-pe.cc b/moses/TranslationModel/UG/sim-pe.cc
index 460d66c1f..00a705936 100644
--- a/moses/TranslationModel/UG/sim-pe.cc
+++ b/moses/TranslationModel/UG/sim-pe.cc
@@ -15,7 +15,7 @@ using namespace boost;
vector<FactorType> fo(1,FactorType(0));
-ostream&
+ostream&
operator<<(ostream& out, Hypothesis const* x)
{
vector<const Hypothesis*> H;
@@ -24,7 +24,7 @@ operator<<(ostream& out, Hypothesis const* x)
for (; H.size(); H.pop_back())
{
Phrase const& p = H.back()->GetCurrTargetPhrase();
- for (size_t pos = 0 ; pos < p.GetSize() ; pos++)
+ for (size_t pos = 0 ; pos < p.GetSize() ; pos++)
out << *p.GetFactor(pos, 0) << (H.size() ? " " : "");
}
return out;
@@ -33,19 +33,19 @@ operator<<(ostream& out, Hypothesis const* x)
vector<FactorType> ifo;
size_t lineNumber;
-string
+string
translate(string const& source)
{
StaticData const& global = StaticData::Instance();
- Sentence sentence;
- istringstream ibuf(source+"\n");
+ Sentence sentence;
+ istringstream ibuf(source+"\n");
sentence.Read(ibuf,ifo);
// Manager manager(lineNumber, sentence, global.GetSearchAlgorithm());
Manager manager(sentence, global.GetSearchAlgorithm());
manager.ProcessSentence();
-
+
ostringstream obuf;
const Hypothesis* h = manager.GetBestHypothesis();
obuf << h;
@@ -58,7 +58,7 @@ int main(int argc, char* argv[])
Parameter params;
if (!params.LoadParam(argc,argv) || !StaticData::LoadDataStatic(&params, argv[0]))
exit(1);
-
+
StaticData const& global = StaticData::Instance();
global.SetVerboseLevel(0);
ifo = global.GetInputFactorOrder();
@@ -79,6 +79,6 @@ int main(int argc, char* argv[])
}
exit(0);
}
-
-
+
+
diff --git a/moses/TranslationModel/UG/spe-check-coverage.cc b/moses/TranslationModel/UG/spe-check-coverage.cc
index 6e838ad04..378dd800f 100644
--- a/moses/TranslationModel/UG/spe-check-coverage.cc
+++ b/moses/TranslationModel/UG/spe-check-coverage.cc
@@ -24,13 +24,13 @@ class SimplePhrase : public Moses::Phrase
vector<FactorType> const m_fo; // factor order
public:
SimplePhrase(): m_fo(1,FactorType(0)) {}
-
- void init(string const& s)
+
+ void init(string const& s)
{
istringstream buf(s); string w;
- while (buf >> w)
+ while (buf >> w)
{
- Word wrd;
+ Word wrd;
this->AddWord().CreateFromString(Input,m_fo,StringPiece(w),false,false);
}
}
@@ -45,7 +45,7 @@ public:
bool operator()(size_t a, size_t b) const
{
// return cmp(*my_tpc[a], *my_tpc[b]);
- return (my_tpc[a]->GetScoreBreakdown().GetWeightedScore() >
+ return (my_tpc[a]->GetScoreBreakdown().GetWeightedScore() >
my_tpc[b]->GetScoreBreakdown().GetWeightedScore());
}
};
@@ -59,7 +59,7 @@ int main(int argc, char* argv[])
argfilter[1] = std::make_pair(string("--spe-trg"),1);
argfilter[2] = std::make_pair(string("--spe-aln"),1);
argfilter[3] = std::make_pair(string("--spe-show"),1);
-
+
char** my_args; int my_acnt;
char** mo_args; int mo_acnt;
filter_arguments(argc, argv, mo_acnt, &mo_args, my_acnt, &my_args, argfilter);
@@ -77,9 +77,9 @@ int main(int argc, char* argv[])
else if (!strcmp(my_args[i],"--spe-show"))
vlevel = my_args[i+1];
}
-
+
Parameter params;
- if (!params.LoadParam(mo_acnt,mo_args) ||
+ if (!params.LoadParam(mo_acnt,mo_args) ||
!StaticData::LoadDataStatic(&params, mo_args[0]))
exit(1);
@@ -95,15 +95,15 @@ int main(int argc, char* argv[])
exit(1);
}
mmsapt->SetTableLimit(0);
-
+
string srcline,trgline,alnline;
cout.precision(2);
vector<string> fname = mmsapt->GetFeatureNames();
while (getline(spe_src,srcline))
{
- UTIL_THROW_IF2(!getline(spe_trg,trgline), HERE
+ UTIL_THROW_IF2(!getline(spe_trg,trgline), HERE
<< ": missing data for online updates.");
- UTIL_THROW_IF2(!getline(spe_aln,alnline), HERE
+ UTIL_THROW_IF2(!getline(spe_aln,alnline), HERE
<< ": missing data for online updates.");
cout << string(80,'-') << "\n" << srcline << "\n" << trgline << "\n" << endl;
@@ -127,29 +127,29 @@ int main(int argc, char* argv[])
if (!mmsapt->PrefixExists(p)) break;
TargetPhraseCollection const* trg = PT->GetTargetPhraseCollectionLEGACY(p);
if (!trg || !trg->GetSize()) continue;
-
+
bool header_done = false;
bool has_dynamic_match = vlevel == "all" || vlevel == "ALL";
- vector<size_t> order; order.reserve(trg->GetSize());
+ vector<size_t> order; order.reserve(trg->GetSize());
size_t stop = trg->GetSize();
vector<size_t> o2(trg->GetSize());
for (size_t i = 0; i < stop; ++i) o2[i] = i;
sort(o2.begin(),o2.end(),TargetPhraseIndexSorter(*trg));
-
+
for (size_t r = 0; r < stop; ++r) // r for rank
{
if (vlevel != "ALL")
{
Phrase const& phr = static_cast<Phrase const&>(*(*trg)[o2[r]]);
- ostringstream buf; buf << phr;
- string tphrase = buf.str();
+ ostringstream buf; buf << phr;
+ string tphrase = buf.str();
tphrase.erase(tphrase.size()-1);
size_t s = trgline.find(tphrase);
if (s == string::npos) continue;
size_t e = s + tphrase.size();
if ((s && trgline[s-1] != ' ') || (e < trgline.size() && trgline[e] != ' '))
- continue;
+ continue;
}
order.push_back(r);
if (!has_dynamic_match)
@@ -170,7 +170,7 @@ int main(int argc, char* argv[])
ScoreComponentCollection::IndexPair idx = scc.GetIndexes(PT);
FVector const& scores = scc.GetScoresVector();
float wscore = scc.GetWeightedScore();
- if (vlevel == "new" && scores[idx.first + dynprovidx] == 0)
+ if (vlevel == "new" && scores[idx.first + dynprovidx] == 0)
continue;
if (!header_done)
{
@@ -201,7 +201,7 @@ int main(int argc, char* argv[])
}
cout << " " << format(fmt) % (mmsapt->isInteger(j) ? round(f) : f);
}
- cout << " " << format("%10.3e") % exp(wscore)
+ cout << " " << format("%10.3e") % exp(wscore)
<< " " << format("%10.3e") % exp((*trg)[o2[r]]->GetFutureScore()) << endl;
}
mmsapt->Release(trg);
@@ -213,6 +213,6 @@ int main(int argc, char* argv[])
// }
exit(0);
}
-#endif
-
+#endif
+
diff --git a/moses/TranslationModel/UG/spe-check-coverage2.cc b/moses/TranslationModel/UG/spe-check-coverage2.cc
index fa9ce1c85..3b4f559d2 100644
--- a/moses/TranslationModel/UG/spe-check-coverage2.cc
+++ b/moses/TranslationModel/UG/spe-check-coverage2.cc
@@ -20,7 +20,7 @@ typedef Bitext<Token>::iter iter;
mmbitext bg;
-void
+void
show(ostream& out, iter& f)
{
iter b(bg.I2.get(),f.getToken(0),f.size());
@@ -29,11 +29,11 @@ show(ostream& out, iter& f)
else
out << string(12,' ');
out << " " << setw(5) << int(round(f.approxOccurrenceCount())) << " ";
- out << f.str(bg.V1.get()) << endl;
+ out << f.str(bg.V1.get()) << endl;
}
-void
+void
dump(ostream& out, iter& f)
{
float cnt = f.size() ? f.approxOccurrenceCount() : 0;
@@ -44,12 +44,12 @@ dump(ostream& out, iter& f)
while (f.over());
f.up();
}
- if (f.size() && cnt < f.approxOccurrenceCount() && f.approxOccurrenceCount() > 1)
+ if (f.size() && cnt < f.approxOccurrenceCount() && f.approxOccurrenceCount() > 1)
show(out,f);
}
-void
+void
read_data(string fname, vector<string>& dest)
{
ifstream in(fname.c_str());
@@ -71,6 +71,6 @@ int main(int argc, char* argv[])
dump(cout,mfg);
exit(0);
}
-
-
+
+
diff --git a/moses/TranslationModel/UG/spe-check-coverage3.cc b/moses/TranslationModel/UG/spe-check-coverage3.cc
index ea8c85e99..a62daa7b8 100644
--- a/moses/TranslationModel/UG/spe-check-coverage3.cc
+++ b/moses/TranslationModel/UG/spe-check-coverage3.cc
@@ -22,7 +22,7 @@ typedef Bitext<Token>::iter iter;
mmbitext bg;
vector<string> src,trg,aln;
-void
+void
show(ostream& out, iter& f)
{
iter b(bg.I2.get(),f.getToken(0),f.size());
@@ -31,11 +31,11 @@ show(ostream& out, iter& f)
else
out << string(12,' ');
out << " " << setw(5) << int(round(f.approxOccurrenceCount())) << " ";
- out << f.str(bg.V1.get()) << endl;
+ out << f.str(bg.V1.get()) << endl;
}
-void
+void
dump(ostream& out, iter& f)
{
float cnt = f.size() ? f.approxOccurrenceCount() : 0;
@@ -46,12 +46,12 @@ dump(ostream& out, iter& f)
while (f.over());
f.up();
}
- if (f.size() && cnt < f.approxOccurrenceCount() && f.approxOccurrenceCount() > 1)
+ if (f.size() && cnt < f.approxOccurrenceCount() && f.approxOccurrenceCount() > 1)
show(out,f);
}
-void
+void
read_data(string fname, vector<string>& dest)
{
ifstream in(fname.c_str());
@@ -60,14 +60,14 @@ read_data(string fname, vector<string>& dest)
in.close();
}
-void
-show_snt(ostream& out, TokenIndex const& V, vector<Token> const& snt,
+void
+show_snt(ostream& out, TokenIndex const& V, vector<Token> const& snt,
vector<vector<int> > const& a)
{
for (size_t i = 0; i < snt.size(); ++i)
{
cout << format("%d:%s[") % i % V[snt[i].id()];
- for (size_t k = 0; k < a[i].size(); ++k)
+ for (size_t k = 0; k < a[i].size(); ++k)
cout << (k?",":"") << a[i][k];
cout << "] ";
}
@@ -77,7 +77,7 @@ show_snt(ostream& out, TokenIndex const& V, vector<Token> const& snt,
void show_pair(size_t const sid)
{
- vector<Token> s,t;
+ vector<Token> s,t;
fill_token_seq(*bg.V1,src[sid],s);
fill_token_seq(*bg.V2,trg[sid],t);
vector<vector<int> > a1(s.size()),a2(t.size());
@@ -97,11 +97,11 @@ void show_pair(size_t const sid)
int main(int argc, char* argv[])
{
- if (argc < 5)
+ if (argc < 5)
{
- cerr << "usage: " << argv[0]
- << " <bg base name> <L1> <L2> <fg base name>"
- << endl;
+ cerr << "usage: " << argv[0]
+ << " <bg base name> <L1> <L2> <fg base name>"
+ << endl;
exit(1);
}
bg.open(argv[1],argv[2],argv[3]);
@@ -122,10 +122,10 @@ int main(int argc, char* argv[])
bias[sid] = 0;
// cout << src[sid] << endl << trg[sid] << endl;
// show_pair(sid);
- vector<Token> snt;
+ vector<Token> snt;
fill_token_seq(*bg.V1,src[sid],snt);
vector<vector<sptr<vector<PhrasePair<Token> > > > > FG,BG;
- fg->lookup(snt,*fg->I1,FG,NULL,NULL,&bias,true);
+ fg->lookup(snt,*fg->I1,FG,NULL,NULL,&bias,true);
bg.lookup(snt,*bg.I1,BG,NULL,NULL,NULL,true);
set<sptr<vector<PhrasePair<Token> > > > seen;
for (size_t i = 0; i < snt.size(); ++i)
@@ -136,7 +136,7 @@ int main(int argc, char* argv[])
{
if (!m0.extend(snt[i+k].id())) break;
if (k && m0.approxOccurrenceCount() < 2) break;
- if (m1.size() == k && (!m1.extend(snt[i+k].id()) ||
+ if (m1.size() == k && (!m1.extend(snt[i+k].id()) ||
m1.approxOccurrenceCount() < 25))
{
cout << toString((*fg->V1), m0.getToken(0), m0.size()) << " "
@@ -156,8 +156,8 @@ int main(int argc, char* argv[])
sptr<pstats> bgstats;
jstats const* bgjstats = NULL;
Bitext<Token>::iter m2(bg.I2.get(), pp.start2, pp.len2);
- if (m1.approxOccurrenceCount() > 5000 ||
- m2.approxOccurrenceCount() > 5000)
+ if (m1.approxOccurrenceCount() > 5000 ||
+ m2.approxOccurrenceCount() > 5000)
continue;
if (m1.size() == pp.len1 && m2.size() == pp.len2)
{
@@ -173,9 +173,9 @@ int main(int argc, char* argv[])
cout << toString(*fg->V1, pp.start1, pp.len1) << " ::: "
<< toString(*fg->V2, pp.start2, pp.len2) << " "
<< format("[%u/%u/%u]") % pp.good1 % pp.joint % pp.good2;
- if (bgjstats)
- cout << " " << (format("[%u/%u/%u]")
- % bgstats->good % bgjstats->rcnt()
+ if (bgjstats)
+ cout << " " << (format("[%u/%u/%u]")
+ % bgstats->good % bgjstats->rcnt()
% (bgjstats->cnt2() * bgstats->good
/ bgstats->raw_cnt));
else if (m1.size() == pp.len1)
@@ -189,6 +189,6 @@ int main(int argc, char* argv[])
}
exit(0);
}
-
-
+
+
diff --git a/moses/TranslationModel/UG/try-align.cc b/moses/TranslationModel/UG/try-align.cc
index daafec545..60eabb9e7 100644
--- a/moses/TranslationModel/UG/try-align.cc
+++ b/moses/TranslationModel/UG/try-align.cc
@@ -17,7 +17,7 @@ float lbop_level = .05;
namespace stats
{
using namespace Moses::bitext;
- float
+ float
pmi(size_t j,size_t m1, size_t m2, size_t N)
{
#if smooth
@@ -29,8 +29,8 @@ namespace stats
return log(j) + log(N) - log(m1) - log(m2);
#endif
}
-
- float
+
+ float
npmi(size_t j,size_t m1, size_t m2, size_t N)
{
#if smooth
@@ -39,11 +39,11 @@ namespace stats
float p12 = lbop(N,j,lbop_level);
return (log(p12) - log(p1) - log(p2)) / -log(p12);
#else
- return pmi(j,m1,m2,N) / (log(N) - log(j));
+ return pmi(j,m1,m2,N) / (log(N) - log(j));
#endif
}
- float
+ float
mi(size_t j,size_t m1, size_t m2, size_t N)
{
float ret = 0;
@@ -79,7 +79,7 @@ struct PhrasePair
float mi; // mutual information
float score;
- void
+ void
set(vector<ttrack::Position> const& o1,
vector<ttrack::Position> const& o2,
size_t const N)
@@ -90,7 +90,7 @@ struct PhrasePair
{
if (i1 && o1[i1].sid == o1[i1-1].sid) { ++i1; continue; }
if (i2 && o2[i2].sid == o2[i2-1].sid) { ++i2; continue; }
-
+
if (o1[i1].sid == o2[i2].sid) { ++j; ++i1; ++i2; ++m1; ++m2; }
else if (o1[i1].sid < o2[i2].sid) { ++i1; ++m1; }
else { ++i2; ++m2; }
@@ -114,19 +114,19 @@ struct PhrasePair
this->score = npmi; // npmi; // hmean; // /sqrt(z);
}
} stats;
-
+
PhrasePair(ushort s1_=0, ushort e1_=0, ushort s2_=0, ushort e2_=0)
: s1(s1_), e1(e1_), s2(s2_), e2(e2_), parent(-1) { }
- bool
+ bool
operator<(PhrasePair const& other) const
- {
- return (this->stats.score == other.stats.score
+ {
+ return (this->stats.score == other.stats.score
? (e1-s1 + e2-s2 > other.e1-other.s1 + other.e2-other.s2)
- : (this->stats.score > other.stats.score));
+ : (this->stats.score > other.stats.score));
}
-
+
size_t len1() const { return e1 - s1; }
size_t len2() const { return e2 - s2; }
bool includes(PhrasePair const& o) const
@@ -142,8 +142,8 @@ PhrasePair::stats_t::cache_t ppcache;
struct SortByPositionInCorpus
{
- bool
- operator()(ttrack::Position const& a,
+ bool
+ operator()(ttrack::Position const& a,
ttrack::Position const& b) const
{
return a.sid != b.sid ? a.sid < b.sid : a.offset < b.offset;
@@ -151,8 +151,8 @@ struct SortByPositionInCorpus
};
-void
-getoccs(tsa_t::tree_iterator const& m,
+void
+getoccs(tsa_t::tree_iterator const& m,
vector<ttrack::Position>& occs)
{
occs.clear();
@@ -166,9 +166,9 @@ getoccs(tsa_t::tree_iterator const& m,
sort(occs.begin(),occs.end(),SortByPositionInCorpus());
}
-void
-lookup_phrases(vector<id_type> const& snt,
- TokenIndex& V, ttrack_t const& T,
+void
+lookup_phrases(vector<id_type> const& snt,
+ TokenIndex& V, ttrack_t const& T,
tsa_t const& I, SinglePhrase::cache_t& cache,
vector<vector<sptr<SinglePhrase> > >& dest)
{
@@ -182,7 +182,7 @@ lookup_phrases(vector<id_type> const& snt,
if (m.approxOccurrenceCount() < 3) break;
// if (k - i > 0) break;
sptr<SinglePhrase>& o = cache[m.getPid()];
- if (!o)
+ if (!o)
{
o.reset(new SinglePhrase());
o->pid = m.getPid();
@@ -193,7 +193,7 @@ lookup_phrases(vector<id_type> const& snt,
}
}
-struct
+struct
RowIndexSorter
{
vector<vector<float> > const& M;
@@ -202,14 +202,14 @@ RowIndexSorter
: M(m), my_col(c) { }
template<typename T>
- bool
- operator()(T const& a, T const& b) const
- {
+ bool
+ operator()(T const& a, T const& b) const
+ {
return M.at(a).at(my_col) > M.at(b).at(my_col);
}
};
-struct
+struct
ColIndexSorter
{
vector<vector<float> > const& M;
@@ -218,9 +218,9 @@ ColIndexSorter
: M(m), my_row(r) { }
template<typename T>
- bool
- operator()(T const& a, T const& b) const
- {
+ bool
+ operator()(T const& a, T const& b) const
+ {
return M.at(my_row).at(a) > M[my_row].at(b);
}
@@ -234,7 +234,7 @@ int main(int argc, char* argv[])
T1.reset(new ttrack_t());
T2.reset(new ttrack_t());
-
+
V1.open(base + L1 + ".tdx");
T1->open(base + L1 + ".mct");
I1.open(base + L1 + ".sfa", T1);
@@ -259,7 +259,7 @@ int main(int argc, char* argv[])
vector<PhrasePair> pp_all,pp_good;
vector<int> a1(snt1.size(),-1);
vector<int> a2(snt2.size(),-1);
-
+
vector<vector<int> > z1(snt1.size(),vector<int>(snt1.size(),-1));
vector<vector<int> > z2(snt2.size(),vector<int>(snt2.size(),-1));
vector<vector<vector<PhrasePair> > >ppm1(M1.size()),ppm2(M2.size());
@@ -282,9 +282,9 @@ int main(int argc, char* argv[])
for (size_t k2 = 0; k2 < M2[i2].size(); ++k2)
{
pp.e2 = i2 + k2 + 1;
- sptr<PhrasePair::stats_t> & s
+ sptr<PhrasePair::stats_t> & s
= ppcache[make_pair(M1[i1][k1]->pid,M2[i2][k2]->pid)];
- if (!s)
+ if (!s)
{
s.reset(new PhrasePair::stats_t());
s->set(M1[i1][k1]->occs,M2[i2][k2]->occs,T1->size());
@@ -294,8 +294,8 @@ int main(int argc, char* argv[])
// ppm1[i1][k1].push_back(pp);
// ppm2[i2][k2].push_back(pp);
size_t J = pp.stats.j * 100;
- if (pp.stats.score > 0
- && J >= pp.stats.m1
+ if (pp.stats.score > 0
+ && J >= pp.stats.m1
&& J > pp.stats.m2)
{ pp_all.push_back(pp); }
}
@@ -310,7 +310,7 @@ int main(int argc, char* argv[])
for (size_t r = pp.s1; r < pp.e1; ++r)
for (size_t c = pp.s2; c < pp.e2; ++c)
{
- // M[r][c] += log(1-pp.stats.npmi);
+ // M[r][c] += log(1-pp.stats.npmi);
M[r][c] += log(1-pp.stats.mi);
}
}
@@ -342,11 +342,11 @@ int main(int argc, char* argv[])
}
cout << endl;
}
-#endif
+#endif
#if 0
for (size_t k = 1; k < pp_all.size(); ++k)
for (size_t i = k; i--;)
- if (pp_all[i].s1 >= pp_all[k].s1 &&
+ if (pp_all[i].s1 >= pp_all[k].s1 &&
pp_all[i].e1 <= pp_all[k].e1 &&
pp_all[i].s2 >= pp_all[k].s2 &&
pp_all[i].e2 <= pp_all[k].e2)
@@ -360,35 +360,35 @@ int main(int argc, char* argv[])
{
PhrasePair const& x = pp_all[p];
// if (x.stats.npmi < .7) break;
- // if (z1[x.s1][x.e1-1] >= 0 || z2[x.s2][x.e2-1] >=0)
+ // if (z1[x.s1][x.e1-1] >= 0 || z2[x.s2][x.e2-1] >=0)
// continue;
- for (size_t i = x.s1; i < x.e1; ++i)
+ for (size_t i = x.s1; i < x.e1; ++i)
{
- if (assoc1[i] < 0)
+ if (assoc1[i] < 0)
assoc1[i] = p;
else
{
// PhrasePair& y = pp_all[assoc1[i]];
- // if (y.includes(x))
+ // if (y.includes(x))
// assoc1[i] = p;
}
}
- for (size_t i = x.s2; i < x.e2; ++i)
+ for (size_t i = x.s2; i < x.e2; ++i)
{
- if (assoc2[i] < 0)
+ if (assoc2[i] < 0)
assoc2[i] = p;
else
{
// PhrasePair& y = pp_all[assoc2[i]];
- // if (y.includes(x))
+ // if (y.includes(x))
// assoc2[i] = p;
}
}
z1[x.s1][x.e1-1] = p;
z2[x.s2][x.e2-1] = p;
continue;
- cout << (boost::format("%.4f %.8f %.4f")
- % x.stats.score
+ cout << (boost::format("%.4f %.8f %.4f")
+ % x.stats.score
% x.stats.mi
% x.stats.npmi);
for (size_t z = x.s1; z < x.e1; ++z)
@@ -396,8 +396,8 @@ int main(int argc, char* argv[])
cout << " :::";
for (size_t z = x.s2; z < x.e2; ++z)
cout << " " << V2[snt2[z]];
- cout << " ["
- << x.stats.m1 << "/" << x.stats.j << "/" << x.stats.m2
+ cout << " ["
+ << x.stats.m1 << "/" << x.stats.j << "/" << x.stats.m2
<< "]" << endl;
}
vector<bool> done(pp_all.size(),false);
@@ -415,8 +415,8 @@ int main(int argc, char* argv[])
cout << " ::: ";
for (size_t j = p.s2; j < p.e2; ++j)
cout << j << ":" << V2[snt2[j]] << " ";
- cout << "["
- << p.stats.m1 << "/" << p.stats.j << "/" << p.stats.m2
+ cout << "["
+ << p.stats.m1 << "/" << p.stats.j << "/" << p.stats.m2
<< "] "<< p.stats.score << endl;
// break;
}
@@ -433,20 +433,20 @@ int main(int argc, char* argv[])
cout << " ::: ";
for (size_t j = p.s2; j < p.e2; ++j)
cout << j << ":" << V2[snt2[j]] << " ";
- cout << "["
- << p.stats.m1 << "/" << p.stats.j << "/" << p.stats.m2
+ cout << "["
+ << p.stats.m1 << "/" << p.stats.j << "/" << p.stats.m2
<< "] "<< p.stats.score << endl;
}
-#endif
+#endif
// sort(pp_all.begin(),pp_all.end());
// BOOST_FOREACH(PhrasePair const& pp, pp_all)
// {
- // while (ppm1[pp.s1].size() < pp.e1 - pp.s1)
+ // while (ppm1[pp.s1].size() < pp.e1 - pp.s1)
// ppm1[pp.s1].push_back(vector<PhrasePair>());
// vector<PhrasePair>& v1 = ppm1[pp.s1][pp.e1-pp.s1-1];
// if (v1.size() && v1[0].stats.score > pp.stats.score)
// continue;
- // while (ppm2[pp.s2].size() < pp.e2 - pp.s2)
+ // while (ppm2[pp.s2].size() < pp.e2 - pp.s2)
// ppm2[pp.s2].push_back(vector<PhrasePair>());
// vector<PhrasePair>& v2 = ppm2[pp.s2][pp.e2-pp.s2-1];
// if (v2.size() && v2[0].stats.score > pp.stats.score)
@@ -455,12 +455,12 @@ int main(int argc, char* argv[])
// v2.push_back(pp);
// }
-
+
// BOOST_FOREACH(vector<vector<PhrasePair> >& vv, ppm1)
- // {
- // BOOST_FOREACH(vector<PhrasePair>& v, vv)
- // {
- // sort(v.begin(),v.end());
+ // {
+ // BOOST_FOREACH(vector<PhrasePair>& v, vv)
+ // {
+ // sort(v.begin(),v.end());
// if (v.size() > 1 && v[0].stats.score == v[1].stats.score)
// v.clear();
// }
@@ -468,19 +468,19 @@ int main(int argc, char* argv[])
// for (size_t i2 = 0; i2 < ppm2.size(); ++i2)
// {
// for (size_t k2 = 0; k2 < ppm2[i2].size(); ++k2)
- // {
+ // {
// vector<PhrasePair>& v2 = ppm2[i2][k2];
// sort(v2.begin(),v2.end());
- // if (v2.size() > 1 && v2[0].stats.score == v2[1].stats.score)
+ // if (v2.size() > 1 && v2[0].stats.score == v2[1].stats.score)
// {
// v2.clear();
// continue;
// }
// ushort i1 = v2[0].s1;
// ushort k1 = v2[0].e1 - i1 -1;
-
- // if (ppm1[i1][k1].size() == 0 ||
- // ppm1[i1][k1][0].s2 != i2 ||
+
+ // if (ppm1[i1][k1].size() == 0 ||
+ // ppm1[i1][k1][0].s2 != i2 ||
// ppm1[i1][k1][0].e2 != i2 + k2 + 1)
// { v2.clear(); }
// else pp_good.push_back(ppm2[i2][k2][0]);
@@ -508,7 +508,7 @@ int main(int argc, char* argv[])
// // cout << V2[snt2[z]] << " ";
// // cout << pp.m1 << "/" << pp.j << "/" << pp.m2 << endl;
// // }
-
+
}
}
diff --git a/moses/TranslationModel/UG/try-align2.cc b/moses/TranslationModel/UG/try-align2.cc
index 57cf25035..a18ce8d92 100644
--- a/moses/TranslationModel/UG/try-align2.cc
+++ b/moses/TranslationModel/UG/try-align2.cc
@@ -29,7 +29,7 @@ float lbop_level = .05;
namespace stats
{
using namespace Moses::bitext;
- float
+ float
pmi(size_t j,size_t m1, size_t m2, size_t N)
{
#if smooth
@@ -41,8 +41,8 @@ namespace stats
return log(j) + log(N) - log(m1) - log(m2);
#endif
}
-
- float
+
+ float
npmi(size_t j,size_t m1, size_t m2, size_t N)
{
#if smooth
@@ -52,11 +52,11 @@ namespace stats
float p12 = lbop(N,j,lbop_level);
return (log(p12) - log(p1) - log(p2)) / -log(p12);
#else
- return pmi(j,m1,m2,N) / (log(N) - log(j));
+ return pmi(j,m1,m2,N) / (log(N) - log(j));
#endif
}
- float
+ float
mi(size_t j,size_t m1, size_t m2, size_t N)
{
float ret = 0;
@@ -92,7 +92,7 @@ struct PhrasePair2
float mi; // mutual information
float score;
- void
+ void
set(vector<ttrack::Position> const& o1,
vector<ttrack::Position> const& o2,
size_t const N)
@@ -103,7 +103,7 @@ struct PhrasePair2
{
if (i1 && o1[i1].sid == o1[i1-1].sid) { ++i1; continue; }
if (i2 && o2[i2].sid == o2[i2-1].sid) { ++i2; continue; }
-
+
if (o1[i1].sid == o2[i2].sid) { ++j; ++i1; ++i2; ++m1; ++m2; }
else if (o1[i1].sid < o2[i2].sid) { ++i1; ++m1; }
else { ++i2; ++m2; }
@@ -127,19 +127,19 @@ struct PhrasePair2
this->score = npmi; // npmi; // hmean; // /sqrt(z);
}
} stats;
-
+
PhrasePair2(ushort s1_=0, ushort e1_=0, ushort s2_=0, ushort e2_=0)
: s1(s1_), e1(e1_), s2(s2_), e2(e2_), parent(-1) { }
- bool
+ bool
operator<(PhrasePair2 const& other) const
- {
- return (this->stats.score == other.stats.score
+ {
+ return (this->stats.score == other.stats.score
? (e1-s1 + e2-s2 > other.e1-other.s1 + other.e2-other.s2)
- : (this->stats.score > other.stats.score));
+ : (this->stats.score > other.stats.score));
}
-
+
size_t len1() const { return e1 - s1; }
size_t len2() const { return e2 - s2; }
bool includes(PhrasePair2 const& o) const
@@ -155,8 +155,8 @@ PhrasePair2::stats_t::cache_t ppcache;
struct SortByPositionInCorpus
{
- bool
- operator()(ttrack::Position const& a,
+ bool
+ operator()(ttrack::Position const& a,
ttrack::Position const& b) const
{
return a.sid != b.sid ? a.sid < b.sid : a.offset < b.offset;
@@ -164,8 +164,8 @@ struct SortByPositionInCorpus
};
-void
-getoccs(tsa_t::tree_iterator const& m,
+void
+getoccs(tsa_t::tree_iterator const& m,
vector<ttrack::Position>& occs)
{
occs.clear();
@@ -179,9 +179,9 @@ getoccs(tsa_t::tree_iterator const& m,
sort(occs.begin(),occs.end(),SortByPositionInCorpus());
}
-void
-lookup_phrases(vector<id_type> const& snt,
- TokenIndex& V, ttrack_t const& T,
+void
+lookup_phrases(vector<id_type> const& snt,
+ TokenIndex& V, ttrack_t const& T,
tsa_t const& I, SinglePhrase::cache_t& cache,
vector<vector<sptr<SinglePhrase> > >& dest)
{
@@ -195,7 +195,7 @@ lookup_phrases(vector<id_type> const& snt,
if (m.approxOccurrenceCount() < 3) break;
// if (k - i > 0) break;
sptr<SinglePhrase>& o = cache[m.getPid()];
- if (!o)
+ if (!o)
{
o.reset(new SinglePhrase());
o->pid = m.getPid();
@@ -207,7 +207,7 @@ lookup_phrases(vector<id_type> const& snt,
}
-struct
+struct
RowIndexSorter
{
vector<vector<float> > const& M;
@@ -216,14 +216,14 @@ RowIndexSorter
: M(m), my_col(c) { }
template<typename T>
- bool
- operator()(T const& a, T const& b) const
- {
+ bool
+ operator()(T const& a, T const& b) const
+ {
return M.at(a).at(my_col) > M.at(b).at(my_col);
}
};
-struct
+struct
ColIndexSorter
{
vector<vector<float> > const& M;
@@ -232,9 +232,9 @@ ColIndexSorter
: M(m), my_row(r) { }
template<typename T>
- bool
- operator()(T const& a, T const& b) const
- {
+ bool
+ operator()(T const& a, T const& b) const
+ {
return M.at(my_row).at(a) > M[my_row].at(b);
}
@@ -249,7 +249,7 @@ public:
{
#if 0
cout << pp.raw1 << " " << pp.sample1 << " " << pp.good1 << " "
- << pp.raw2 << " " << pp.sample2 << " " << pp.good2 << " "
+ << pp.raw2 << " " << pp.sample2 << " " << pp.good2 << " "
<< pp.joint << " " << __FILE__ << ":" << __LINE__ << endl;
#endif
pp.good2 = ceil(pp.raw2 * float(pp.good1)/pp.raw1);
@@ -266,7 +266,7 @@ class Alnhyp
};
-size_t
+size_t
lcs(string const a, string const b)
{
using namespace stringdist;
@@ -279,10 +279,10 @@ lcs(string const a, string const b)
{
StringDiff::Segment const& s = diff[i];
if (s.match != StringDiff::same && s.match != StringDiff::cap)
- {
+ {
if (len > ret) ret = len;
- len = 0;
- continue;
+ len = 0;
+ continue;
}
len += s.end_a - s.start_a;
}
@@ -290,9 +290,9 @@ lcs(string const a, string const b)
return ret;
}
-size_t
-mapstring(string const& utf8,
- UnicodeString& U,
+size_t
+mapstring(string const& utf8,
+ UnicodeString& U,
vector<int>& c2w,
vector<int>* wlen=NULL)
{
@@ -338,10 +338,10 @@ align_letters(UnicodeString const& A, vector<int> const& a2p,
// }
}
-void
+void
map_back(vector<vector<int> > const& W,
vector<vector<int> > & X,
- vector<uchar> const & aln)
+ vector<uchar> const & aln)
{
for (size_t i = 0; i < aln.size(); i += 2)
{
@@ -354,7 +354,7 @@ map_back(vector<vector<int> > const& W,
}
-void trymatch3(vector<PhrasePair<Token> > const& tcands,
+void trymatch3(vector<PhrasePair<Token> > const& tcands,
UnicodeString const& T, size_t const tlen,
vector<int> const& t2p,
TokenIndex const& V2, vector<vector<int> >&X)
@@ -374,8 +374,8 @@ void trymatch3(vector<PhrasePair<Token> > const& tcands,
cout << slen << " " << tlen << endl;
cout << "W: " << W.size() << " rows; " << W[0].size() << " cols" << endl;
cout << "X: " << X.size() << " rows; " << X[0].size() << " cols" << endl;
- cout << "aln: ";
- for (size_t a = 0; a < pp.aln.size(); a +=2)
+ cout << "aln: ";
+ for (size_t a = 0; a < pp.aln.size(); a +=2)
cout << int(pp.aln[a]) << "-" << int(pp.aln[a+1]) << " ";
cout << endl;
#endif
@@ -383,7 +383,7 @@ void trymatch3(vector<PhrasePair<Token> > const& tcands,
}
}
-void minmatch_filter(vector<vector<int> > & X,
+void minmatch_filter(vector<vector<int> > & X,
vector<int> const& len1,
vector<int> const& len2)
{
@@ -437,20 +437,20 @@ trymatch2(TokenIndex& V1, // source language vocab
TokenIndex& V2, // target language vocab
string const& source, // source phrase
string const& target, // observed target candidate
- vector<PhrasePair<Token> > const* const tcands,
+ vector<PhrasePair<Token> > const* const tcands,
vector<vector<int> >& X) // destination alignment matrix
// tcands: translations for source
{
- UnicodeString S,T;
+ UnicodeString S,T;
vector<int> t2p, s2p; // maps from character position in string to word pos.
vector<int> wlen_t, wlen_s; // individual word lengths
size_t slen = mapstring(source, S, s2p, &wlen_s);
size_t tlen = mapstring(target, T, t2p, &wlen_t);
-
+
X.assign(slen,vector<int>(tlen,0));
- if (slen == 1 && tlen ==1 && S == T)
+ if (slen == 1 && tlen ==1 && S == T)
X[0][0] = S.length();
- else
+ else
{
align_letters(S,s2p,T,t2p,X);
if (tcands) trymatch3(*tcands, T, tlen, t2p, V2, X);
@@ -475,7 +475,7 @@ trymatch2(TokenIndex& V1, // source language vocab
// float
-// trymatch(string const a, string const b,
+// trymatch(string const a, string const b,
// vector<PhrasePair<Token> > const* atrans,
// vector<PhrasePair<Token> > const* btrans)
// {
@@ -501,11 +501,11 @@ trymatch2(TokenIndex& V1, // source language vocab
// // float bar = float(lcs(foo,b))/min(foo.size(),b.size());
// float bar = float(lcs(foo,b));
-// if (bar > .5)
+// if (bar > .5)
// {
// // score = max(pp.score * bar,score);
// score = max(bar,score);
-// // cout << "[" << bar << "] " << foo << " ::: " << b
+// // cout << "[" << bar << "] " << foo << " ::: " << b
// // << " (" << a << ") " << pp.score << endl;
// }
// }
@@ -525,10 +525,10 @@ trymatch2(TokenIndex& V1, // source language vocab
// string foo = toString(*BT.V1,pp.start2,pp.len2);
// // float bar = float(lcs(a,foo))/min(a.size(),foo.size());
// float bar = float(lcs(a,foo));
-// if (bar > .5)
+// if (bar > .5)
// {
// score = max(bar,score);
-// // cout << "[" << bar<< "] " << a << " ::: " << foo
+// // cout << "[" << bar<< "] " << a << " ::: " << foo
// // << " (" << b << ") " << pp.score << endl;
// }
// }
@@ -547,8 +547,8 @@ struct ahyp
struct AlnPoint
{
enum status { no = 0, yes = 1, maybe = -1, undef = -7 };
- float score;
- status state;
+ float score;
+ status state;
AlnPoint() : score(0), state(undef) {}
};
@@ -562,14 +562,14 @@ class AlnMatrix
vector<bitvector> A1,A2; // final alignment matrix
vector<bitvector> S1,S2; // shadow alignment matrix
public:
- vector<bitvector*> m1,m2; // margins
+ vector<bitvector*> m1,m2; // margins
AlnMatrix(size_t const rows, size_t const cols);
- bitvector const&
+ bitvector const&
operator[](size_t const r) const
{ return A1.at(r); }
bool
- incorporate(span_t const& rspan, span_t const& cspan,
+ incorporate(span_t const& rspan, span_t const& cspan,
vector<uchar> const& aln, bool const flip);
size_t size() const { return A1.size(); }
@@ -588,9 +588,9 @@ AlnMatrix(size_t const rows, size_t const cols)
bool
AlnMatrix::
-incorporate(span_t const& rspan,
- span_t const& cspan,
- vector<uchar> const& aln,
+incorporate(span_t const& rspan,
+ span_t const& cspan,
+ vector<uchar> const& aln,
bool const flip)
{
for (size_t r = rspan.first; r < rspan.second; ++r)
@@ -622,7 +622,7 @@ incorporate(span_t const& rspan,
if (m1[r] && (*m1[r]) != S1[r]) return false;
for (size_t c = cspan.first; c < cspan.second; ++c)
if (m2[c] && (*m2[c]) != S2[c]) return false;
-
+
// all good, add new points
for (size_t r = rspan.first; r < rspan.second; ++r)
if (!m1[r]) { A1[r] = S1[r]; m1[r] = &A1[r]; }
@@ -632,9 +632,9 @@ incorporate(span_t const& rspan,
return true;
}
-struct alink
-{
- size_t r,c,m;
+struct alink
+{
+ size_t r,c,m;
bool operator<(alink const& o) const { return m < o.m; }
bool operator>(alink const& o) const { return m > o.m; }
};
@@ -659,9 +659,9 @@ int main(int argc, char* argv[])
vector<vector<uint64_t> > pm1,pm2;
BT.lookup(snt1,*BT.I1,pt1,&pm1,&scorer);
BT.lookup(snt2,*BT.I2,pt2,&pm2,&scorer);
-
+
// build map from phrases to positions
- typedef boost::unordered_map<uint64_t, vector<span_t> >
+ typedef boost::unordered_map<uint64_t, vector<span_t> >
p2s_map_t;
typedef p2s_map_t::iterator p2s_iter;
p2s_map_t p2s1,p2s2;
@@ -684,7 +684,7 @@ int main(int argc, char* argv[])
BOOST_FOREACH(PhrasePair<Token> const& pp, *pt1[i][k])
{
if (pp.score < 0) break;
- if (p2s2.find(pp.p2) != p2s2.end())
+ if (p2s2.find(pp.p2) != p2s2.end())
pp_all.push_back(pp);
}
}
@@ -704,10 +704,10 @@ int main(int argc, char* argv[])
{
PhrasePair<Token> const& pp = pp_all[p];
#if 0
- cout << (boost::format("%30s ::: %-30s ")
+ cout << (boost::format("%30s ::: %-30s ")
% BT.toString(pp.p1,0).c_str()
% BT.toString(pp.p2,1).c_str());
- cout << (boost::format("%.4f [%d/%d/%d]")
+ cout << (boost::format("%.4f [%d/%d/%d]")
% pp.score % pp.good1 % pp.joint % pp.good2);
for (size_t a = 0; a < pp.aln.size(); a += 2)
cout << " " << int(pp.aln[a]) << "-" << int(pp.aln[a+1]);
@@ -720,7 +720,7 @@ int main(int argc, char* argv[])
for (size_t i = v1[0].first; i < v1[0].second; ++i)
if (a1[i] < 0) a1[i] = p;
if (v2.size() == 1)
- for (size_t i = v2[0].first; i < v2[0].second; ++i)
+ for (size_t i = v2[0].first; i < v2[0].second; ++i)
if (a2[i] < 0) a2[i] = p;
if (v1.size() == 1 && v2.size() == 1)
@@ -740,11 +740,11 @@ int main(int argc, char* argv[])
vector<PhrasePair<Token> > const* atrans, *btrans;
ahyp h;
vector<ahyp> hyps;
- vector<vector<int> > L(snt1.size(),vector<int>(snt2.size(),0));
+ vector<vector<int> > L(snt1.size(),vector<int>(snt2.size(),0));
// L: matches by letter overlap
for (h.s1 = 0; h.s1 < a1.size(); ++h.s1)
- {
+ {
if (a1[h.s1] >= 0) continue;
ostringstream buf1;
for (h.e1 = h.s1; h.e1 < a1.size() && a1[h.e1] < 0; ++h.e1)
@@ -762,23 +762,23 @@ int main(int argc, char* argv[])
if (a2[h.s2] >= 0) continue;
for (h.e2 = h.s2; h.e2 < a2.size() && a2[h.e2] < 0; ++h.e2)
{
- if (h.e2 > h.s2)
+ if (h.e2 > h.s2)
{
if (pt2[h.s2].size() + h.s2 <= h.e2) break;
buf2 << " ";
}
buf2 << (*BT.V2)[snt2[h.e2].id()];
- btrans = (pt2[h.s2].size()
- ? pt2[h.s2].at(h.e2-h.s2).get()
+ btrans = (pt2[h.s2].size()
+ ? pt2[h.s2].at(h.e2-h.s2).get()
: NULL);
vector<vector<int> > aln;
- trymatch2(*BT.V1, *BT.V2, buf1.str(),buf2.str(),
+ trymatch2(*BT.V1, *BT.V2, buf1.str(),buf2.str(),
atrans,aln);
for (size_t i = 0; i < aln.size(); ++i)
for (size_t k = 0; k < aln[i].size(); ++k)
L[h.s1+i][h.s2+k] = max(L[h.s1+i][h.s2+k],aln[i][k]);
- trymatch2(*BT.V2, *BT.V1, buf2.str(),buf1.str(),
+ trymatch2(*BT.V2, *BT.V1, buf2.str(),buf1.str(),
btrans,aln);
for (size_t i = 0; i < aln[0].size(); ++i)
for (size_t k = 0; k < aln.size(); ++k)
@@ -795,7 +795,7 @@ int main(int argc, char* argv[])
alink x;
for (x.r = 0; x.r < L.size(); ++x.r)
{
-
+
for (x.c = 0; x.c < L[x.r].size(); ++x.c)
{
x.m = L[x.r][x.c];
@@ -807,22 +807,22 @@ int main(int argc, char* argv[])
BOOST_FOREACH(alink& x, links)
{
- if (L[x.r][x.c])
+ if (L[x.r][x.c])
{
cout << (*BT.V1)[snt1[x.r].id()] << " ::: "
<< (*BT.V2)[snt2[x.c].id()] << " ::: "
<< L[x.r][x.c] << endl;
}
- }
+ }
// sort(hyps.begin(),hyps.end(),greater<ahyp>());
// BOOST_FOREACH(ahyp const& h, hyps)
// {
// if (h.score < .5) break;
- // for (size_t i = h.s1; i <= h.e1; ++i)
+ // for (size_t i = h.s1; i <= h.e1; ++i)
// cout << i << ":" << (*BT.V1)[snt1[i].id()] << " ";
// cout << " ::: ";
- // for (size_t i = h.s2; i <= h.e2; ++i)
+ // for (size_t i = h.s2; i <= h.e2; ++i)
// cout << i << ":" << (*BT.V2)[snt2[i].id()] << " ";
// cout << h.score << endl;
// }
@@ -854,15 +854,15 @@ int main(int argc, char* argv[])
// #if 0
// if (match)
// {
-// if (first)
+// if (first)
// {
// cout << BT.toString(pm1[i][k],0) << endl;
// first = false;
// }
-// cout << boost::format("%.4f") % pt.score << " "
+// cout << boost::format("%.4f") % pt.score << " "
// << setw(5) << d1 << " " << (match ? "* " : " ")
// << toString(*BT.V2, pt.start2, pt.len2) << " ["
-// << pt.good1 << "/" << pt.joint << "/"
+// << pt.good1 << "/" << pt.joint << "/"
// << pt.good2 << "]";
// for (size_t a = 0; a < pt.aln.size(); a += 2)
// cout << " " << int(pt.aln[a]) << "-" << int(pt.aln[a+1]);
@@ -879,7 +879,7 @@ int main(int argc, char* argv[])
// pp_all.push_back(pt);
// // pp_all.back().m1 -= d1;
// }
-
+
// }
// if (!first) cout << endl;
// }
diff --git a/moses/TranslationModel/UG/util/ibm1-align.cc b/moses/TranslationModel/UG/util/ibm1-align.cc
index 08ac1f89b..3c43743d0 100644
--- a/moses/TranslationModel/UG/util/ibm1-align.cc
+++ b/moses/TranslationModel/UG/util/ibm1-align.cc
@@ -1,7 +1,7 @@
// -*- c++ -*-
// Parallel text alignment via IBM1 / raw counts of word alignments
// aiming at high precision (to seed Yawat alignments)
-// This program is tailored for use with Yawat.
+// This program is tailored for use with Yawat.
// Written by Ulrich Germann.
#include <string>
@@ -29,20 +29,20 @@ public:
table_t COOC;
TokenIndex V1,V2;
- void
+ void
align(string const& s1, string const& s2, vector<int>& aln) const;
- void
- align(vector<id_type> const& x1,
- vector<id_type> const& x2,
+ void
+ align(vector<id_type> const& x1,
+ vector<id_type> const& x2,
vector<int>& aln) const;
-
- void
- fill_amatrix(vector<id_type> const& x1,
- vector<id_type> const& x2,
+
+ void
+ fill_amatrix(vector<id_type> const& x1,
+ vector<id_type> const& x2,
vector<vector<int> >& aln) const;
- void
+ void
open(string const base, string const L1, string const L2);
};
@@ -75,10 +75,10 @@ u(StringPiece str, size_t start, size_t stop)
return ret;
}
-void
+void
IBM1::
-fill_amatrix(vector<id_type> const& x1,
- vector<id_type> const& x2,
+fill_amatrix(vector<id_type> const& x1,
+ vector<id_type> const& x2,
vector<vector<int> >& aln) const
{
aln.assign(x1.size(),vector<int>(x2.size()));
@@ -108,8 +108,8 @@ fill_amatrix(vector<id_type> const& x1,
void
IBM1::
-align(vector<id_type> const& x1,
- vector<id_type> const& x2,
+align(vector<id_type> const& x1,
+ vector<id_type> const& x2,
vector<int>& aln) const
{
vector<vector<int> > M;
@@ -157,7 +157,7 @@ int main(int argc, char* argv[])
// cout << line1 << endl;
// cout << line2 << endl;
// for (size_t i = 0; i < a.size(); i += 2)
- // cout << ibm1.V1[s1[a[i]]] << " - "
+ // cout << ibm1.V1[s1[a[i]]] << " - "
// << ibm1.V2[s2[a[i+1]]] << endl;
}
// cout << endl;
diff --git a/moses/TranslationModel/UG/util/tokenindex.dump.cc b/moses/TranslationModel/UG/util/tokenindex.dump.cc
index 8ab68579d..0e885630f 100644
--- a/moses/TranslationModel/UG/util/tokenindex.dump.cc
+++ b/moses/TranslationModel/UG/util/tokenindex.dump.cc
@@ -13,7 +13,7 @@
using namespace std;
using namespace ugdiss;
-int
+int
main(int argc,char* argv[])
{
if (argc > 1 && !strcmp(argv[1], "-h")) {
diff --git a/moses/TranslationModel/fuzzy-match/Vocabulary.cpp b/moses/TranslationModel/fuzzy-match/Vocabulary.cpp
index ab1439a29..b70eb98ca 100644
--- a/moses/TranslationModel/fuzzy-match/Vocabulary.cpp
+++ b/moses/TranslationModel/fuzzy-match/Vocabulary.cpp
@@ -1,71 +1,71 @@
-// $Id: Vocabulary.cpp 1565 2008-02-22 14:42:01Z bojar $
-#include "Vocabulary.h"
-#ifdef WITH_THREADS
-#include <boost/thread/locks.hpp>
-#endif
-
-using namespace std;
-
-namespace tmmt
-{
-
-// as in beamdecoder/tables.cpp
-vector<WORD_ID> Vocabulary::Tokenize( const char input[] )
-{
- vector< WORD_ID > token;
- bool betweenWords = true;
- int start=0;
- int i=0;
- for(; input[i] != '\0'; i++) {
- bool isSpace = (input[i] == ' ' || input[i] == '\t');
-
- if (!isSpace && betweenWords) {
- start = i;
- betweenWords = false;
- } else if (isSpace && !betweenWords) {
- token.push_back( StoreIfNew ( string( input+start, i-start ) ) );
- betweenWords = true;
- }
- }
- if (!betweenWords)
- token.push_back( StoreIfNew ( string( input+start, i-start ) ) );
- return token;
-}
-
-WORD_ID Vocabulary::StoreIfNew( const WORD& word )
-{
-
- {
- // read=lock scope
-#ifdef WITH_THREADS
- boost::shared_lock<boost::shared_mutex> read_lock(m_accessLock);
-#endif
- map<WORD, WORD_ID>::iterator i = lookup.find( word );
-
- if( i != lookup.end() )
- return i->second;
- }
-
-#ifdef WITH_THREADS
- boost::unique_lock<boost::shared_mutex> lock(m_accessLock);
-#endif
- WORD_ID id = vocab.size();
- vocab.push_back( word );
- lookup[ word ] = id;
- return id;
-}
-
-WORD_ID Vocabulary::GetWordID( const WORD &word )
-{
-#ifdef WITH_THREADS
- boost::shared_lock<boost::shared_mutex> read_lock(m_accessLock);
-#endif
- map<WORD, WORD_ID>::iterator i = lookup.find( word );
- if( i == lookup.end() )
- return 0;
- WORD_ID w= (WORD_ID) i->second;
- return w;
-}
-
-}
-
+// $Id: Vocabulary.cpp 1565 2008-02-22 14:42:01Z bojar $
+#include "Vocabulary.h"
+#ifdef WITH_THREADS
+#include <boost/thread/locks.hpp>
+#endif
+
+using namespace std;
+
+namespace tmmt
+{
+
+// as in beamdecoder/tables.cpp
+vector<WORD_ID> Vocabulary::Tokenize( const char input[] )
+{
+ vector< WORD_ID > token;
+ bool betweenWords = true;
+ int start=0;
+ int i=0;
+ for(; input[i] != '\0'; i++) {
+ bool isSpace = (input[i] == ' ' || input[i] == '\t');
+
+ if (!isSpace && betweenWords) {
+ start = i;
+ betweenWords = false;
+ } else if (isSpace && !betweenWords) {
+ token.push_back( StoreIfNew ( string( input+start, i-start ) ) );
+ betweenWords = true;
+ }
+ }
+ if (!betweenWords)
+ token.push_back( StoreIfNew ( string( input+start, i-start ) ) );
+ return token;
+}
+
+WORD_ID Vocabulary::StoreIfNew( const WORD& word )
+{
+
+ {
+ // read=lock scope
+#ifdef WITH_THREADS
+ boost::shared_lock<boost::shared_mutex> read_lock(m_accessLock);
+#endif
+ map<WORD, WORD_ID>::iterator i = lookup.find( word );
+
+ if( i != lookup.end() )
+ return i->second;
+ }
+
+#ifdef WITH_THREADS
+ boost::unique_lock<boost::shared_mutex> lock(m_accessLock);
+#endif
+ WORD_ID id = vocab.size();
+ vocab.push_back( word );
+ lookup[ word ] = id;
+ return id;
+}
+
+WORD_ID Vocabulary::GetWordID( const WORD &word )
+{
+#ifdef WITH_THREADS
+ boost::shared_lock<boost::shared_mutex> read_lock(m_accessLock);
+#endif
+ map<WORD, WORD_ID>::iterator i = lookup.find( word );
+ if( i == lookup.end() )
+ return 0;
+ WORD_ID w= (WORD_ID) i->second;
+ return w;
+}
+
+}
+
diff --git a/moses/TranslationOption.cpp b/moses/TranslationOption.cpp
index 610df63f7..52bf49fb2 100644
--- a/moses/TranslationOption.cpp
+++ b/moses/TranslationOption.cpp
@@ -66,7 +66,7 @@ bool TranslationOption::Overlap(const Hypothesis &hypothesis) const
return bitmap.Overlap(GetSourceWordsRange());
}
-void
+void
TranslationOption::
CacheLexReorderingScores(const LexicalReordering &producer, const Scores &score)
{
@@ -112,7 +112,7 @@ ostream& operator<<(ostream& out, const TranslationOption& possibleTranslation)
/** returns cached scores */
const Scores*
TranslationOption::
-GetLexReorderingScores(LexicalReordering const* scoreProducer) const
+GetLexReorderingScores(LexicalReordering const* scoreProducer) const
{
return m_targetPhrase.GetExtraScores(scoreProducer);
// _ScoreCacheMap::const_iterator it;
diff --git a/moses/TranslationOption.h b/moses/TranslationOption.h
index e99ff5d13..4bf545f7d 100644
--- a/moses/TranslationOption.h
+++ b/moses/TranslationOption.h
@@ -71,8 +71,8 @@ protected:
float m_futureScore; /*< estimate of total cost when using this translation option, includes language model probabilities */
// typedef std::map<const LexicalReordering*, Scores> _ScoreCacheMap;
- // _ScoreCacheMap m_lexReorderingScores;
- // m_lexReorderingScores was moved to TargetPhrase.h so that phrase tables
+ // _ScoreCacheMap m_lexReorderingScores;
+ // m_lexReorderingScores was moved to TargetPhrase.h so that phrase tables
// can add information (such as lexical reordering scores) to target phrases
// during lookup.
@@ -156,14 +156,14 @@ public:
}
/** returns cached scores */
- // inline
+ // inline
const Scores*
GetLexReorderingScores(const LexicalReordering *scoreProducer) const;
// {
// return m_targetPhrase.GetExtraScores(scoreProducer);
// }
- void CacheLexReorderingScores(const LexicalReordering &scoreProducer,
+ void CacheLexReorderingScores(const LexicalReordering &scoreProducer,
const Scores &score);
TO_STRING();
diff --git a/moses/TranslationOptionCollection.cpp b/moses/TranslationOptionCollection.cpp
index ae9ef6817..1e3ef9045 100644
--- a/moses/TranslationOptionCollection.cpp
+++ b/moses/TranslationOptionCollection.cpp
@@ -626,12 +626,12 @@ CacheLexReordering()
{
size_t const stop = m_source.GetSize();
typedef StatefulFeatureFunction sfFF;
- BOOST_FOREACH(sfFF const* ff, sfFF::GetStatefulFeatureFunctions())
+ BOOST_FOREACH(sfFF const* ff, sfFF::GetStatefulFeatureFunctions())
{
if (typeid(*ff) != typeid(LexicalReordering)) continue;
LexicalReordering const& lr = static_cast<const LexicalReordering&>(*ff);
- for (size_t s = 0 ; s < stop ; s++)
- BOOST_FOREACH(TranslationOptionList& tol, m_collection[s])
+ for (size_t s = 0 ; s < stop ; s++)
+ BOOST_FOREACH(TranslationOptionList& tol, m_collection[s])
lr.SetCache(tol);
}
}
diff --git a/moses/TranslationOptionCollection.h b/moses/TranslationOptionCollection.h
index bed060949..4c0a6bdc6 100644
--- a/moses/TranslationOptionCollection.h
+++ b/moses/TranslationOptionCollection.h
@@ -74,7 +74,7 @@ protected:
std::vector<const Phrase*> m_unksrcs;
InputPathList m_inputPathQueue;
- TranslationOptionCollection(ttasksptr const& ttask,
+ TranslationOptionCollection(ttasksptr const& ttask,
InputType const& src, size_t maxNoTransOptPerCoverage,
float translationOptionThreshold);
diff --git a/moses/TranslationOptionCollectionConfusionNet.cpp b/moses/TranslationOptionCollectionConfusionNet.cpp
index b344fc14f..387821102 100644
--- a/moses/TranslationOptionCollectionConfusionNet.cpp
+++ b/moses/TranslationOptionCollectionConfusionNet.cpp
@@ -20,7 +20,7 @@ namespace Moses
/** constructor; just initialize the base class */
TranslationOptionCollectionConfusionNet::
-TranslationOptionCollectionConfusionNet(ttasksptr const& ttask,
+TranslationOptionCollectionConfusionNet(ttasksptr const& ttask,
const ConfusionNet &input,
size_t maxNoTransOptPerCoverage,
float translationOptionThreshold)
@@ -161,8 +161,8 @@ void TranslationOptionCollectionConfusionNet::ProcessUnknownWord(size_t sourcePo
}
}
-
-void
+
+void
TranslationOptionCollectionConfusionNet
::CreateTranslationOptions()
{
@@ -202,7 +202,7 @@ CreateTranslationOptionsForRange(const DecodeGraph &decodeGraph,
bool
TranslationOptionCollectionConfusionNet::
CreateTranslationOptionsForRangeNew
-( const DecodeGraph &decodeGraph, size_t startPos, size_t endPos,
+( const DecodeGraph &decodeGraph, size_t startPos, size_t endPos,
bool adhereTableLimit, size_t graphInd)
{
InputPathList &inputPathList = GetInputPathList(startPos, endPos);
diff --git a/moses/TranslationOptionCollectionLattice.cpp b/moses/TranslationOptionCollectionLattice.cpp
index 9bb998070..e2d9e996a 100644
--- a/moses/TranslationOptionCollectionLattice.cpp
+++ b/moses/TranslationOptionCollectionLattice.cpp
@@ -20,9 +20,9 @@ namespace Moses
/** constructor; just initialize the base class */
TranslationOptionCollectionLattice
::TranslationOptionCollectionLattice
-( ttasksptr const& ttask, const WordLattice &input,
+( ttasksptr const& ttask, const WordLattice &input,
size_t maxNoTransOptPerCoverage, float translationOptionThreshold)
- : TranslationOptionCollection(ttask, input, maxNoTransOptPerCoverage,
+ : TranslationOptionCollection(ttask, input, maxNoTransOptPerCoverage,
translationOptionThreshold)
{
UTIL_THROW_IF2(StaticData::Instance().GetUseLegacyPT(),
diff --git a/moses/TranslationTask.cpp b/moses/TranslationTask.cpp
index acb84c434..764ca998a 100644
--- a/moses/TranslationTask.cpp
+++ b/moses/TranslationTask.cpp
@@ -30,16 +30,16 @@ TranslationTask
return m_context_string;
}
-void
+void
TranslationTask
-::SetContextString(std::string const& context)
+::SetContextString(std::string const& context)
{
m_context_string = context;
}
-boost::shared_ptr<TranslationTask>
+boost::shared_ptr<TranslationTask>
TranslationTask
::create(boost::shared_ptr<InputType> const& source)
{
@@ -50,9 +50,9 @@ TranslationTask
return ret;
}
-boost::shared_ptr<TranslationTask>
+boost::shared_ptr<TranslationTask>
TranslationTask
-::create(boost::shared_ptr<InputType> const& source,
+::create(boost::shared_ptr<InputType> const& source,
boost::shared_ptr<IOWrapper> const& ioWrapper)
{
boost::shared_ptr<TranslationTask> ret(new TranslationTask(source, ioWrapper));
@@ -62,7 +62,7 @@ TranslationTask
}
TranslationTask
-::TranslationTask(boost::shared_ptr<InputType> const& source,
+::TranslationTask(boost::shared_ptr<InputType> const& source,
boost::shared_ptr<IOWrapper> const& ioWrapper)
: m_source(source) , m_ioWrapper(ioWrapper)
{ }
@@ -73,52 +73,52 @@ TranslationTask::~TranslationTask()
boost::shared_ptr<BaseManager>
TranslationTask
-::SetupManager(SearchAlgorithm algo)
+::SetupManager(SearchAlgorithm algo)
{
boost::shared_ptr<BaseManager> manager;
StaticData const& staticData = StaticData::Instance();
if (algo == DefaultSearchAlgorithm) algo = staticData.GetSearchAlgorithm();
- if (!staticData.IsSyntax(algo))
+ if (!staticData.IsSyntax(algo))
manager.reset(new Manager(this->self())); // phrase-based
- else if (algo == SyntaxF2S || algo == SyntaxT2S)
+ else if (algo == SyntaxF2S || algo == SyntaxT2S)
{ // STSG-based tree-to-string / forest-to-string decoding (ask Phil Williams)
typedef Syntax::F2S::RuleMatcherCallback Callback;
typedef Syntax::F2S::RuleMatcherHyperTree<Callback> RuleMatcher;
manager.reset(new Syntax::F2S::Manager<RuleMatcher>(this->self()));
- }
+ }
- else if (algo == SyntaxS2T)
+ else if (algo == SyntaxS2T)
{ // new-style string-to-tree decoding (ask Phil Williams)
S2TParsingAlgorithm algorithm = staticData.GetS2TParsingAlgorithm();
- if (algorithm == RecursiveCYKPlus)
+ if (algorithm == RecursiveCYKPlus)
{
typedef Syntax::S2T::EagerParserCallback Callback;
typedef Syntax::S2T::RecursiveCYKPlusParser<Callback> Parser;
manager.reset(new Syntax::S2T::Manager<Parser>(this->self()));
- }
- else if (algorithm == Scope3)
+ }
+ else if (algorithm == Scope3)
{
typedef Syntax::S2T::StandardParserCallback Callback;
typedef Syntax::S2T::Scope3Parser<Callback> Parser;
manager.reset(new Syntax::S2T::Manager<Parser>(this->self()));
- }
+ }
else UTIL_THROW2("ERROR: unhandled S2T parsing algorithm");
- }
+ }
- else if (algo == SyntaxT2S_SCFG)
+ else if (algo == SyntaxT2S_SCFG)
{ // SCFG-based tree-to-string decoding (ask Phil Williams)
typedef Syntax::F2S::RuleMatcherCallback Callback;
typedef Syntax::T2S::RuleMatcherSCFG<Callback> RuleMatcher;
manager.reset(new Syntax::T2S::Manager<RuleMatcher>(this->self()));
- }
+ }
else if (algo == ChartIncremental) // Ken's incremental decoding
manager.reset(new Incremental::Manager(this->self()));
else // original SCFG manager
- manager.reset(new ChartManager(this->self()));
+ manager.reset(new ChartManager(this->self()));
return manager;
}
@@ -151,17 +151,17 @@ void TranslationTask::Run()
boost::shared_ptr<BaseManager> manager = SetupManager();
- VERBOSE(1, "Line " << translationId << ": Initialize search took "
+ VERBOSE(1, "Line " << translationId << ": Initialize search took "
<< initTime << " seconds total" << endl);
manager->Decode();
- // new: stop here if m_ioWrapper is NULL. This means that the
+ // new: stop here if m_ioWrapper is NULL. This means that the
// owner of the TranslationTask will take care of the output
// oh, and by the way, all the output should be handled by the
// output wrapper along the lines of *m_iwWrapper << *manager;
// Just sayin' ...
- if (m_ioWrapper == NULL) return;
+ if (m_ioWrapper == NULL) return;
// we are done with search, let's look what we got
OutputCollector* ocoll;
@@ -182,7 +182,7 @@ void TranslationTask::Run()
// Output search graph in hypergraph format for Kenneth Heafield's
// lazy hypergraph decoder; writes to stderr
- manager->OutputSearchGraphHypergraph();
+ manager->OutputSearchGraphHypergraph();
additionalReportingTime.stop();
@@ -208,9 +208,9 @@ void TranslationTask::Run()
// report additional statistics
manager->CalcDecoderStatistics();
- VERBOSE(1, "Line " << translationId << ": Additional reporting took "
+ VERBOSE(1, "Line " << translationId << ": Additional reporting took "
<< additionalReportingTime << " seconds total" << endl);
- VERBOSE(1, "Line " << translationId << ": Translation took "
+ VERBOSE(1, "Line " << translationId << ": Translation took "
<< translationTime << " seconds total" << endl);
IFVERBOSE(2) {
PrintUserTime("Sentence Decoding Time:");
diff --git a/moses/TranslationTask.h b/moses/TranslationTask.h
index 6679cb9b3..df1cf9f48 100644
--- a/moses/TranslationTask.h
+++ b/moses/TranslationTask.h
@@ -36,10 +36,10 @@ class OutputCollector;
**/
class TranslationTask : public Moses::Task
{
- // no copying, no assignment
+ // no copying, no assignment
TranslationTask(TranslationTask const& other) { }
- TranslationTask const&
+ TranslationTask const&
operator=(TranslationTask const& other) { return *this; }
protected:
@@ -47,9 +47,9 @@ protected:
boost::shared_ptr<ContextScope> m_scope; // sores local info
// pointer to ContextScope, which stores context-specific information
TranslationTask() { } ;
- TranslationTask(boost::shared_ptr<Moses::InputType> const& source,
+ TranslationTask(boost::shared_ptr<Moses::InputType> const& source,
boost::shared_ptr<Moses::IOWrapper> const& ioWrapper);
- // Yes, the constructor is protected.
+ // Yes, the constructor is protected.
//
// TranslationTasks can only be created through the creator
// functions create(...). The creator functions set m_self to a
@@ -64,33 +64,33 @@ protected:
// task is still live or not, or maintain a shared_ptr to ensure the
// task stays alive till it's done with it.
- std::string m_context_string;
+ std::string m_context_string;
public:
-
- boost::shared_ptr<TranslationTask>
+
+ boost::shared_ptr<TranslationTask>
self() { return m_self.lock(); }
virtual
- boost::shared_ptr<TranslationTask const>
+ boost::shared_ptr<TranslationTask const>
self() const { return m_self.lock(); }
// creator functions
- static boost::shared_ptr<TranslationTask> create();
+ static boost::shared_ptr<TranslationTask> create();
static
- boost::shared_ptr<TranslationTask>
+ boost::shared_ptr<TranslationTask>
create(boost::shared_ptr<Moses::InputType> const& source);
static
- boost::shared_ptr<TranslationTask>
- create(boost::shared_ptr<Moses::InputType> const& source,
+ boost::shared_ptr<TranslationTask>
+ create(boost::shared_ptr<Moses::InputType> const& source,
boost::shared_ptr<Moses::IOWrapper> const& ioWrapper);
-
+
~TranslationTask();
/** Translate one sentence
* gets called by main function implemented at end of this source file */
virtual void Run();
-
+
boost::shared_ptr<Moses::InputType>
GetSource() const { return m_source; }
@@ -107,9 +107,9 @@ public:
std::string const& GetContextString() const;
void SetContextString(std::string const& context);
-
+
protected:
- boost::shared_ptr<Moses::InputType> m_source;
+ boost::shared_ptr<Moses::InputType> m_source;
boost::shared_ptr<Moses::IOWrapper> m_ioWrapper;
};
diff --git a/moses/Util.h b/moses/Util.h
index 68989721c..5c9b493f2 100644
--- a/moses/Util.h
+++ b/moses/Util.h
@@ -421,7 +421,7 @@ inline float CalcTranslationScore(const std::vector<float> &probVector,
out << *this; \
return out.str(); \
} \
-
+
//! delete and remove every element of a collection object such as set, list etc
template<class COLL>
void RemoveAllInColl(COLL &coll)
diff --git a/moses/WordLattice.h b/moses/WordLattice.h
index 70c2122d4..4dc937858 100644
--- a/moses/WordLattice.h
+++ b/moses/WordLattice.h
@@ -10,7 +10,7 @@
namespace Moses
{
-class TranslationTask;
+class TranslationTask;
/** An input to the decoder that represent a word lattice.
* @todo why is this inherited from confusion net?
diff --git a/moses/server/Optimizer.cpp b/moses/server/Optimizer.cpp
index d7d5f939c..d28d7f085 100644
--- a/moses/server/Optimizer.cpp
+++ b/moses/server/Optimizer.cpp
@@ -18,47 +18,47 @@ namespace MosesServer
void
Optimizer::
execute(xmlrpc_c::paramList const& paramList,
- xmlrpc_c::value * const retvalP)
+ xmlrpc_c::value * const retvalP)
{
#ifdef WITH_DLIB
const params_t params = paramList.getStruct(0);
params_t::const_iterator si;
- if ((si = params.find("model_name")) == params.end())
+ if ((si = params.find("model_name")) == params.end())
{
string msg = "Missing name of model to be optimized";
msg += " (e.g. PhraseDictionaryMultiModelCounts0)";
throw xmlrpc_c::fault(msg, xmlrpc_c::fault::CODE_PARSE);
}
const string model_name = xmlrpc_c::value_string(si->second);
-
- if ((si = params.find("phrase_pairs")) == params.end())
+
+ if ((si = params.find("phrase_pairs")) == params.end())
{
throw xmlrpc_c::fault("Missing list of phrase pairs",
xmlrpc_c::fault::CODE_PARSE);
}
-
+
vector<pair<string, string> > phrase_pairs;
-
+
xmlrpc_c::value_array pp_array = xmlrpc_c::value_array(si->second);
vector<xmlrpc_c::value> ppValVec(pp_array.vectorValueValue());
- for (size_t i = 0; i < ppValVec.size(); ++i)
+ for (size_t i = 0; i < ppValVec.size(); ++i)
{
- xmlrpc_c::value_array pp_array
+ xmlrpc_c::value_array pp_array
= xmlrpc_c::value_array(ppValVec[i]);
vector<xmlrpc_c::value> pp(pp_array.vectorValueValue());
string L1 = xmlrpc_c::value_string(pp[0]);
string L2 = xmlrpc_c::value_string(pp[1]);
phrase_pairs.push_back(make_pair(L1,L2));
}
-
- // PhraseDictionaryMultiModel* pdmm
+
+ // PhraseDictionaryMultiModel* pdmm
// = (PhraseDictionaryMultiModel*) FindPhraseDictionary(model_name);
PhraseDictionaryMultiModel* pdmm = FindPhraseDictionary(model_name);
vector<float> weight_vector = pdmm->MinimizePerplexity(phrase_pairs);
-
+
vector<xmlrpc_c::value> weight_vector_ret;
- for (size_t i=0;i < weight_vector.size();i++)
+ for (size_t i=0;i < weight_vector.size();i++)
weight_vector_ret.push_back(xmlrpc_c::value_double(weight_vector[i]));
*retvalP = xmlrpc_c::value_array(weight_vector_ret);
diff --git a/moses/server/Optimizer.h b/moses/server/Optimizer.h
index 5e2302d09..8911b089f 100644
--- a/moses/server/Optimizer.h
+++ b/moses/server/Optimizer.h
@@ -6,11 +6,11 @@
namespace MosesServer
{
- class
+ class
Optimizer : public xmlrpc_c::method
{
public:
- Optimizer();
+ Optimizer();
void execute(xmlrpc_c::paramList const& paramList,
xmlrpc_c::value * const retvalP);
};
diff --git a/moses/server/TranslationRequest.cpp b/moses/server/TranslationRequest.cpp
index aab8867b5..62e3031fa 100644
--- a/moses/server/TranslationRequest.cpp
+++ b/moses/server/TranslationRequest.cpp
@@ -4,9 +4,9 @@
namespace MosesServer
{
using namespace std;
- using Moses::Hypothesis;
- using Moses::StaticData;
- using Moses::WordsRange;
+ using Moses::Hypothesis;
+ using Moses::StaticData;
+ using Moses::WordsRange;
using Moses::ChartHypothesis;
using Moses::Phrase;
using Moses::Manager;
@@ -23,8 +23,8 @@ namespace MosesServer
boost::shared_ptr<TranslationRequest>
TranslationRequest::
- create(xmlrpc_c::paramList const& paramList,
- boost::condition_variable& cond,
+ create(xmlrpc_c::paramList const& paramList,
+ boost::condition_variable& cond,
boost::mutex& mut)
{
boost::shared_ptr<TranslationRequest> ret;
@@ -33,75 +33,75 @@ namespace MosesServer
return ret;
}
- void
+ void
TranslationRequest::
- Run()
+ Run()
{
parse_request(m_paramList.getStruct(0));
-
+
Moses::StaticData const& SD = Moses::StaticData::Instance();
-
+
//Make sure alternative paths are retained, if necessary
- if (m_withGraphInfo || m_nbestSize>0)
+ if (m_withGraphInfo || m_nbestSize>0)
// why on earth is this a global variable? Is this even thread-safe???? UG
(const_cast<Moses::StaticData&>(SD)).SetOutputSearchGraph(true);
-
+
std::stringstream out, graphInfo, transCollOpts;
-
- if (SD.IsSyntax())
+
+ if (SD.IsSyntax())
run_chart_decoder();
- else
+ else
run_phrase_decoder();
-
+
XVERBOSE(1,"Output: " << out.str() << endl);
{
boost::lock_guard<boost::mutex> lock(m_mutex);
m_done = true;
}
m_cond.notify_one();
-
+
}
-
+
/// add phrase alignment information from a Hypothesis
- void
+ void
TranslationRequest::
add_phrase_aln_info(Hypothesis const& h, vector<xmlrpc_c::value>& aInfo) const
{
if (!m_withAlignInfo) return;
WordsRange const& trg = h.GetCurrTargetWordsRange();
WordsRange const& src = h.GetCurrSourceWordsRange();
-
+
std::map<std::string, xmlrpc_c::value> pAlnInfo;
pAlnInfo["tgt-start"] = xmlrpc_c::value_int(trg.GetStartPos());
pAlnInfo["src-start"] = xmlrpc_c::value_int(src.GetStartPos());
pAlnInfo["src-end"] = xmlrpc_c::value_int(src.GetEndPos());
aInfo.push_back(xmlrpc_c::value_struct(pAlnInfo));
}
-
- void
+
+ void
TranslationRequest::
- outputChartHypo(ostream& out, const ChartHypothesis* hypo)
+ outputChartHypo(ostream& out, const ChartHypothesis* hypo)
{
Phrase outPhrase(20);
hypo->GetOutputPhrase(outPhrase);
-
+
// delete 1st & last
assert(outPhrase.GetSize() >= 2);
outPhrase.RemoveWord(0);
outPhrase.RemoveWord(outPhrase.GetSize() - 1);
- for (size_t pos = 0 ; pos < outPhrase.GetSize() ; pos++)
+ for (size_t pos = 0 ; pos < outPhrase.GetSize() ; pos++)
out << *outPhrase.GetFactor(pos, 0) << " ";
}
- bool
+ bool
TranslationRequest::
- compareSearchGraphNode(const Moses::SearchGraphNode& a,
- const Moses::SearchGraphNode& b)
+ compareSearchGraphNode(const Moses::SearchGraphNode& a,
+ const Moses::SearchGraphNode& b)
{ return a.hypo->GetId() < b.hypo->GetId(); }
- void
+ void
TranslationRequest::
- insertGraphInfo(Manager& manager, map<string, xmlrpc_c::value>& retData)
+ insertGraphInfo(Manager& manager, map<string, xmlrpc_c::value>& retData)
{
using xmlrpc_c::value_int;
using xmlrpc_c::value_double;
@@ -119,13 +119,13 @@ namespace MosesServer
const Hypothesis* hypo = n.hypo;
x["hyp"] = value_int(hypo->GetId());
x["stack"] = value_int(hypo->GetWordsBitmap().GetNumWordsCovered());
- if (hypo->GetId() != 0)
+ if (hypo->GetId() != 0)
{
const Hypothesis *prevHypo = hypo->GetPrevHypo();
x["back"] = value_int(prevHypo->GetId());
x["score"] = value_double(hypo->GetScore());
x["transition"] = value_double(hypo->GetScore() - prevHypo->GetScore());
- if (n.recombinationHypo)
+ if (n.recombinationHypo)
x["recombined"] = value_int(n.recombinationHypo->GetId());
x["cover-start"] = value_int(hypo->GetCurrSourceWordsRange().GetStartPos());
x["cover-end"] = value_int(hypo->GetCurrSourceWordsRange().GetEndPos());
@@ -136,26 +136,26 @@ namespace MosesServer
retData["sg"] = xmlrpc_c::value_array(searchGraphXml);
}
- void
+ void
TranslationRequest::
output_phrase(ostream& out, Phrase const& phrase) const
{
- if (!m_reportAllFactors)
+ if (!m_reportAllFactors)
{
- for (size_t i = 0 ; i < phrase.GetSize(); ++i)
+ for (size_t i = 0 ; i < phrase.GetSize(); ++i)
out << *phrase.GetFactor(i, 0) << " ";
}
else out << phrase;
}
-
- void
+
+ void
TranslationRequest::
outputNBest(const Manager& manager, map<string, xmlrpc_c::value>& retData)
{
TrellisPathList nBestList;
vector<xmlrpc_c::value> nBestXml;
manager.CalcNBest(m_nbestSize, nBestList, m_nbestDistinct);
-
+
BOOST_FOREACH(Moses::TrellisPath const* path, nBestList)
{
vector<const Hypothesis *> const& E = path->GetEdges();
@@ -169,27 +169,27 @@ namespace MosesServer
path->GetScoreBreakdown()->OutputAllFeatureScores(buf);
nBestXmlItem["fvals"] = xmlrpc_c::value_string(buf.str());
}
-
+
// weighted score
nBestXmlItem["totalScore"] = xmlrpc_c::value_double(path->GetTotalScore());
nBestXml.push_back(xmlrpc_c::value_struct(nBestXmlItem));
}
retData["nbest"] = xmlrpc_c::value_array(nBestXml);
}
-
- void
+
+ void
TranslationRequest::
- insertTranslationOptions(Moses::Manager& manager,
- std::map<std::string, xmlrpc_c::value>& retData)
+ insertTranslationOptions(Moses::Manager& manager,
+ std::map<std::string, xmlrpc_c::value>& retData)
{
- const TranslationOptionCollection* toptsColl
+ const TranslationOptionCollection* toptsColl
= manager.getSntTranslationOptions();
vector<xmlrpc_c::value> toptsXml;
size_t const stop = toptsColl->GetSource().GetSize();
TranslationOptionList const* tol;
- for (size_t s = 0 ; s < stop ; ++s)
+ for (size_t s = 0 ; s < stop ; ++s)
{
- for (size_t e = s;
+ for (size_t e = s;
(tol = toptsColl->GetTranslationOptionList(s,e)) != NULL;
++e)
{
@@ -204,11 +204,11 @@ namespace MosesServer
toptXml["start"] = xmlrpc_c::value_int(s);
toptXml["end"] = xmlrpc_c::value_int(e);
vector<xmlrpc_c::value> scoresXml;
- const std::valarray<FValue> &scores
+ const std::valarray<FValue> &scores
= topt->GetScoreBreakdown().getCoreFeatures();
- for (size_t j = 0; j < scores.size(); ++j)
+ for (size_t j = 0; j < scores.size(); ++j)
scoresXml.push_back(xmlrpc_c::value_double(scores[j]));
-
+
toptXml["scores"] = xmlrpc_c::value_array(scoresXml);
toptsXml.push_back(xmlrpc_c::value_struct(toptXml));
}
@@ -217,13 +217,13 @@ namespace MosesServer
retData["topt"] = xmlrpc_c::value_array(toptsXml);
}
- bool
+ bool
check(std::map<std::string, xmlrpc_c::value> const& params, std::string const key)
{
std::map<std::string, xmlrpc_c::value>::const_iterator m;
return (params.find(key) != params.end());
}
-
+
TranslationRequest::
TranslationRequest(xmlrpc_c::paramList const& paramList,
boost::condition_variable& cond, boost::mutex& mut)
@@ -236,15 +236,15 @@ namespace MosesServer
{ // parse XMLRPC request
// params_t const params = m_paramList.getStruct(0);
m_paramList.verifyEnd(1); // ??? UG
-
+
// source text must be given, or we don't know what to translate
typedef std::map<std::string, xmlrpc_c::value> params_t;
params_t::const_iterator si = params.find("text");
- if (si == params.end())
+ if (si == params.end())
throw xmlrpc_c::fault("Missing source text", xmlrpc_c::fault::CODE_PARSE);
m_source_string = xmlrpc_c::value_string(si->second);
XVERBOSE(1,"Input: " << m_source_string << endl);
-
+
m_withAlignInfo = check(params, "align");
m_withWordAlignInfo = check(params, "word-align");
m_withGraphInfo = check(params, "sg");
@@ -252,31 +252,31 @@ namespace MosesServer
m_reportAllFactors = check(params, "report-all-factors");
m_nbestDistinct = check(params, "nbest-distinct");
m_withScoreBreakdown = check(params, "add-score-breakdown");
- m_source.reset(new Sentence(0,m_source_string));
+ m_source.reset(new Sentence(0,m_source_string));
si = params.find("lambda");
- if (si != params.end())
+ if (si != params.end())
{
// muMo = multiModel
xmlrpc_c::value_array muMoArray = xmlrpc_c::value_array(si->second);
vector<xmlrpc_c::value> muMoValVec(muMoArray.vectorValueValue());
vector<float> w(muMoValVec.size());
- for (size_t i = 0; i < muMoValVec.size(); ++i)
+ for (size_t i = 0; i < muMoValVec.size(); ++i)
w[i] = xmlrpc_c::value_double(muMoValVec[i]);
if (w.size() && (si = params.find("model_name")) != params.end())
{
string const model_name = xmlrpc_c::value_string(si->second);
- PhraseDictionaryMultiModel* pdmm
+ PhraseDictionaryMultiModel* pdmm
= (PhraseDictionaryMultiModel*) FindPhraseDictionary(model_name);
- // Moses::PhraseDictionaryMultiModel* pdmm
+ // Moses::PhraseDictionaryMultiModel* pdmm
// = FindPhraseDictionary(model_name);
pdmm->SetTemporaryMultiModelWeightsVector(w);
}
}
-
+
// // biased sampling for suffix-array-based sampling phrase table?
// if ((si = params.find("bias")) != params.end())
- // {
- // std::vector<xmlrpc_c::value> tmp
+ // {
+ // std::vector<xmlrpc_c::value> tmp
// = xmlrpc_c::value_array(si->second).cvalue();
// for (size_t i = 1; i < tmp.size(); i += 2)
// m_bias[xmlrpc_c::value_int(tmp[i-1])] = xmlrpc_c::value_double(tmp[i]);
@@ -288,28 +288,28 @@ namespace MosesServer
TranslationRequest::
run_chart_decoder()
{
- Moses::TreeInput tinput;
+ Moses::TreeInput tinput;
istringstream buf(m_source_string + "\n");
tinput.Read(buf, StaticData::Instance().GetInputFactorOrder());
-
+
Moses::ChartManager manager(this->self());
manager.Decode();
-
+
const Moses::ChartHypothesis *hypo = manager.GetBestHypothesis();
ostringstream out;
outputChartHypo(out,hypo);
-
+
m_target_string = out.str();
m_retData["text"] = xmlrpc_c::value_string(m_target_string);
-
- if (m_withGraphInfo)
+
+ if (m_withGraphInfo)
{
std::ostringstream sgstream;
manager.OutputSearchGraphMoses(sgstream);
m_retData["sg"] = xmlrpc_c::value_string(sgstream.str());
}
} // end of TranslationRequest::run_chart_decoder()
-
+
void
TranslationRequest::
pack_hypothesis(vector<Hypothesis const* > const& edges, string const& key,
@@ -320,7 +320,7 @@ namespace MosesServer
BOOST_REVERSE_FOREACH(Hypothesis const* e, edges)
output_phrase(target, e->GetCurrTargetPhrase());
dest[key] = xmlrpc_c::value_string(target.str());
-
+
if (m_withAlignInfo)
{ // phrase alignment, if requested
@@ -359,16 +359,16 @@ namespace MosesServer
Manager manager(this->self());
// if (m_bias.size()) manager.SetBias(&m_bias);
manager.Decode();
-
+
pack_hypothesis(manager.GetBestHypothesis(), "text", m_retData);
-
+
if (m_withGraphInfo) insertGraphInfo(manager,m_retData);
if (m_withTopts) insertTranslationOptions(manager,m_retData);
if (m_nbestSize) outputNBest(manager, m_retData);
-
+
(const_cast<StaticData&>(Moses::StaticData::Instance()))
- .SetOutputSearchGraph(false);
+ .SetOutputSearchGraph(false);
// WTF? one more reason not to have this as global variable! --- UG
-
+
}
}
diff --git a/moses/server/TranslationRequest.h b/moses/server/TranslationRequest.h
index 89b3c5097..6c7cd7275 100644
--- a/moses/server/TranslationRequest.h
+++ b/moses/server/TranslationRequest.h
@@ -23,7 +23,7 @@
#include <xmlrpc-c/base.hpp>
namespace MosesServer
{
- class
+ class
TranslationRequest : public virtual Moses::TranslationTask
{
boost::condition_variable& m_cond;
@@ -33,7 +33,7 @@ namespace MosesServer
xmlrpc_c::paramList const& m_paramList;
std::map<std::string, xmlrpc_c::value> m_retData;
std::map<uint32_t,float> m_bias; // for biased sampling
-
+
std::string m_source_string, m_target_string;
bool m_withAlignInfo;
bool m_withWordAlignInfo;
@@ -44,21 +44,21 @@ namespace MosesServer
bool m_withScoreBreakdown;
size_t m_nbestSize;
- void
+ void
parse_request();
void
parse_request(std::map<std::string, xmlrpc_c::value> const& req);
-
+
virtual void
run_chart_decoder();
virtual void
run_phrase_decoder();
-
- void
- pack_hypothesis(std::vector<Moses::Hypothesis const* > const& edges,
- std::string const& key,
+
+ void
+ pack_hypothesis(std::vector<Moses::Hypothesis const* > const& edges,
+ std::string const& key,
std::map<std::string, xmlrpc_c::value> & dest) const;
void
@@ -66,57 +66,57 @@ namespace MosesServer
std::map<std::string, xmlrpc_c::value> & dest) const;
- void
+ void
output_phrase(std::ostream& out, Moses::Phrase const& phrase) const;
- void
- add_phrase_aln_info(Moses::Hypothesis const& h,
+ void
+ add_phrase_aln_info(Moses::Hypothesis const& h,
std::vector<xmlrpc_c::value>& aInfo) const;
- void
+ void
outputChartHypo(std::ostream& out, const Moses::ChartHypothesis* hypo);
- bool
- compareSearchGraphNode(const Moses::SearchGraphNode& a,
+ bool
+ compareSearchGraphNode(const Moses::SearchGraphNode& a,
const Moses::SearchGraphNode& b);
- void
- insertGraphInfo(Moses::Manager& manager,
- std::map<std::string, xmlrpc_c::value>& retData);
- void
- outputNBest(Moses::Manager const& manager,
+ void
+ insertGraphInfo(Moses::Manager& manager,
+ std::map<std::string, xmlrpc_c::value>& retData);
+ void
+ outputNBest(Moses::Manager const& manager,
std::map<std::string, xmlrpc_c::value>& retData);
- void
- insertTranslationOptions(Moses::Manager& manager,
+ void
+ insertTranslationOptions(Moses::Manager& manager,
std::map<std::string, xmlrpc_c::value>& retData);
protected:
- TranslationRequest(xmlrpc_c::paramList const& paramList,
- boost::condition_variable& cond,
+ TranslationRequest(xmlrpc_c::paramList const& paramList,
+ boost::condition_variable& cond,
boost::mutex& mut);
public:
static
boost::shared_ptr<TranslationRequest>
- create(xmlrpc_c::paramList const& paramList,
- boost::condition_variable& cond,
+ create(xmlrpc_c::paramList const& paramList,
+ boost::condition_variable& cond,
boost::mutex& mut);
-
-
- virtual bool
+
+
+ virtual bool
DeleteAfterExecution() { return false; }
-
- bool
+
+ bool
IsDone() const { return m_done; }
-
- std::map<std::string, xmlrpc_c::value> const&
+
+ std::map<std::string, xmlrpc_c::value> const&
GetRetData() { return m_retData; }
-
- void
+
+ void
Run();
-
-
+
+
};
}
diff --git a/moses/server/Translator.cpp b/moses/server/Translator.cpp
index 51f863c4b..d4cff99df 100644
--- a/moses/server/Translator.cpp
+++ b/moses/server/Translator.cpp
@@ -8,8 +8,8 @@ namespace MosesServer
using namespace Moses;
Translator::
- Translator(size_t numThreads)
- : m_threadPool(numThreads)
+ Translator(size_t numThreads)
+ : m_threadPool(numThreads)
{
// signature and help strings are documentation -- the client
// can query this information with a system.methodSignature and
@@ -17,21 +17,21 @@ namespace MosesServer
this->_signature = "S:S";
this->_help = "Does translation";
}
-
- void
+
+ void
Translator::
execute(xmlrpc_c::paramList const& paramList,
- xmlrpc_c::value * const retvalP)
+ xmlrpc_c::value * const retvalP)
{
boost::condition_variable cond;
boost::mutex mut;
- boost::shared_ptr<TranslationRequest> task
+ boost::shared_ptr<TranslationRequest> task
= TranslationRequest::create(paramList,cond,mut);
m_threadPool.Submit(task);
boost::unique_lock<boost::mutex> lock(mut);
- while (!task->IsDone())
+ while (!task->IsDone())
cond.wait(lock);
*retvalP = xmlrpc_c::value_struct(task->GetRetData());
}
-
+
}
diff --git a/moses/server/Translator.h b/moses/server/Translator.h
index 062080545..e3117c290 100644
--- a/moses/server/Translator.h
+++ b/moses/server/Translator.h
@@ -10,17 +10,17 @@
#endif
namespace MosesServer
{
- class
+ class
// MosesServer::
Translator : public xmlrpc_c::method
{
public:
Translator(size_t numThreads = 10);
-
+
void execute(xmlrpc_c::paramList const& paramList,
xmlrpc_c::value * const retvalP);
private:
Moses::ThreadPool m_threadPool;
};
-
+
}
diff --git a/moses/server/Updater.cpp b/moses/server/Updater.cpp
index 95cafd71a..818f374a5 100644
--- a/moses/server/Updater.cpp
+++ b/moses/server/Updater.cpp
@@ -6,7 +6,7 @@ namespace MosesServer
using namespace std;
Updater::
- Updater()
+ Updater()
{
// signature and help strings are documentation -- the client
// can query this information with a system.methodSignature and
@@ -18,7 +18,7 @@ namespace MosesServer
void
Updater::
execute(xmlrpc_c::paramList const& paramList,
- xmlrpc_c::value * const retvalP)
+ xmlrpc_c::value * const retvalP)
{
#if PT_UG
const params_t params = paramList.getStruct(0);
@@ -29,20 +29,20 @@ namespace MosesServer
*retvalP = xmlrpc_c::value_string("Phrase table updated");
#endif
};
-
- void
+
+ void
Updater::
- breakOutParams(const params_t& params)
+ breakOutParams(const params_t& params)
{
params_t::const_iterator si = params.find("source");
if(si == params.end())
- throw xmlrpc_c::fault("Missing source sentence",
+ throw xmlrpc_c::fault("Missing source sentence",
xmlrpc_c::fault::CODE_PARSE);
m_src = xmlrpc_c::value_string(si->second);
XVERBOSE(1,"source = " << m_src << endl);
si = params.find("target");
if(si == params.end())
- throw xmlrpc_c::fault("Missing target sentence",
+ throw xmlrpc_c::fault("Missing target sentence",
xmlrpc_c::fault::CODE_PARSE);
m_trg = xmlrpc_c::value_string(si->second);
XVERBOSE(1,"target = " << m_trg << endl);
@@ -53,5 +53,5 @@ namespace MosesServer
m_bounded = ((si = params.find("bounded")) != params.end());
m_add2ORLM = ((si = params.find("updateORLM")) != params.end());
};
-
+
}
diff --git a/moses/server/Updater.h b/moses/server/Updater.h
index c3c72da50..9bb20b775 100644
--- a/moses/server/Updater.h
+++ b/moses/server/Updater.h
@@ -19,7 +19,7 @@
namespace MosesServer
{
- class
+ class
Updater: public xmlrpc_c::method
{
@@ -31,14 +31,14 @@ namespace MosesServer
public:
Updater();
-
+
void
execute(xmlrpc_c::paramList const& paramList,
xmlrpc_c::value * const retvalP);
- void
+ void
breakOutParams(const params_t& params);
-
+
};
}
diff --git a/moses/thread_safe_container.h b/moses/thread_safe_container.h
index a6bb96c7c..1983d7234 100644
--- a/moses/thread_safe_container.h
+++ b/moses/thread_safe_container.h
@@ -17,8 +17,8 @@
namespace Moses
{
-
- // todo: replace this with thread lock-free containers, if a stable library can
+
+ // todo: replace this with thread lock-free containers, if a stable library can
// be found somewhere
template<typename KEY, typename VAL, class CONTAINER = std::map<KEY,VAL> >
@@ -43,22 +43,22 @@ namespace Moses
public:
locking_iterator() : m_container(NULL) { }
- locking_iterator(boost::shared_mutex& lock,
- CONTAINER const* container,
+ locking_iterator(boost::shared_mutex& lock,
+ CONTAINER const* container,
const_iter_t const& iter)
: m_lock(lock), m_container(container), m_iter(iter)
{ }
- entry_t const& operator->()
- {
+ entry_t const& operator->()
+ {
UTIL_THROW_IF2(m_container == NULL, "This locking iterator is invalid "
<< "or has not been assigned.");
- return m_iter.operator->();
+ return m_iter.operator->();
}
// locking operators transfer the lock upon assignment and become invalid
locking_iterator const&
- operator=(locking_iterator& other)
+ operator=(locking_iterator& other)
{
m_lock.swap(other.m_lock);
m_iter = other.m_iter;
@@ -71,22 +71,22 @@ namespace Moses
return m_iter == other;
}
- locking_iterator const&
+ locking_iterator const&
operator++() { ++m_iter; return *this; }
- // DO NOT DEFINE THE POST-INCREMENT OPERATOR!
- // locking_operators are non-copyable,
- // so we can't simply make a copy before incrementing and return
+ // DO NOT DEFINE THE POST-INCREMENT OPERATOR!
+ // locking_operators are non-copyable,
+ // so we can't simply make a copy before incrementing and return
// the copy after incrementing
- locking_iterator const&
- operator++(int);
+ locking_iterator const&
+ operator++(int);
};
const_iter_t const& end() const
{ return m_container.end(); }
locking_iterator begin() const
- {
+ {
return locking_iterator(m_lock, this, m_container.begin());
}
@@ -115,7 +115,7 @@ namespace Moses
return &m->second;
}
- size_t erase(KEY const& key)
+ size_t erase(KEY const& key)
{
boost::unique_lock< boost::shared_mutex > lock(m_lock);
return m_container.erase(key);
diff --git a/phrase-extract/ExtractionPhrasePair.cpp b/phrase-extract/ExtractionPhrasePair.cpp
index cde712ac6..57821fe44 100644
--- a/phrase-extract/ExtractionPhrasePair.cpp
+++ b/phrase-extract/ExtractionPhrasePair.cpp
@@ -242,7 +242,7 @@ void ExtractionPhrasePair::AddProperties( const std::string &propertiesString, f
vector<std::string> keyValue = Moses::TokenizeFirstOnly(tok, " ");
if (keyValue.size() == 2) {
AddProperty(keyValue[0], keyValue[1], count);
- }
+ }
}
}
diff --git a/phrase-extract/XmlTree.h b/phrase-extract/XmlTree.h
index a8c6888d6..50b1c0acc 100644
--- a/phrase-extract/XmlTree.h
+++ b/phrase-extract/XmlTree.h
@@ -1,43 +1,43 @@
-// $Id: XmlOption.cpp 1960 2008-12-15 12:52:38Z phkoehn $
-// vim:tabstop=2
-
-/***********************************************************************
- Moses - factored phrase-based language decoder
- Copyright (C) 2006 University of Edinburgh
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- ***********************************************************************/
-
-#pragma once
-#include <string>
-#include <vector>
-#include <set>
-#include <map>
-#include "SyntaxTree.h"
-
-namespace MosesTraining
-{
-
-std::string ParseXmlTagAttribute(const std::string& tag,const std::string& attributeName);
-std::string Trim(const std::string& str, const std::string dropChars = " \t\n\r");
-std::string TrimXml(const std::string& str);
-bool isXmlTag(const std::string& tag);
-std::vector<std::string> TokenizeXml(const std::string& str);
-bool ProcessAndStripXMLTags(std::string &line, SyntaxTree &tree, std::set< std::string > &labelCollection, std::map< std::string, int > &topLabelCollection, bool unescape = true);
-std::string unescape(const std::string &str);
-
-
-} // namespace
-
+// $Id: XmlOption.cpp 1960 2008-12-15 12:52:38Z phkoehn $
+// vim:tabstop=2
+
+/***********************************************************************
+ Moses - factored phrase-based language decoder
+ Copyright (C) 2006 University of Edinburgh
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ ***********************************************************************/
+
+#pragma once
+#include <string>
+#include <vector>
+#include <set>
+#include <map>
+#include "SyntaxTree.h"
+
+namespace MosesTraining
+{
+
+std::string ParseXmlTagAttribute(const std::string& tag,const std::string& attributeName);
+std::string Trim(const std::string& str, const std::string dropChars = " \t\n\r");
+std::string TrimXml(const std::string& str);
+bool isXmlTag(const std::string& tag);
+std::vector<std::string> TokenizeXml(const std::string& str);
+bool ProcessAndStripXMLTags(std::string &line, SyntaxTree &tree, std::set< std::string > &labelCollection, std::map< std::string, int > &topLabelCollection, bool unescape = true);
+std::string unescape(const std::string &str);
+
+
+} // namespace
+
diff --git a/phrase-extract/consolidate-main.cpp b/phrase-extract/consolidate-main.cpp
index 7f17eb1c8..4ff0b5373 100644
--- a/phrase-extract/consolidate-main.cpp
+++ b/phrase-extract/consolidate-main.cpp
@@ -214,11 +214,11 @@ void loadCountOfCounts( const std::string& fileNameCountOfCounts )
}
-void processFiles( const std::string& fileNameDirect,
- const std::string& fileNameIndirect,
- const std::string& fileNameConsolidated,
- const std::string& fileNameCountOfCounts,
- const std::string& fileNameSourceLabelSet,
+void processFiles( const std::string& fileNameDirect,
+ const std::string& fileNameIndirect,
+ const std::string& fileNameConsolidated,
+ const std::string& fileNameCountOfCounts,
+ const std::string& fileNameSourceLabelSet,
const std::string& fileNamePartsOfSpeechVocabulary )
{
if (goodTuringFlag || kneserNeyFlag)
@@ -260,9 +260,9 @@ void processFiles( const std::string& fileNameDirect,
// indirect: source target probabilities
// consistency checks
- UTIL_THROW_IF2(itemDirect[0].compare( itemIndirect[0] ) != 0,
+ UTIL_THROW_IF2(itemDirect[0].compare( itemIndirect[0] ) != 0,
"target phrase does not match in line " << i << ": '" << itemDirect[0] << "' != '" << itemIndirect[0] << "'");
- UTIL_THROW_IF2(itemDirect[1].compare( itemIndirect[1] ) != 0,
+ UTIL_THROW_IF2(itemDirect[1].compare( itemIndirect[1] ) != 0,
"source phrase does not match in line " << i << ": '" << itemDirect[1] << "' != '" << itemIndirect[1] << "'");
// SCORES ...
diff --git a/phrase-extract/extract-ghkm/ExtractGHKM.cpp b/phrase-extract/extract-ghkm/ExtractGHKM.cpp
index a9a0bffc2..7e9a3ec0a 100644
--- a/phrase-extract/extract-ghkm/ExtractGHKM.cpp
+++ b/phrase-extract/extract-ghkm/ExtractGHKM.cpp
@@ -344,7 +344,7 @@ int ExtractGHKM::Main(int argc, char *argv[])
std::set<std::string> strippedTargetLabelSet;
std::map<std::string, int> strippedTargetTopLabelSet;
- if (options.stripBitParLabels &&
+ if (options.stripBitParLabels &&
(!options.glueGrammarFile.empty() || !options.unknownWordSoftMatchesFile.empty())) {
StripBitParLabels(targetLabelSet, targetTopLabelSet, strippedTargetLabelSet, strippedTargetTopLabelSet);
}
diff --git a/phrase-extract/extract-mixed-syntax/pugiconfig.hpp b/phrase-extract/extract-mixed-syntax/pugiconfig.hpp
index c2196715c..5a63fd488 100644
--- a/phrase-extract/extract-mixed-syntax/pugiconfig.hpp
+++ b/phrase-extract/extract-mixed-syntax/pugiconfig.hpp
@@ -57,7 +57,7 @@
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
- *
+ *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
diff --git a/phrase-extract/extract-mixed-syntax/pugixml.cpp b/phrase-extract/extract-mixed-syntax/pugixml.cpp
index fa62a5e96..5076e3cc0 100644
--- a/phrase-extract/extract-mixed-syntax/pugixml.cpp
+++ b/phrase-extract/extract-mixed-syntax/pugixml.cpp
@@ -50,7 +50,7 @@
#endif
#ifdef __INTEL_COMPILER
-# pragma warning(disable: 177) // function was declared but never referenced
+# pragma warning(disable: 177) // function was declared but never referenced
# pragma warning(disable: 279) // controlling expression is constant
# pragma warning(disable: 1478 1786) // function was declared "deprecated"
# pragma warning(disable: 1684) // conversion from pointer to same-sized integral type
diff --git a/phrase-extract/extract-mixed-syntax/pugixml.hpp b/phrase-extract/extract-mixed-syntax/pugixml.hpp
index 82348bd19..a22b59d59 100644
--- a/phrase-extract/extract-mixed-syntax/pugixml.hpp
+++ b/phrase-extract/extract-mixed-syntax/pugixml.hpp
@@ -124,13 +124,13 @@ namespace pugi
// This flag determines if EOL characters are normalized (converted to #xA) during parsing. This flag is on by default.
const unsigned int parse_eol = 0x0020;
-
+
// This flag determines if attribute values are normalized using CDATA normalization rules during parsing. This flag is on by default.
const unsigned int parse_wconv_attribute = 0x0040;
// This flag determines if attribute values are normalized using NMTOKENS normalization rules during parsing. This flag is off by default.
const unsigned int parse_wnorm_attribute = 0x0080;
-
+
// This flag determines if document declaration (node_declaration) is added to the DOM tree. This flag is off by default.
const unsigned int parse_declaration = 0x0100;
@@ -168,16 +168,16 @@ namespace pugi
};
// Formatting flags
-
+
// Indent the nodes that are written to output stream with as many indentation strings as deep the node is in DOM tree. This flag is on by default.
const unsigned int format_indent = 0x01;
-
+
// Write encoding-specific BOM to the output stream. This flag is off by default.
const unsigned int format_write_bom = 0x02;
// Use raw output mode (no indentation and no line breaks are written). This flag is off by default.
const unsigned int format_raw = 0x04;
-
+
// Omit default XML declaration even if there is no declaration in the document. This flag is off by default.
const unsigned int format_no_declaration = 0x08;
@@ -190,7 +190,7 @@ namespace pugi
// The default set of formatting flags.
// Nodes are indented depending on their depth in DOM tree, a default declaration is output if document has none.
const unsigned int format_default = format_indent;
-
+
// Forward declarations
struct xml_attribute_struct;
struct xml_node_struct;
@@ -204,7 +204,7 @@ namespace pugi
class xml_node;
class xml_text;
-
+
#ifndef PUGIXML_NO_XPATH
class xpath_node;
class xpath_node_set;
@@ -277,13 +277,13 @@ namespace pugi
private:
xml_attribute_struct* _attr;
-
+
typedef void (*unspecified_bool_type)(xml_attribute***);
public:
// Default constructor. Constructs an empty attribute.
xml_attribute();
-
+
// Constructs attribute from internal pointer
explicit xml_attribute(xml_attribute_struct* attr);
@@ -378,7 +378,7 @@ namespace pugi
// Borland C++ workaround
bool operator!() const;
-
+
// Comparison operators (compares wrapped node pointers)
bool operator==(const xml_node& r) const;
bool operator!=(const xml_node& r) const;
@@ -396,7 +396,7 @@ namespace pugi
// Get node name/value, or "" if node is empty or it has no name/value
const char_t* name() const;
const char_t* value() const;
-
+
// Get attribute list
xml_attribute first_attribute() const;
xml_attribute last_attribute() const;
@@ -408,7 +408,7 @@ namespace pugi
// Get next/previous sibling in the children list of the parent node
xml_node next_sibling() const;
xml_node previous_sibling() const;
-
+
// Get parent node
xml_node parent() const;
@@ -433,7 +433,7 @@ namespace pugi
// Set node name/value (returns false if node is empty, there is not enough memory, or node can not have name/value)
bool set_name(const char_t* rhs);
bool set_value(const char_t* rhs);
-
+
// Add attribute with specified name. Returns added attribute, or empty attribute on errors.
xml_attribute append_attribute(const char_t* name);
xml_attribute prepend_attribute(const char_t* name);
@@ -476,11 +476,11 @@ namespace pugi
template <typename Predicate> xml_attribute find_attribute(Predicate pred) const
{
if (!_root) return xml_attribute();
-
+
for (xml_attribute attrib = first_attribute(); attrib; attrib = attrib.next_attribute())
if (pred(attrib))
return attrib;
-
+
return xml_attribute();
}
@@ -488,11 +488,11 @@ namespace pugi
template <typename Predicate> xml_node find_child(Predicate pred) const
{
if (!_root) return xml_node();
-
+
for (xml_node node = first_child(); node; node = node.next_sibling())
if (pred(node))
return node;
-
+
return xml_node();
}
@@ -502,7 +502,7 @@ namespace pugi
if (!_root) return xml_node();
xml_node cur = first_child();
-
+
while (cur._root && cur._root != _root)
{
if (pred(cur)) return cur;
@@ -534,7 +534,7 @@ namespace pugi
// Recursively traverse subtree with xml_tree_walker
bool traverse(xml_tree_walker& walker);
-
+
#ifndef PUGIXML_NO_XPATH
// Select single node by evaluating XPath query. Returns first node from the resulting node set.
xpath_node select_single_node(const char_t* query, xpath_variable_set* variables = 0) const;
@@ -544,7 +544,7 @@ namespace pugi
xpath_node_set select_nodes(const char_t* query, xpath_variable_set* variables = 0) const;
xpath_node_set select_nodes(const xpath_query& query) const;
#endif
-
+
// Print subtree using a writer object
void print(xml_writer& writer, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto, unsigned int depth = 0) const;
@@ -781,11 +781,11 @@ namespace pugi
private:
int _depth;
-
+
protected:
// Get current traversal depth
int depth() const;
-
+
public:
xml_tree_walker();
virtual ~xml_tree_walker();
@@ -852,7 +852,7 @@ namespace pugi
char_t* _buffer;
char _memory[192];
-
+
// Non-copyable semantics
xml_document(const xml_document&);
const xml_document& operator=(const xml_document&);
@@ -960,7 +960,7 @@ namespace pugi
// Non-copyable semantics
xpath_variable(const xpath_variable&);
xpath_variable& operator=(const xpath_variable&);
-
+
public:
// Get variable name
const char_t* name() const;
@@ -1035,21 +1035,21 @@ namespace pugi
// Get query expression return type
xpath_value_type return_type() const;
-
+
// Evaluate expression as boolean value in the specified context; performs type conversion if necessary.
// If PUGIXML_NO_EXCEPTIONS is not defined, throws std::bad_alloc on out of memory errors.
bool evaluate_boolean(const xpath_node& n) const;
-
+
// Evaluate expression as double value in the specified context; performs type conversion if necessary.
// If PUGIXML_NO_EXCEPTIONS is not defined, throws std::bad_alloc on out of memory errors.
double evaluate_number(const xpath_node& n) const;
-
+
#ifndef PUGIXML_NO_STL
// Evaluate expression as string value in the specified context; performs type conversion if necessary.
// If PUGIXML_NO_EXCEPTIONS is not defined, throws std::bad_alloc on out of memory errors.
string_t evaluate_string(const xpath_node& n) const;
#endif
-
+
// Evaluate expression as string value in the specified context; performs type conversion if necessary.
// At most capacity characters are written to the destination buffer, full result size is returned (includes terminating zero).
// If PUGIXML_NO_EXCEPTIONS is not defined, throws std::bad_alloc on out of memory errors.
@@ -1070,7 +1070,7 @@ namespace pugi
// Borland C++ workaround
bool operator!() const;
};
-
+
#ifndef PUGIXML_NO_EXCEPTIONS
// XPath exception class
class PUGIXML_CLASS xpath_exception: public std::exception
@@ -1089,20 +1089,20 @@ namespace pugi
const xpath_parse_result& result() const;
};
#endif
-
+
// XPath node class (either xml_node or xml_attribute)
class PUGIXML_CLASS xpath_node
{
private:
xml_node _node;
xml_attribute _attribute;
-
+
typedef void (*unspecified_bool_type)(xpath_node***);
public:
// Default constructor; constructs empty XPath node
xpath_node();
-
+
// Construct XPath node from XML node/attribute
xpath_node(const xml_node& node);
xpath_node(const xml_attribute& attribute, const xml_node& parent);
@@ -1110,13 +1110,13 @@ namespace pugi
// Get node/attribute, if any
xml_node node() const;
xml_attribute attribute() const;
-
+
// Get parent of contained node/attribute
xml_node parent() const;
// Safe bool conversion operator
operator unspecified_bool_type() const;
-
+
// Borland C++ workaround
bool operator!() const;
@@ -1142,10 +1142,10 @@ namespace pugi
type_sorted, // Sorted by document order (ascending)
type_sorted_reverse // Sorted by document order (descending)
};
-
+
// Constant iterator type
typedef const xpath_node* const_iterator;
-
+
// Default constructor. Constructs empty set.
xpath_node_set();
@@ -1154,38 +1154,38 @@ namespace pugi
// Destructor
~xpath_node_set();
-
+
// Copy constructor/assignment operator
xpath_node_set(const xpath_node_set& ns);
xpath_node_set& operator=(const xpath_node_set& ns);
// Get collection type
type_t type() const;
-
+
// Get collection size
size_t size() const;
// Indexing operator
const xpath_node& operator[](size_t index) const;
-
+
// Collection iterators
const_iterator begin() const;
const_iterator end() const;
// Sort the collection in ascending/descending order by document order
void sort(bool reverse = false);
-
+
// Get first node in the collection by document order
xpath_node first() const;
-
+
// Check if collection is empty
bool empty() const;
-
+
private:
type_t _type;
-
+
xpath_node _storage;
-
+
xpath_node* _begin;
xpath_node* _end;
@@ -1197,7 +1197,7 @@ namespace pugi
// Convert wide string to UTF8
std::basic_string<char, std::char_traits<char>, std::allocator<char> > PUGIXML_FUNCTION as_utf8(const wchar_t* str);
std::basic_string<char, std::char_traits<char>, std::allocator<char> > PUGIXML_FUNCTION as_utf8(const std::basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> >& str);
-
+
// Convert UTF8 to wide string
std::basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> > PUGIXML_FUNCTION as_wide(const char* str);
std::basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> > PUGIXML_FUNCTION as_wide(const std::basic_string<char, std::char_traits<char>, std::allocator<char> >& str);
@@ -1205,13 +1205,13 @@ namespace pugi
// Memory allocation function interface; returns pointer to allocated memory or NULL on failure
typedef void* (*allocation_function)(size_t size);
-
+
// Memory deallocation function interface
typedef void (*deallocation_function)(void* ptr);
// Override default memory management functions. All subsequent allocations/deallocations will be performed via supplied functions.
void PUGIXML_FUNCTION set_memory_management_functions(allocation_function allocate, deallocation_function deallocate);
-
+
// Get current memory management functions
allocation_function PUGIXML_FUNCTION get_memory_allocation_function();
deallocation_function PUGIXML_FUNCTION get_memory_deallocation_function();
@@ -1253,7 +1253,7 @@ namespace std
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
- *
+ *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
diff --git a/phrase-extract/pcfg-common/pcfg.cc b/phrase-extract/pcfg-common/pcfg.cc
index cae6d4763..988367c9b 100644
--- a/phrase-extract/pcfg-common/pcfg.cc
+++ b/phrase-extract/pcfg-common/pcfg.cc
@@ -1,17 +1,17 @@
/***********************************************************************
Moses - statistical machine translation system
Copyright (C) 2006-2012 University of Edinburgh
-
+
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
-
+
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
-
+
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
diff --git a/phrase-extract/pcfg-common/tool.cc b/phrase-extract/pcfg-common/tool.cc
index f54e07a12..c41eaf9bd 100644
--- a/phrase-extract/pcfg-common/tool.cc
+++ b/phrase-extract/pcfg-common/tool.cc
@@ -1,17 +1,17 @@
/***********************************************************************
Moses - statistical machine translation system
Copyright (C) 2006-2012 University of Edinburgh
-
+
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
-
+
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
-
+
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
diff --git a/phrase-extract/pcfg-common/xml_tree_parser.cc b/phrase-extract/pcfg-common/xml_tree_parser.cc
index 29e46a9f2..f15a04811 100644
--- a/phrase-extract/pcfg-common/xml_tree_parser.cc
+++ b/phrase-extract/pcfg-common/xml_tree_parser.cc
@@ -1,17 +1,17 @@
/***********************************************************************
Moses - statistical machine translation system
Copyright (C) 2006-2012 University of Edinburgh
-
+
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
-
+
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
-
+
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
diff --git a/phrase-extract/pcfg-extract/main.cc b/phrase-extract/pcfg-extract/main.cc
index 84051f2e2..24549990f 100644
--- a/phrase-extract/pcfg-extract/main.cc
+++ b/phrase-extract/pcfg-extract/main.cc
@@ -1,17 +1,17 @@
/***********************************************************************
Moses - statistical machine translation system
Copyright (C) 2006-2012 University of Edinburgh
-
+
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
-
+
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
-
+
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
diff --git a/phrase-extract/pcfg-extract/pcfg_extract.cc b/phrase-extract/pcfg-extract/pcfg_extract.cc
index a5e06aa82..becb8edee 100644
--- a/phrase-extract/pcfg-extract/pcfg_extract.cc
+++ b/phrase-extract/pcfg-extract/pcfg_extract.cc
@@ -1,17 +1,17 @@
/***********************************************************************
Moses - statistical machine translation system
Copyright (C) 2006-2012 University of Edinburgh
-
+
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
-
+
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
-
+
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
diff --git a/phrase-extract/pcfg-extract/rule_collection.cc b/phrase-extract/pcfg-extract/rule_collection.cc
index 21e84d2fa..488fca6ae 100644
--- a/phrase-extract/pcfg-extract/rule_collection.cc
+++ b/phrase-extract/pcfg-extract/rule_collection.cc
@@ -1,17 +1,17 @@
/***********************************************************************
Moses - statistical machine translation system
Copyright (C) 2006-2012 University of Edinburgh
-
+
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
-
+
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
-
+
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
diff --git a/phrase-extract/pcfg-extract/rule_extractor.cc b/phrase-extract/pcfg-extract/rule_extractor.cc
index bb4698fae..6a99f7848 100644
--- a/phrase-extract/pcfg-extract/rule_extractor.cc
+++ b/phrase-extract/pcfg-extract/rule_extractor.cc
@@ -1,17 +1,17 @@
/***********************************************************************
Moses - statistical machine translation system
Copyright (C) 2006-2012 University of Edinburgh
-
+
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
-
+
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
-
+
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
diff --git a/phrase-extract/pcfg-score/main.cc b/phrase-extract/pcfg-score/main.cc
index 5ce19f797..93db0837a 100644
--- a/phrase-extract/pcfg-score/main.cc
+++ b/phrase-extract/pcfg-score/main.cc
@@ -1,17 +1,17 @@
/***********************************************************************
Moses - statistical machine translation system
Copyright (C) 2006-2012 University of Edinburgh
-
+
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
-
+
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
-
+
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
diff --git a/phrase-extract/pcfg-score/pcfg_score.cc b/phrase-extract/pcfg-score/pcfg_score.cc
index a561c18ed..92f214c8f 100644
--- a/phrase-extract/pcfg-score/pcfg_score.cc
+++ b/phrase-extract/pcfg-score/pcfg_score.cc
@@ -1,17 +1,17 @@
/***********************************************************************
Moses - statistical machine translation system
Copyright (C) 2006-2012 University of Edinburgh
-
+
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
-
+
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
-
+
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
diff --git a/phrase-extract/pcfg-score/tree_scorer.cc b/phrase-extract/pcfg-score/tree_scorer.cc
index 53b6aaccf..9a40332d2 100644
--- a/phrase-extract/pcfg-score/tree_scorer.cc
+++ b/phrase-extract/pcfg-score/tree_scorer.cc
@@ -1,17 +1,17 @@
/***********************************************************************
Moses - statistical machine translation system
Copyright (C) 2006-2012 University of Edinburgh
-
+
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
-
+
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
-
+
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
diff --git a/search/applied.hh b/search/applied.hh
index 88961775f..9464080cc 100644
--- a/search/applied.hh
+++ b/search/applied.hh
@@ -9,12 +9,12 @@
namespace search {
-// A full hypothesis: a score, arity of the rule, a pointer to the decoder's rule (Note), and pointers to non-terminals that were substituted.
+// A full hypothesis: a score, arity of the rule, a pointer to the decoder's rule (Note), and pointers to non-terminals that were substituted.
template <class Below> class GenericApplied : public Header {
public:
GenericApplied() {}
- GenericApplied(void *location, PartialEdge partial)
+ GenericApplied(void *location, PartialEdge partial)
: Header(location) {
memcpy(Base(), partial.Base(), kHeaderSize);
Below *child_out = Children();
@@ -23,7 +23,7 @@ template <class Below> class GenericApplied : public Header {
for (; part != part_end_loop; ++part, ++child_out)
*child_out = Below(part->End());
}
-
+
GenericApplied(void *location, Score score, Arity arity, Note note, Moses::WordsRange range) : Header(location, arity) {
SetScore(score);
SetNote(note);
@@ -46,7 +46,7 @@ template <class Below> class GenericApplied : public Header {
}
};
-// Applied rule that references itself.
+// Applied rule that references itself.
class Applied : public GenericApplied<Applied> {
private:
typedef GenericApplied<Applied> P;
@@ -57,7 +57,7 @@ class Applied : public GenericApplied<Applied> {
Applied(History from) : P(from) {}
};
-// How to build single-best hypotheses.
+// How to build single-best hypotheses.
class SingleBest {
public:
typedef PartialEdge Combine;
@@ -68,7 +68,7 @@ class SingleBest {
}
NBestComplete Complete(PartialEdge partial) {
- if (!partial.Valid())
+ if (!partial.Valid())
return NBestComplete(NULL, lm::ngram::ChartState(), -INFINITY);
void *place_final = pool_.Allocate(Applied::Size(partial.GetArity()));
Applied(place_final, partial);
diff --git a/search/config.hh b/search/config.hh
index ba18c09e9..dd52303cb 100644
--- a/search/config.hh
+++ b/search/config.hh
@@ -10,7 +10,7 @@ struct NBestConfig {
keep = in_size;
size = in_size;
}
-
+
unsigned int keep, size;
};
diff --git a/search/edge.hh b/search/edge.hh
index 187904bf9..cee96b474 100644
--- a/search/edge.hh
+++ b/search/edge.hh
@@ -16,13 +16,13 @@ namespace search {
// Copyable, but the copy will be shallow.
class PartialEdge : public Header {
public:
- // Allow default construction for STL.
+ // Allow default construction for STL.
PartialEdge() {}
- PartialEdge(util::Pool &pool, Arity arity)
+ PartialEdge(util::Pool &pool, Arity arity)
: Header(pool.Allocate(Size(arity, arity + 1)), arity) {}
-
- PartialEdge(util::Pool &pool, Arity arity, Arity chart_states)
+
+ PartialEdge(util::Pool &pool, Arity arity, Arity chart_states)
: Header(pool.Allocate(Size(arity, chart_states)), arity) {}
// Non-terminals
diff --git a/search/edge_generator.cc b/search/edge_generator.cc
index 1f933453d..5a6431807 100644
--- a/search/edge_generator.cc
+++ b/search/edge_generator.cc
@@ -34,7 +34,7 @@ template <class Model> void FastScore(const Context<Model> &context, Arity victi
adjustment += lm::ngram::Subsume(context.LanguageModel(), before->left, before->right, after->left, after->right, update_reveal.left.length);
}
before->right = after->right;
- // Shift the others shifted one down, covering after.
+ // Shift the others shifted one down, covering after.
for (lm::ngram::ChartState *cover = after; cover < between + incomplete; ++cover) {
*cover = *(cover + 1);
}
@@ -55,7 +55,7 @@ template <class Model> PartialEdge EdgeGenerator::Pop(Context<Model> &context) {
Arity victim_completed;
Arity incomplete;
unsigned char lowest_niceness = 255;
- // Select victim or return if complete.
+ // Select victim or return if complete.
{
Arity completed = 0;
for (Arity i = 0; i != arity; ++i) {
@@ -89,20 +89,20 @@ template <class Model> PartialEdge EdgeGenerator::Pop(Context<Model> &context) {
memcpy(alternate.Between(), top.Between(), sizeof(lm::ngram::ChartState) * (incomplete + 1));
- // TODO: dedupe?
+ // TODO: dedupe?
generate_.push(alternate);
}
-#ifndef NDEBUG
+#ifndef NDEBUG
Score before = top.GetScore();
#endif
// top is now the continuation.
FastScore(context, victim, victim - victim_completed, incomplete, old_value, top);
- // TODO: dedupe?
+ // TODO: dedupe?
generate_.push(top);
assert(lowest_niceness != 254 || top.GetScore() == before);
- // Invalid indicates no new hypothesis generated.
+ // Invalid indicates no new hypothesis generated.
return PartialEdge();
}
diff --git a/search/header.hh b/search/header.hh
index d70524097..699d8ef15 100644
--- a/search/header.hh
+++ b/search/header.hh
@@ -10,7 +10,7 @@
namespace search {
-// Copying is shallow.
+// Copying is shallow.
class Header {
public:
bool Valid() const { return base_; }
diff --git a/search/nbest.cc b/search/nbest.cc
index 43ed702cb..7cf84dbb4 100644
--- a/search/nbest.cc
+++ b/search/nbest.cc
@@ -40,7 +40,7 @@ const std::vector<Applied> &NBestList::Extract(util::Pool &pool, std::size_t n)
Score NBestList::Visit(util::Pool &pool, std::size_t index) {
if (index + 1 < revealed_.size())
return revealed_[index + 1].GetScore() - revealed_[index].GetScore();
- if (queue_.empty())
+ if (queue_.empty())
return -INFINITY;
if (index + 1 == revealed_.size())
return queue_.top().GetScore() - revealed_[index].GetScore();
@@ -81,7 +81,7 @@ void NBestList::MoveTop(util::Pool &pool) {
if (child->index_) break;
}
- // Convert QueueEntry to Applied. This leaves some unused memory.
+ // Convert QueueEntry to Applied. This leaves some unused memory.
void *overwrite = entry.Children();
for (unsigned int i = 0; i < entry.GetArity(); ++i) {
RevealedRef from(*(static_cast<const RevealedRef*>(overwrite) + i));
diff --git a/search/nbest.hh b/search/nbest.hh
index 0aa1dfd57..17b08da38 100644
--- a/search/nbest.hh
+++ b/search/nbest.hh
@@ -19,8 +19,8 @@ class NBestList;
class NBestList {
private:
class RevealedRef {
- public:
- explicit RevealedRef(History history)
+ public:
+ explicit RevealedRef(History history)
: in_(static_cast<NBestList*>(history)), index_(0) {}
private:
@@ -29,7 +29,7 @@ class NBestList {
NBestList *in_;
std::size_t index_;
};
-
+
typedef GenericApplied<RevealedRef> QueueEntry;
public:
diff --git a/search/rule.hh b/search/rule.hh
index 43ca61625..dc45f6634 100644
--- a/search/rule.hh
+++ b/search/rule.hh
@@ -16,8 +16,8 @@ struct ScoreRuleRet {
unsigned int oov;
};
-// Pass <s> and </s> normally.
-// Indicate non-terminals with kNonTerminal.
+// Pass <s> and </s> normally.
+// Indicate non-terminals with kNonTerminal.
template <class Model> ScoreRuleRet ScoreRule(const Model &model, const std::vector<lm::WordIndex> &words, lm::ngram::ChartState *state_out);
} // namespace search
diff --git a/search/types.hh b/search/types.hh
index 832ef159f..e1239dfd0 100644
--- a/search/types.hh
+++ b/search/types.hh
@@ -18,7 +18,7 @@ union Note {
typedef void *History;
struct NBestComplete {
- NBestComplete(History in_history, const lm::ngram::ChartState &in_state, Score in_score)
+ NBestComplete(History in_history, const lm::ngram::ChartState &in_state, Score in_score)
: history(in_history), state(&in_state), score(in_score) {}
History history;
diff --git a/search/vertex.cc b/search/vertex.cc
index 305ea0c99..cb1df7e51 100644
--- a/search/vertex.cc
+++ b/search/vertex.cc
@@ -20,7 +20,7 @@ class DivideLeft {
: index_(index) {}
uint64_t operator()(const lm::ngram::ChartState &state) const {
- return (index_ < state.left.length) ?
+ return (index_ < state.left.length) ?
state.left.pointers[index_] :
(kCompleteAdd - state.left.full);
}
@@ -71,7 +71,7 @@ uint64_t Identify(const lm::ngram::Left &left, unsigned char index) {
template <class Side> class DetermineSame {
public:
- DetermineSame(const Side &side, unsigned char guaranteed)
+ DetermineSame(const Side &side, unsigned char guaranteed)
: side_(side), guaranteed_(guaranteed), shared_(side.length), complete_(true) {}
void Consider(const Side &other) {
diff --git a/search/vertex.hh b/search/vertex.hh
index f560dc8c6..e04325dc3 100644
--- a/search/vertex.hh
+++ b/search/vertex.hh
@@ -54,7 +54,7 @@ class VertexNode {
void BuildExtend();
- // Should only happen to a root node when the entire vertex is empty.
+ // Should only happen to a root node when the entire vertex is empty.
bool Empty() const {
return hypos_.empty() && extend_.empty();
}
@@ -74,7 +74,7 @@ class VertexNode {
return bound_;
}
- // Will be invalid unless this is a leaf.
+ // Will be invalid unless this is a leaf.
const History End() const {
assert(hypos_.size() == 1);
return hypos_.front().history;
diff --git a/search/vertex_generator.hh b/search/vertex_generator.hh
index 328da7933..6013cdb7d 100644
--- a/search/vertex_generator.hh
+++ b/search/vertex_generator.hh
@@ -15,7 +15,7 @@ namespace search {
class ContextBase;
-// Output makes the single-best or n-best list.
+// Output makes the single-best or n-best list.
template <class Output> class VertexGenerator {
public:
VertexGenerator(ContextBase &context, Vertex &gen, Output &nbest) : context_(context), gen_(gen), nbest_(nbest) {}
@@ -49,7 +49,7 @@ template <class Output> class VertexGenerator {
// Special case for root vertex: everything should come together into the root
// node. In theory, this should happen naturally due to state collapsing with
// <s> and </s>. If that's the case, VertexGenerator is fine, though it will
-// make one connection.
+// make one connection.
template <class Output> class RootVertexGenerator {
public:
RootVertexGenerator(Vertex &gen, Output &out) : gen_(gen), out_(out) {}
@@ -66,7 +66,7 @@ template <class Output> class RootVertexGenerator {
private:
Vertex &gen_;
-
+
typename Output::Combine combine_;
Output &out_;
};
diff --git a/symal/symal.cpp b/symal/symal.cpp
index 249aa6caa..7f3e22866 100644
--- a/symal/symal.cpp
+++ b/symal/symal.cpp
@@ -15,9 +15,9 @@
using namespace std;
-#define MAX_WORD 10000 // maximum lengthsource/target strings
+#define MAX_WORD 10000 // maximum lengthsource/target strings
#define MAX_M 400 // maximum length of source strings
-#define MAX_N 400 // maximum length of target strings
+#define MAX_N 400 // maximum length of target strings
#define UNION 1
#define INTERSECT 2
@@ -512,6 +512,6 @@ int main(int argc, char** argv)
if (out != &std::cout) {
delete inp;
}
-
+
exit(0);
}
diff --git a/util/bit_packing.cc b/util/bit_packing.cc
index d3f998d46..cffd9cf62 100644
--- a/util/bit_packing.cc
+++ b/util/bit_packing.cc
@@ -9,7 +9,7 @@ namespace {
template <bool> struct StaticCheck {};
template <> struct StaticCheck<true> { typedef bool StaticAssertionPassed; };
-// If your float isn't 4 bytes, we're hosed.
+// If your float isn't 4 bytes, we're hosed.
typedef StaticCheck<sizeof(float) == 4>::StaticAssertionPassed FloatSize;
} // namespace
@@ -34,7 +34,7 @@ void BitPackingSanity() {
if (test57 != ReadInt57(mem, b, 57, (1ULL << 57) - 1))
UTIL_THROW(Exception, "The bit packing routines are failing for your architecture. Please send a bug report with your architecture, operating system, and compiler.");
}
- // TODO: more checks.
+ // TODO: more checks.
}
} // namespace util
diff --git a/util/bit_packing.hh b/util/bit_packing.hh
index 3cf3cd4a1..b24fd9c1f 100644
--- a/util/bit_packing.hh
+++ b/util/bit_packing.hh
@@ -1,7 +1,7 @@
#ifndef UTIL_BIT_PACKING_H
#define UTIL_BIT_PACKING_H
-/* Bit-level packing routines
+/* Bit-level packing routines
*
* WARNING WARNING WARNING:
* The write functions assume that memory is zero initially. This makes them
@@ -9,10 +9,10 @@
* These routines assume that unaligned access to uint64_t is fast. This is
* the case on x86_64. I'm not sure how fast unaligned 64-bit access is on
* x86 but my target audience is large language models for which 64-bit is
- * necessary.
+ * necessary.
*
* Call the BitPackingSanity function to sanity check. Calling once suffices,
- * but it may be called multiple times when that's inconvenient.
+ * but it may be called multiple times when that's inconvenient.
*
* ARM and MinGW ports contributed by Hideo Okuma and Tomoyuki Yoshimura at
* NICT.
@@ -25,14 +25,14 @@
#include <endian.h>
#elif !defined(_WIN32) && !defined(_WIN64)
#include <arpa/nameser_compat.h>
-#endif
+#endif
#include <stdint.h>
#include <cstring>
namespace util {
-// Fun fact: __BYTE_ORDER is wrong on Solaris Sparc, but the version without __ is correct.
+// Fun fact: __BYTE_ORDER is wrong on Solaris Sparc, but the version without __ is correct.
#if BYTE_ORDER == LITTLE_ENDIAN
inline uint8_t BitPackShift(uint8_t bit, uint8_t /*length*/) {
return bit;
@@ -56,15 +56,15 @@ inline uint64_t ReadOff(const void *base, uint64_t bit_off) {
#endif
}
-/* Pack integers up to 57 bits using their least significant digits.
+/* Pack integers up to 57 bits using their least significant digits.
* The length is specified using mask:
- * Assumes mask == (1 << length) - 1 where length <= 57.
+ * Assumes mask == (1 << length) - 1 where length <= 57.
*/
inline uint64_t ReadInt57(const void *base, uint64_t bit_off, uint8_t length, uint64_t mask) {
return (ReadOff(base, bit_off) >> BitPackShift(bit_off & 7, length)) & mask;
}
/* Assumes value < (1 << length) and length <= 57.
- * Assumes the memory is zero initially.
+ * Assumes the memory is zero initially.
*/
inline void WriteInt57(void *base, uint64_t bit_off, uint8_t length, uint64_t value) {
#if defined(__arm) || defined(__arm__)
@@ -74,7 +74,7 @@ inline void WriteInt57(void *base, uint64_t bit_off, uint8_t length, uint64_t va
value64 |= (value << BitPackShift(bit_off & 7, length));
memcpy(base_off, &value64, sizeof(value64));
#else
- *reinterpret_cast<uint64_t*>(reinterpret_cast<uint8_t*>(base) + (bit_off >> 3)) |=
+ *reinterpret_cast<uint64_t*>(reinterpret_cast<uint8_t*>(base) + (bit_off >> 3)) |=
(value << BitPackShift(bit_off & 7, length));
#endif
}
@@ -99,7 +99,7 @@ inline void WriteInt25(void *base, uint64_t bit_off, uint8_t length, uint32_t va
value32 |= (value << BitPackShift(bit_off & 7, length));
memcpy(base_off, &value32, sizeof(value32));
#else
- *reinterpret_cast<uint32_t*>(reinterpret_cast<uint8_t*>(base) + (bit_off >> 3)) |=
+ *reinterpret_cast<uint32_t*>(reinterpret_cast<uint8_t*>(base) + (bit_off >> 3)) |=
(value << BitPackShift(bit_off & 7, length));
#endif
}
@@ -136,7 +136,7 @@ inline void UnsetSign(float &to) {
inline float ReadNonPositiveFloat31(const void *base, uint64_t bit_off) {
FloatEnc encoded;
encoded.i = ReadOff(base, bit_off) >> BitPackShift(bit_off & 7, 31);
- // Sign bit set means negative.
+ // Sign bit set means negative.
encoded.i |= kSignBit;
return encoded.f;
}
@@ -150,7 +150,7 @@ inline void WriteNonPositiveFloat31(void *base, uint64_t bit_off, float value) {
void BitPackingSanity();
// Return bits required to store integers upto max_value. Not the most
-// efficient implementation, but this is only called a few times to size tries.
+// efficient implementation, but this is only called a few times to size tries.
uint8_t RequiredBits(uint64_t max_value);
struct BitsMask {
diff --git a/util/cat_compressed_main.cc b/util/cat_compressed_main.cc
index 9ec8e81f7..0c7cda936 100644
--- a/util/cat_compressed_main.cc
+++ b/util/cat_compressed_main.cc
@@ -21,7 +21,7 @@ int main(int argc, char *argv[]) {
char *arg = argv[i];
if (!strcmp(arg, "--")) break;
if (!strcmp(arg, "-h") || !strcmp(arg, "--help")) {
- std::cerr <<
+ std::cerr <<
"A cat implementation that interprets compressed files.\n"
"Usage: " << argv[0] << " [file1] [file2] ...\n"
"If no file is provided, then stdin is read.\n";
diff --git a/util/ersatz_progress.cc b/util/ersatz_progress.cc
index 498ab5c58..55c82e7af 100644
--- a/util/ersatz_progress.cc
+++ b/util/ersatz_progress.cc
@@ -17,7 +17,7 @@ ErsatzProgress::~ErsatzProgress() {
if (out_) Finished();
}
-ErsatzProgress::ErsatzProgress(uint64_t complete, std::ostream *to, const std::string &message)
+ErsatzProgress::ErsatzProgress(uint64_t complete, std::ostream *to, const std::string &message)
: current_(0), next_(complete / kWidth), complete_(complete), stones_written_(0), out_(to) {
if (!out_) {
next_ = std::numeric_limits<uint64_t>::max();
diff --git a/util/ersatz_progress.hh b/util/ersatz_progress.hh
index b3bef7fa6..b47aded7d 100644
--- a/util/ersatz_progress.hh
+++ b/util/ersatz_progress.hh
@@ -6,7 +6,7 @@
#include <stdint.h>
// Ersatz version of boost::progress so core language model doesn't depend on
-// boost. Also adds option to print nothing.
+// boost. Also adds option to print nothing.
namespace util {
@@ -14,10 +14,10 @@ extern const char kProgressBanner[];
class ErsatzProgress {
public:
- // No output.
+ // No output.
ErsatzProgress();
- // Null means no output. The null value is useful for passing along the ostream pointer from another caller.
+ // Null means no output. The null value is useful for passing along the ostream pointer from another caller.
explicit ErsatzProgress(uint64_t complete, std::ostream *to = &std::cerr, const std::string &message = "");
~ErsatzProgress();
diff --git a/util/exception.cc b/util/exception.cc
index 32d48516a..588f5eae5 100644
--- a/util/exception.cc
+++ b/util/exception.cc
@@ -30,7 +30,7 @@ void Exception::SetLocation(const char *file, unsigned int line, const char *fun
/* The child class might have set some text, but we want this to come first.
* Another option would be passing this information to the constructor, but
* then child classes would have to accept constructor arguments and pass
- * them down.
+ * them down.
*/
text_ = stream_.str();
stream_.str("");
diff --git a/util/exception.hh b/util/exception.hh
index 2b503e50e..7a0e7c44a 100644
--- a/util/exception.hh
+++ b/util/exception.hh
@@ -19,10 +19,10 @@ class Exception : public std::exception {
Exception(const Exception &from);
Exception &operator=(const Exception &from);
- // Not threadsafe, but probably doesn't matter. FWIW, Boost's exception guidance implies that what() isn't threadsafe.
+ // Not threadsafe, but probably doesn't matter. FWIW, Boost's exception guidance implies that what() isn't threadsafe.
const char *what() const throw();
- // For use by the UTIL_THROW macros.
+ // For use by the UTIL_THROW macros.
void SetLocation(
const char *file,
unsigned int line,
@@ -33,7 +33,7 @@ class Exception : public std::exception {
private:
template <class Except, class Data> friend typename Except::template ExceptionTag<Except&>::Identity operator<<(Except &e, const Data &data);
- // This helps restrict operator<< defined below.
+ // This helps restrict operator<< defined below.
template <class T> struct ExceptionTag {
typedef T Identity;
};
@@ -42,9 +42,9 @@ class Exception : public std::exception {
mutable std::string text_;
};
-/* This implements the normal operator<< for Exception and all its children.
+/* This implements the normal operator<< for Exception and all its children.
* SFINAE means it only applies to Exception. Think of this as an ersatz
- * boost::enable_if.
+ * boost::enable_if.
*/
template <class Except, class Data> typename Except::template ExceptionTag<Except&>::Identity operator<<(Except &e, const Data &data) {
e.stream_ << data;
@@ -63,10 +63,10 @@ template <class Except, class Data> typename Except::template ExceptionTag<Excep
/* Create an instance of Exception, add the message Modify, and throw it.
* Modify is appended to the what() message and can contain << for ostream
- * operations.
+ * operations.
*
* do .. while kludge to swallow trailing ; character
- * http://gcc.gnu.org/onlinedocs/cpp/Swallowing-the-Semicolon.html .
+ * http://gcc.gnu.org/onlinedocs/cpp/Swallowing-the-Semicolon.html .
* Arg can be a constructor argument to the exception.
*/
#define UTIL_THROW_BACKEND(Condition, Exception, Arg, Modify) do { \
@@ -123,7 +123,7 @@ class FileOpenException : public Exception {
~FileOpenException() throw() {}
};
-// Utilities for overflow checking.
+// Utilities for overflow checking.
class OverflowException : public Exception {
public:
OverflowException() throw();
diff --git a/util/file.hh b/util/file.hh
index ca52dbfba..bd5873cbc 100644
--- a/util/file.hh
+++ b/util/file.hh
@@ -73,12 +73,12 @@ class EndOfFileException : public Exception {
~EndOfFileException() throw();
};
-// Open for read only.
+// Open for read only.
int OpenReadOrThrow(const char *name);
-// Create file if it doesn't exist, truncate if it does. Opened for write.
+// Create file if it doesn't exist, truncate if it does. Opened for write.
int CreateOrThrow(const char *name);
-// Return value for SizeFile when it can't size properly.
+// Return value for SizeFile when it can't size properly.
const uint64_t kBadSize = (uint64_t)-1;
uint64_t SizeFile(int fd);
uint64_t SizeOrThrow(int fd);
diff --git a/util/file_piece.cc b/util/file_piece.cc
index 37e492672..c808e7d90 100644
--- a/util/file_piece.cc
+++ b/util/file_piece.cc
@@ -26,10 +26,10 @@ ParseNumberException::ParseNumberException(StringPiece value) throw() {
*this << "Could not parse \"" << value << "\" into a number";
}
-// Sigh this is the only way I could come up with to do a _const_ bool. It has ' ', '\f', '\n', '\r', '\t', and '\v' (same as isspace on C locale).
+// Sigh this is the only way I could come up with to do a _const_ bool. It has ' ', '\f', '\n', '\r', '\t', and '\v' (same as isspace on C locale).
const bool kSpaces[256] = {0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
-FilePiece::FilePiece(const char *name, std::ostream *show_progress, std::size_t min_buffer) :
+FilePiece::FilePiece(const char *name, std::ostream *show_progress, std::size_t min_buffer) :
file_(OpenReadOrThrow(name)), total_size_(SizeFile(file_.get())), page_(SizePage()),
progress_(total_size_, total_size_ == kBadSize ? NULL : show_progress, std::string("Reading ") + name) {
Initialize(name, show_progress, min_buffer);
@@ -42,7 +42,7 @@ std::string NamePossiblyFind(int fd, const char *name) {
}
} // namespace
-FilePiece::FilePiece(int fd, const char *name, std::ostream *show_progress, std::size_t min_buffer) :
+FilePiece::FilePiece(int fd, const char *name, std::ostream *show_progress, std::size_t min_buffer) :
file_(fd), total_size_(SizeFile(file_.get())), page_(SizePage()),
progress_(total_size_, total_size_ == kBadSize ? NULL : show_progress, std::string("Reading ") + NamePossiblyFind(fd, name)) {
Initialize(NamePossiblyFind(fd, name).c_str(), show_progress, min_buffer);
@@ -56,7 +56,7 @@ FilePiece::FilePiece(std::istream &stream, const char *name, std::size_t min_buf
data_.reset(MallocOrThrow(default_map_size_), default_map_size_, scoped_memory::MALLOC_ALLOCATED);
position_ = data_.begin();
position_end_ = position_;
-
+
fell_back_.Reset(stream);
}
@@ -118,9 +118,9 @@ void FilePiece::Initialize(const char *name, std::ostream *show_progress, std::s
InitializeNoRead(name, min_buffer);
if (total_size_ == kBadSize) {
- // So the assertion passes.
+ // So the assertion passes.
fallback_to_read_ = false;
- if (show_progress)
+ if (show_progress)
*show_progress << "File " << name << " isn't normal. Using slower read() instead of mmap(). No progress bar." << std::endl;
TransitionToRead();
} else {
@@ -214,7 +214,7 @@ void FilePiece::Shift() {
uint64_t desired_begin = position_ - data_.begin() + mapped_offset_;
if (!fallback_to_read_) MMapShift(desired_begin);
- // Notice an mmap failure might set the fallback.
+ // Notice an mmap failure might set the fallback.
if (fallback_to_read_) ReadShift();
for (last_space_ = position_end_ - 1; last_space_ >= position_; --last_space_) {
@@ -223,13 +223,13 @@ void FilePiece::Shift() {
}
void FilePiece::MMapShift(uint64_t desired_begin) {
- // Use mmap.
+ // Use mmap.
uint64_t ignore = desired_begin % page_;
- // Duplicate request for Shift means give more data.
+ // Duplicate request for Shift means give more data.
if (position_ == data_.begin() + ignore && position_) {
default_map_size_ *= 2;
}
- // Local version so that in case of failure it doesn't overwrite the class variable.
+ // Local version so that in case of failure it doesn't overwrite the class variable.
uint64_t mapped_offset = desired_begin - ignore;
uint64_t mapped_size;
@@ -240,7 +240,7 @@ void FilePiece::MMapShift(uint64_t desired_begin) {
mapped_size = default_map_size_;
}
- // Forcibly clear the existing mmap first.
+ // Forcibly clear the existing mmap first.
data_.reset();
try {
MapRead(POPULATE_OR_LAZY, *file_, mapped_offset, mapped_size, data_);
@@ -248,7 +248,7 @@ void FilePiece::MMapShift(uint64_t desired_begin) {
if (desired_begin) {
SeekOrThrow(*file_, desired_begin);
}
- // The mmap was scheduled to end the file, but now we're going to read it.
+ // The mmap was scheduled to end the file, but now we're going to read it.
at_end_ = false;
TransitionToRead();
return;
@@ -278,10 +278,10 @@ void FilePiece::TransitionToRead() {
void FilePiece::ReadShift() {
assert(fallback_to_read_);
- // Bytes [data_.begin(), position_) have been consumed.
- // Bytes [position_, position_end_) have been read into the buffer.
+ // Bytes [data_.begin(), position_) have been consumed.
+ // Bytes [position_, position_end_) have been read into the buffer.
- // Start at the beginning of the buffer if there's nothing useful in it.
+ // Start at the beginning of the buffer if there's nothing useful in it.
if (position_ == position_end_) {
mapped_offset_ += (position_end_ - data_.begin());
position_ = data_.begin();
@@ -292,7 +292,7 @@ void FilePiece::ReadShift() {
if (already_read == default_map_size_) {
if (position_ == data_.begin()) {
- // Buffer too small.
+ // Buffer too small.
std::size_t valid_length = position_end_ - position_;
default_map_size_ *= 2;
data_.call_realloc(default_map_size_);
diff --git a/util/fixed_array.hh b/util/fixed_array.hh
index e2aa8ae7d..610cbdf12 100644
--- a/util/fixed_array.hh
+++ b/util/fixed_array.hh
@@ -23,22 +23,22 @@ template <class T> class FixedArray {
Init(limit);
}
- /**
+ /**
* Constructs an instance, but does not initialize it.
*
* Any objects constructed in this manner must be subsequently @ref FixedArray::Init() "initialized" prior to use.
*
* @see FixedArray::Init()
*/
- FixedArray()
- : newed_end_(NULL)
+ FixedArray()
+ : newed_end_(NULL)
#ifndef NDEBUG
- , allocated_end_(NULL)
+ , allocated_end_(NULL)
#endif
{}
- /**
- * Initialize with a given size bound but do not construct the objects.
+ /**
+ * Initialize with a given size bound but do not construct the objects.
*
* This method is responsible for allocating memory.
* Objects stored in this array will be constructed in a location within this allocated memory.
@@ -73,37 +73,37 @@ template <class T> class FixedArray {
/** Gets a pointer to the first object currently stored in this data structure. */
T *begin() { return static_cast<T*>(block_.get()); }
-
+
/** Gets a const pointer to the last object currently stored in this data structure. */
const T *begin() const { return static_cast<const T*>(block_.get()); }
-
+
/** Gets a pointer to the last object currently stored in this data structure. */
T *end() { return newed_end_; }
-
+
/** Gets a const pointer to the last object currently stored in this data structure. */
const T *end() const { return newed_end_; }
/** Gets a reference to the last object currently stored in this data structure. */
T &back() { return *(end() - 1); }
-
+
/** Gets a const reference to the last object currently stored in this data structure. */
const T &back() const { return *(end() - 1); }
/** Gets the number of objects currently stored in this data structure. */
std::size_t size() const { return end() - begin(); }
-
+
/** Returns true if there are no objects currently stored in this data structure. */
bool empty() const { return begin() == end(); }
- /**
- * Gets a reference to the object with index i currently stored in this data structure.
+ /**
+ * Gets a reference to the object with index i currently stored in this data structure.
*
* @param i Index of the object to reference
*/
T &operator[](std::size_t i) { return begin()[i]; }
-
- /**
- * Gets a const reference to the object with index i currently stored in this data structure.
+
+ /**
+ * Gets a const reference to the object with index i currently stored in this data structure.
*
* @param i Index of the object to reference
*/
diff --git a/util/getopt.c b/util/getopt.c
index 992c96b0c..50eef42cc 100644
--- a/util/getopt.c
+++ b/util/getopt.c
@@ -3,7 +3,7 @@ POSIX getopt for Windows
AT&T Public License
-Code given out at the 1985 UNIFORUM conference in Dallas.
+Code given out at the 1985 UNIFORUM conference in Dallas.
*/
#ifndef __GNUC__
diff --git a/util/getopt.hh b/util/getopt.hh
index 50eab56f4..9b0792b04 100644
--- a/util/getopt.hh
+++ b/util/getopt.hh
@@ -3,7 +3,7 @@ POSIX getopt for Windows
AT&T Public License
-Code given out at the 1985 UNIFORUM conference in Dallas.
+Code given out at the 1985 UNIFORUM conference in Dallas.
*/
#ifdef __GNUC__
diff --git a/util/mmap.cc b/util/mmap.cc
index b91f181f4..7dcb57ba3 100644
--- a/util/mmap.cc
+++ b/util/mmap.cc
@@ -56,7 +56,7 @@ void UnmapOrThrow(void *start, size_t length) {
scoped_mmap::~scoped_mmap() {
if (data_ != (void*)-1) {
try {
- // Thanks Denis Filimonov for pointing out NFS likes msync first.
+ // Thanks Denis Filimonov for pointing out NFS likes msync first.
SyncOrThrow(data_, size_);
UnmapOrThrow(data_, size_);
} catch (const util::ErrnoException &e) {
diff --git a/util/mmap.hh b/util/mmap.hh
index 37feb5bee..9ac604975 100644
--- a/util/mmap.hh
+++ b/util/mmap.hh
@@ -1,6 +1,6 @@
#ifndef UTIL_MMAP_H
#define UTIL_MMAP_H
-// Utilities for mmaped files.
+// Utilities for mmaped files.
#include <cstddef>
#include <limits>
@@ -14,7 +14,7 @@ class scoped_fd;
long SizePage();
-// (void*)-1 is MAP_FAILED; this is done to avoid including the mmap header here.
+// (void*)-1 is MAP_FAILED; this is done to avoid including the mmap header here.
class scoped_mmap {
public:
scoped_mmap() : data_((void*)-1), size_(0) {}
@@ -47,13 +47,13 @@ class scoped_mmap {
/* For when the memory might come from mmap, new char[], or malloc. Uses NULL
* and 0 for blanks even though mmap signals errors with (void*)-1). The reset
- * function checks that blank for mmap.
+ * function checks that blank for mmap.
*/
class scoped_memory {
public:
typedef enum {MMAP_ALLOCATED, ARRAY_ALLOCATED, MALLOC_ALLOCATED, NONE_ALLOCATED} Alloc;
- scoped_memory(void *data, std::size_t size, Alloc source)
+ scoped_memory(void *data, std::size_t size, Alloc source)
: data_(data), size_(size), source_(source) {}
scoped_memory() : data_(NULL), size_(0), source_(NONE_ALLOCATED) {}
@@ -90,9 +90,9 @@ typedef enum {
LAZY,
// On linux, pass MAP_POPULATE to mmap.
POPULATE_OR_LAZY,
- // Populate on Linux. malloc and read on non-Linux.
+ // Populate on Linux. malloc and read on non-Linux.
POPULATE_OR_READ,
- // malloc and read.
+ // malloc and read.
READ,
// malloc and read in parallel (recommended for Lustre)
PARALLEL_READ,
@@ -110,18 +110,18 @@ void MapRead(LoadMethod method, int fd, uint64_t offset, std::size_t size, scope
void MapAnonymous(std::size_t size, scoped_memory &to);
-// Open file name with mmap of size bytes, all of which are initially zero.
+// Open file name with mmap of size bytes, all of which are initially zero.
void *MapZeroedWrite(int fd, std::size_t size);
void *MapZeroedWrite(const char *name, std::size_t size, scoped_fd &file);
-// msync wrapper
+// msync wrapper
void SyncOrThrow(void *start, size_t length);
// Forward rolling memory map with no overlap.
class Rolling {
public:
Rolling() {}
-
+
explicit Rolling(void *data) { Init(data); }
Rolling(const Rolling &copy_from, uint64_t increase = 0);
@@ -163,7 +163,7 @@ class Rolling {
}
return ptr_;
}
-
+
// Returns indexed pointer.
void *CheckedIndex(uint64_t index) {
return static_cast<uint8_t*>(CheckedBase(index)) + index;
@@ -178,7 +178,7 @@ class Rolling {
void *ptr_;
uint64_t current_begin_;
uint64_t current_end_;
-
+
scoped_memory mem_;
int fd_;
diff --git a/util/multi_intersection.hh b/util/multi_intersection.hh
index 2955acc72..73954608e 100644
--- a/util/multi_intersection.hh
+++ b/util/multi_intersection.hh
@@ -22,7 +22,7 @@ template <class Range> struct RangeLessBySize : public std::binary_function<cons
* order. sets is changed to truncate the beginning of each sequence to the
* location of the match or an empty set. Precondition: sets is not empty
* since the intersection over null is the universe and this function does not
- * know the universe.
+ * know the universe.
*/
template <class Iterator, class Less> boost::optional<typename std::iterator_traits<Iterator>::value_type> FirstIntersectionSorted(std::vector<boost::iterator_range<Iterator> > &sets, const Less &less = std::less<typename std::iterator_traits<Iterator>::value_type>()) {
typedef std::vector<boost::iterator_range<Iterator> > Sets;
@@ -31,7 +31,7 @@ template <class Iterator, class Less> boost::optional<typename std::iterator_tra
assert(!sets.empty());
if (sets.front().empty()) return boost::optional<Value>();
- // Possibly suboptimal to copy for general Value; makes unsigned int go slightly faster.
+ // Possibly suboptimal to copy for general Value; makes unsigned int go slightly faster.
Value highest(sets.front().front());
for (typename Sets::iterator i(sets.begin()); i != sets.end(); ) {
i->advance_begin(std::lower_bound(i->begin(), i->end(), highest, less) - i->begin());
diff --git a/util/multi_intersection_test.cc b/util/multi_intersection_test.cc
index 970afc171..ee5af7d61 100644
--- a/util/multi_intersection_test.cc
+++ b/util/multi_intersection_test.cc
@@ -8,7 +8,7 @@ namespace {
BOOST_AUTO_TEST_CASE(Empty) {
std::vector<boost::iterator_range<const unsigned int*> > sets;
-
+
sets.push_back(boost::iterator_range<const unsigned int*>(static_cast<const unsigned int*>(NULL), static_cast<const unsigned int*>(NULL)));
BOOST_CHECK(!FirstIntersection(sets));
}
diff --git a/util/murmur_hash.cc b/util/murmur_hash.cc
index d04e6ecd0..bf3249869 100644
--- a/util/murmur_hash.cc
+++ b/util/murmur_hash.cc
@@ -2,8 +2,8 @@
* code is released to the public domain. For business purposes, Murmurhash is
* under the MIT license."
* This is modified from the original:
- * ULL tag on 0xc6a4a7935bd1e995 so this will compile on 32-bit.
- * length changed to unsigned int.
+ * ULL tag on 0xc6a4a7935bd1e995 so this will compile on 32-bit.
+ * length changed to unsigned int.
* placed in namespace util
* add MurmurHashNative
* default option = 0 for seed
@@ -18,7 +18,7 @@ namespace util {
//-----------------------------------------------------------------------------
// MurmurHash2, 64-bit versions, by Austin Appleby
-// The same caveats as 32-bit MurmurHash2 apply here - beware of alignment
+// The same caveats as 32-bit MurmurHash2 apply here - beware of alignment
// and endian-ness issues if used across multiple platforms.
// 64-bit hash for 64-bit platforms
@@ -49,12 +49,12 @@ uint64_t MurmurHash64A ( const void * key, std::size_t len, uint64_t seed )
uint64_t k = *data++;
#endif
- k *= m;
- k ^= k >> r;
- k *= m;
-
+ k *= m;
+ k ^= k >> r;
+ k *= m;
+
h ^= k;
- h *= m;
+ h *= m;
}
const unsigned char * data2 = (const unsigned char*)data;
@@ -70,13 +70,13 @@ uint64_t MurmurHash64A ( const void * key, std::size_t len, uint64_t seed )
case 1: h ^= uint64_t(data2[0]);
h *= m;
};
-
+
h ^= h >> r;
h *= m;
h ^= h >> r;
return h;
-}
+}
// 64-bit hash for 32-bit platforms
@@ -151,7 +151,7 @@ uint64_t MurmurHash64B ( const void * key, std::size_t len, uint64_t seed )
return h;
}
-// Trick to test for 64-bit architecture at compile time.
+// Trick to test for 64-bit architecture at compile time.
namespace {
#ifdef __clang__
#pragma clang diagnostic push
diff --git a/util/pcqueue.hh b/util/pcqueue.hh
index f74ef18e8..05c868fba 100644
--- a/util/pcqueue.hh
+++ b/util/pcqueue.hh
@@ -74,10 +74,10 @@ inline void WaitSemaphore (Semaphore &on) {
/**
* Producer consumer queue safe for multiple producers and multiple consumers.
- * T must be default constructable and have operator=.
+ * T must be default constructible and have operator=.
* The value is copied twice for Consume(T &out) or three times for Consume(),
* so larger objects should be passed via pointer.
- * Strong exception guarantee if operator= throws. Undefined if semaphores throw.
+ * Strong exception guarantee if operator= throws. Undefined if semaphores throw.
*/
template <class T> class PCQueue : boost::noncopyable {
public:
@@ -130,7 +130,7 @@ template <class T> class PCQueue : boost::noncopyable {
Consume(ret);
return ret;
}
-
+
private:
// Number of empty spaces in storage_.
Semaphore empty_;
diff --git a/util/pool.hh b/util/pool.hh
index fd55572cd..511b6d995 100644
--- a/util/pool.hh
+++ b/util/pool.hh
@@ -1,5 +1,5 @@
// Very simple pool. It can only allocate memory. And all of the memory it
-// allocates must be freed at the same time.
+// allocates must be freed at the same time.
#ifndef UTIL_POOL_H
#define UTIL_POOL_H
@@ -37,7 +37,7 @@ class Pool {
// no copying
Pool(const Pool &);
Pool &operator=(const Pool &);
-};
+};
} // namespace util
diff --git a/util/probing_hash_table.hh b/util/probing_hash_table.hh
index 83fd0ec12..245340ddb 100644
--- a/util/probing_hash_table.hh
+++ b/util/probing_hash_table.hh
@@ -34,7 +34,7 @@ template <class EntryT, class HashT, class EqualT> class AutoProbing;
* Memory management and initialization is externalized to make it easier to
* serialize these to disk and load them quickly.
* Uses linear probing to find value.
- * Only insert and lookup operations.
+ * Only insert and lookup operations.
*/
template <class EntryT, class HashT, class EqualT = std::equal_to<typename EntryT::Key> > class ProbingHashTable {
public:
@@ -50,7 +50,7 @@ template <class EntryT, class HashT, class EqualT = std::equal_to<typename Entry
return buckets * sizeof(Entry);
}
- // Must be assigned to later.
+ // Must be assigned to later.
ProbingHashTable() : entries_(0)
#ifdef DEBUG
, initialized_(false)
@@ -98,12 +98,12 @@ template <class EntryT, class HashT, class EqualT = std::equal_to<typename Entry
return false;
}
if (++i == end_) i = begin_;
- }
+ }
}
void FinishedInserting() {}
- // Don't change anything related to GetKey,
+ // Don't change anything related to GetKey,
template <class Key> bool UnsafeMutableFind(const Key key, MutableIterator &out) {
#ifdef DEBUG
assert(initialized_);
@@ -136,7 +136,7 @@ template <class EntryT, class HashT, class EqualT = std::equal_to<typename Entry
if (equal_(got, key)) { out = i; return true; }
if (equal_(got, invalid_)) return false;
if (++i == end_) i = begin_;
- }
+ }
}
// Like Find but we're sure it must be there.
@@ -253,7 +253,7 @@ template <class EntryT, class HashT, class EqualT = std::equal_to<typename Entry
#endif
};
-// Resizable linear probing hash table. This owns the memory.
+// Resizable linear probing hash table. This owns the memory.
template <class EntryT, class HashT, class EqualT = std::equal_to<typename EntryT::Key> > class AutoProbing {
private:
typedef ProbingHashTable<EntryT, HashT, EqualT> Backend;
diff --git a/util/read_compressed.cc b/util/read_compressed.cc
index 7f240dd7e..504c579c5 100644
--- a/util/read_compressed.cc
+++ b/util/read_compressed.cc
@@ -60,7 +60,7 @@ namespace {
ReadBase *ReadFactory(int fd, uint64_t &raw_amount, const void *already_data, std::size_t already_size, bool require_compressed);
-// Completed file that other classes can thunk to.
+// Completed file that other classes can thunk to.
class Complete : public ReadBase {
public:
std::size_t Read(void *, std::size_t, ReadCompressed &) {
@@ -121,7 +121,7 @@ template <class Compression> class StreamCompressed : public ReadBase {
: file_(fd),
in_buffer_(MallocOrThrow(kInputBuffer)),
back_(memcpy(in_buffer_.get(), already_data, already_size), already_size) {}
-
+
std::size_t Read(void *to, std::size_t amount, ReadCompressed &thunk) {
if (amount == 0) return 0;
back_.SetOutput(to, amount);
@@ -162,8 +162,8 @@ class GZip {
stream_.zfree = Z_NULL;
stream_.opaque = Z_NULL;
stream_.msg = NULL;
- // 32 for zlib and gzip decoding with automatic header detection.
- // 15 for maximum window size.
+ // 32 for zlib and gzip decoding with automatic header detection.
+ // 15 for maximum window size.
UTIL_THROW_IF(Z_OK != inflateInit2(&stream_, 32 + 15), GZException, "Failed to initialize zlib.");
}
diff --git a/util/read_compressed.hh b/util/read_compressed.hh
index 9b36f641c..935a49579 100644
--- a/util/read_compressed.hh
+++ b/util/read_compressed.hh
@@ -38,10 +38,10 @@ class ReadBase;
class ReadCompressed {
public:
static const std::size_t kMagicSize = 6;
- // Must have at least kMagicSize bytes.
+ // Must have at least kMagicSize bytes.
static bool DetectCompressedMagic(const void *from);
- // Takes ownership of fd.
+ // Takes ownership of fd.
explicit ReadCompressed(int fd);
// Try to avoid using this. Use the fd instead.
@@ -53,7 +53,7 @@ class ReadCompressed {
~ReadCompressed();
- // Takes ownership of fd.
+ // Takes ownership of fd.
void Reset(int fd);
// Same advice as the constructor.
@@ -74,7 +74,7 @@ class ReadCompressed {
uint64_t raw_amount_;
- // No copying.
+ // No copying.
ReadCompressed(const ReadCompressed &);
void operator=(const ReadCompressed &);
};
diff --git a/util/scoped.hh b/util/scoped.hh
index 60c36c36a..c347a43cc 100644
--- a/util/scoped.hh
+++ b/util/scoped.hh
@@ -86,7 +86,7 @@ class scoped_malloc : public scoped_c<void, std::free> {
struct scoped_delete_array_forward {
template <class T> static void Close(T *p) { delete [] p; }
};
-// Hat tip to boost.
+// Hat tip to boost.
template <class T> class scoped_array : public scoped<T, scoped_delete_array_forward> {
public:
explicit scoped_array(T *p = NULL) : scoped<T, scoped_delete_array_forward>(p) {}
diff --git a/util/sorted_uniform.hh b/util/sorted_uniform.hh
index 3673a8b5d..ddd2b3f2a 100644
--- a/util/sorted_uniform.hh
+++ b/util/sorted_uniform.hh
@@ -22,7 +22,7 @@ struct Pivot64 {
}
};
-// Use when off * width is <2^64. This is guaranteed when each of them is actually a 32-bit value.
+// Use when off * width is <2^64. This is guaranteed when each of them is actually a 32-bit value.
struct Pivot32 {
static inline std::size_t Calc(uint64_t off, uint64_t range, uint64_t width) {
return static_cast<std::size_t>((off * width) / (range + 1));
@@ -56,7 +56,7 @@ template <class Iterator, class Accessor> bool BinaryFind(
return false;
}
-// Search the range [before_it + 1, after_it - 1] for key.
+// Search the range [before_it + 1, after_it - 1] for key.
// Preconditions:
// before_v <= key <= after_v
// before_v <= all values in the range [before_it + 1, after_it - 1] <= after_v
@@ -90,7 +90,7 @@ template <class Iterator, class Accessor, class Pivot> bool SortedUniformFind(co
if (key == below) { out = begin; return true; }
return false;
}
- // Make the range [begin, end].
+ // Make the range [begin, end].
--end;
typename Accessor::Key above(accessor(end));
if (key >= above) {
diff --git a/util/sorted_uniform_test.cc b/util/sorted_uniform_test.cc
index d9f6fad1e..39f05e57e 100644
--- a/util/sorted_uniform_test.cc
+++ b/util/sorted_uniform_test.cc
@@ -87,7 +87,7 @@ template <class Key> void RandomTest(Key upper, size_t entries, size_t queries)
}
std::sort(backing.begin(), backing.end());
- // Random queries.
+ // Random queries.
for (size_t i = 0; i < queries; ++i) {
const Key key = gen_key();
Check<Key, unsigned char>(&*backing.begin(), &*backing.end(), reference, key);
diff --git a/util/stream/block.hh b/util/stream/block.hh
index aa7e28bb1..6a70dba3e 100644
--- a/util/stream/block.hh
+++ b/util/stream/block.hh
@@ -12,13 +12,13 @@ namespace stream {
*/
class Block {
public:
-
- /**
- * Constructs an empty block.
+
+ /**
+ * Constructs an empty block.
*/
Block() : mem_(NULL), valid_size_(0) {}
- /**
+ /**
* Constructs a block that encapsulates a segment of memory.
*
* @param[in] mem The segment of memory to encapsulate
@@ -33,9 +33,9 @@ class Block {
*/
void SetValidSize(std::size_t to) { valid_size_ = to; }
- /**
+ /**
* Gets the number of bytes in this block that should be interpreted as valid.
- * This is important because read might fill in less than Allocated at EOF.
+ * This is important because read might fill in less than Allocated at EOF.
*/
std::size_t ValidSize() const { return valid_size_; }
@@ -45,34 +45,34 @@ class Block {
/** Gets a const void pointer to the memory underlying this block. */
const void *Get() const { return mem_; }
-
+
/**
* Gets a const void pointer to the end of the valid section of memory
* encapsulated by this block.
*/
- const void *ValidEnd() const {
+ const void *ValidEnd() const {
return reinterpret_cast<const uint8_t*>(mem_) + valid_size_;
}
/**
* Returns true if this block encapsulates a valid (non-NULL) block of memory.
- *
+ *
* This method is a user-defined implicit conversion function to boolean;
- * among other things, this method enables bare instances of this class
+ * among other things, this method enables bare instances of this class
* to be used as the condition of an if statement.
*/
operator bool() const { return mem_ != NULL; }
-
+
/**
* Returns true if this block is empty.
- *
+ *
* In other words, if Get()==NULL, this method will return true.
*/
bool operator!() const { return mem_ == NULL; }
-
+
private:
friend class Link;
-
+
/**
* Points this block's memory at NULL.
*
diff --git a/util/stream/chain.cc b/util/stream/chain.cc
index 7b68400a8..39f2f3fbb 100644
--- a/util/stream/chain.cc
+++ b/util/stream/chain.cc
@@ -37,7 +37,7 @@ Chain::Chain(const ChainConfig &config) : config_(config), complete_called_(fals
UTIL_THROW_IF(!config.entry_size, ChainConfigException, "zero-size entries.");
UTIL_THROW_IF(!config.block_count, ChainConfigException, "block count zero");
UTIL_THROW_IF(config.total_memory < config.entry_size * config.block_count, ChainConfigException, config.total_memory << " total memory, too small for " << config.block_count << " blocks of containing entries of size " << config.entry_size);
- // Round down block size to a multiple of entry size.
+ // Round down block size to a multiple of entry size.
block_size_ = config.total_memory / (config.block_count * config.entry_size) * config.entry_size;
}
@@ -65,7 +65,7 @@ Chain &Chain::operator>>(const PWriteAndRecycle &writer) {
void Chain::Wait(bool release_memory) {
if (queues_.empty()) {
assert(threads_.empty());
- return; // Nothing to wait for.
+ return; // Nothing to wait for.
}
if (!complete_called_) CompleteLoop();
threads_.clear();
@@ -84,15 +84,15 @@ void Chain::Wait(bool release_memory) {
void Chain::Start() {
Wait(false);
if (!memory_.get()) {
- // Allocate memory.
+ // Allocate memory.
assert(threads_.empty());
assert(queues_.empty());
std::size_t malloc_size = block_size_ * config_.block_count;
memory_.reset(MallocOrThrow(malloc_size));
}
- // This queue can accomodate all blocks.
+ // This queue can accommodate all blocks.
queues_.push_back(new PCQueue<Block>(config_.block_count));
- // Populate the lead queue with blocks.
+ // Populate the lead queue with blocks.
uint8_t *base = static_cast<uint8_t*>(memory_.get());
for (std::size_t i = 0; i < config_.block_count; ++i) {
queues_.front().Produce(Block(base, block_size_));
@@ -124,7 +124,7 @@ Link::Link(const ChainPosition &position) : in_(NULL) {
Link::~Link() {
if (current_) {
- // Probably an exception unwinding.
+ // Probably an exception unwinding.
std::cerr << "Last input should have been poison." << std::endl;
// abort();
} else {
diff --git a/util/stream/chain.hh b/util/stream/chain.hh
index 28dc8060e..0cd8c2aae 100644
--- a/util/stream/chain.hh
+++ b/util/stream/chain.hh
@@ -23,10 +23,10 @@ class ChainConfigException : public Exception {
};
class Chain;
-
+
/**
* Encapsulates a @ref PCQueue "producer queue" and a @ref PCQueue "consumer queue" within a @ref Chain "chain".
- *
+ *
* Specifies position in chain for Link constructor.
*/
class ChainPosition {
@@ -35,7 +35,7 @@ class ChainPosition {
private:
friend class Chain;
friend class Link;
- ChainPosition(PCQueue<Block> &in, PCQueue<Block> &out, Chain *chain, MultiProgress &progress)
+ ChainPosition(PCQueue<Block> &in, PCQueue<Block> &out, Chain *chain, MultiProgress &progress)
: in_(&in), out_(&out), chain_(chain), progress_(progress.Add()) {}
PCQueue<Block> *in_, *out_;
@@ -45,7 +45,7 @@ class ChainPosition {
WorkerProgress progress_;
};
-
+
/**
* Encapsulates a worker thread processing data at a given position in the chain.
*
@@ -53,7 +53,7 @@ class ChainPosition {
*/
class Thread {
public:
-
+
/**
* Constructs a new Thread in which the provided Worker is Run().
*
@@ -102,7 +102,7 @@ class Recycler {
extern const Recycler kRecycle;
class WriteAndRecycle;
class PWriteAndRecycle;
-
+
/**
* Represents a sequence of workers, through which @ref Block "blocks" can pass.
*/
@@ -113,10 +113,10 @@ class Chain {
};
public:
-
- /**
+
+ /**
* Constructs a configured Chain.
- *
+ *
* @param config Specifies how to configure the Chain.
*/
explicit Chain(const ChainConfig &config);
@@ -146,7 +146,7 @@ class Chain {
std::size_t EntrySize() const {
return config_.entry_size;
}
-
+
/**
* Gets the inital @ref Block::ValidSize "valid size" for @ref Block "blocks" in this chain.
*
@@ -159,10 +159,10 @@ class Chain {
/** Two ways to add to the chain: Add() or operator>>. */
ChainPosition Add();
- /**
+ /**
* Adds a new worker to this chain,
* and runs that worker in a new Thread owned by this chain.
- *
+ *
* The worker must have a Run method that accepts a position argument.
*
* @see Thread::operator()()
@@ -173,10 +173,10 @@ class Chain {
return *this;
}
- /**
+ /**
* Adds a new worker to this chain (but avoids copying that worker),
* and runs that worker in a new Thread owned by this chain.
- *
+ *
* The worker must have a Run method that accepts a position argument.
*
* @see Thread::operator()()
@@ -187,14 +187,14 @@ class Chain {
return *this;
}
- // Note that Link and Stream also define operator>> outside this class.
+ // Note that Link and Stream also define operator>> outside this class.
- // To complete the loop, call CompleteLoop(), >> kRecycle, or the destructor.
+ // To complete the loop, call CompleteLoop(), >> kRecycle, or the destructor.
void CompleteLoop() {
threads_.push_back(new Thread(Complete(), kRecycle));
}
- /**
+ /**
* Adds a Recycler worker to this chain,
* and runs that worker in a new Thread owned by this chain.
*/
@@ -203,17 +203,17 @@ class Chain {
return *this;
}
- /**
+ /**
* Adds a WriteAndRecycle worker to this chain,
* and runs that worker in a new Thread owned by this chain.
*/
Chain &operator>>(const WriteAndRecycle &writer);
Chain &operator>>(const PWriteAndRecycle &writer);
- // Chains are reusable. Call Wait to wait for everything to finish and free memory.
+ // Chains are reusable. Call Wait to wait for everything to finish and free memory.
void Wait(bool release_memory = true);
- // Waits for the current chain to complete (if any) then starts again.
+ // Waits for the current chain to complete (if any) then starts again.
void Start();
bool Running() const { return !queues_.empty(); }
@@ -237,29 +237,29 @@ class Chain {
};
// Create the link in the worker thread using the position token.
-/**
+/**
* Represents a C++ style iterator over @ref Block "blocks".
*/
class Link {
public:
-
+
// Either default construct and Init or just construct all at once.
-
+
/**
* Constructs an @ref Init "initialized" link.
*
* @see Init
*/
explicit Link(const ChainPosition &position);
-
- /**
- * Constructs a link that must subsequently be @ref Init "initialized".
+
+ /**
+ * Constructs a link that must subsequently be @ref Init "initialized".
*
* @see Init
*/
Link();
-
- /**
+
+ /**
* Initializes the link with the input @ref PCQueue "consumer queue" and output @ref PCQueue "producer queue" at a given @ref ChainPosition "position" in the @ref Chain "chain".
*
* @see Link()
@@ -269,7 +269,7 @@ class Link {
/**
* Destructs the link object.
*
- * If necessary, this method will pass a poison block
+ * If necessary, this method will pass a poison block
* to this link's output @ref PCQueue "producer queue".
*
* @see Block::SetToPoison()
@@ -290,7 +290,7 @@ class Link {
* Gets a pointer to the @ref Block "block" at this link.
*/
Block *operator->() { return &current_; }
-
+
/**
* Gets a const pointer to the @ref Block "block" at this link.
*/
@@ -303,25 +303,25 @@ class Link {
/**
* Returns true if the @ref Block "block" at this link encapsulates a valid (non-NULL) block of memory.
- *
+ *
* This method is a user-defined implicit conversion function to boolean;
- * among other things, this method enables bare instances of this class
+ * among other things, this method enables bare instances of this class
* to be used as the condition of an if statement.
*/
operator bool() const { return current_; }
- /**
+ /**
* @ref Block::SetToPoison() "Poisons" the @ref Block "block" at this link,
* and passes this now-poisoned block to this link's output @ref PCQueue "producer queue".
*
* @see Block::SetToPoison()
*/
void Poison();
-
+
private:
Block current_;
PCQueue<Block> *in_, *out_;
-
+
bool poisoned_;
WorkerProgress progress_;
diff --git a/util/stream/config.hh b/util/stream/config.hh
index 6bad36bc5..e94cf3487 100644
--- a/util/stream/config.hh
+++ b/util/stream/config.hh
@@ -10,11 +10,11 @@ namespace util { namespace stream {
* Represents how a chain should be configured.
*/
struct ChainConfig {
-
+
/** Constructs an configuration with underspecified (or default) parameters. */
ChainConfig() {}
- /**
+ /**
* Constructs a chain configuration object.
*
* @param [in] in_entry_size Number of bytes in each record.
@@ -29,26 +29,26 @@ struct ChainConfig {
* Number of bytes in each record.
*/
std::size_t entry_size;
-
+
/**
* Number of blocks in the chain.
*/
std::size_t block_count;
-
- /**
+
+ /**
* Total number of bytes available to the chain.
* This value will be divided amongst the blocks in the chain.
- * Chain's constructor will make this a multiple of entry_size.
+ * Chain's constructor will make this a multiple of entry_size.
*/
std::size_t total_memory;
};
-
+
/**
* Represents how a sorter should be configured.
*/
struct SortConfig {
-
+
/** Filename prefix where temporary files should be placed. */
std::string temp_prefix;
diff --git a/util/stream/io.cc b/util/stream/io.cc
index fa8467abd..c272d779c 100644
--- a/util/stream/io.cc
+++ b/util/stream/io.cc
@@ -16,7 +16,7 @@ void Read::Run(const ChainPosition &position) {
const std::size_t entry_size = position.GetChain().EntrySize();
for (Link link(position); link; ++link) {
std::size_t got = util::ReadOrEOF(file_, link->Get(), block_size);
- UTIL_THROW_IF(got % entry_size, ReadSizeException, "File ended with " << got << " bytes, not a multiple of " << entry_size << ".");
+ UTIL_THROW_IF(got % entry_size, ReadSizeException, "File ended with " << got << " bytes, not a multiple of " << entry_size << ".");
if (got == 0) {
link.Poison();
return;
diff --git a/util/stream/io.hh b/util/stream/io.hh
index 8dae2cbff..c3b53bbfe 100644
--- a/util/stream/io.hh
+++ b/util/stream/io.hh
@@ -18,12 +18,12 @@ class ReadSizeException : public util::Exception {
class Read {
public:
explicit Read(int fd) : file_(fd) {}
- void Run(const ChainPosition &position);
+ void Run(const ChainPosition &position);
private:
int file_;
};
-// Like read but uses pread so that the file can be accessed from multiple threads.
+// Like read but uses pread so that the file can be accessed from multiple threads.
class PRead {
public:
explicit PRead(int fd, bool take_own = false) : file_(fd), own_(take_own) {}
@@ -60,7 +60,7 @@ class PWriteAndRecycle {
};
-// Reuse the same file over and over again to buffer output.
+// Reuse the same file over and over again to buffer output.
class FileBuffer {
public:
explicit FileBuffer(int fd) : file_(fd) {}
diff --git a/util/stream/line_input.cc b/util/stream/line_input.cc
index dafa50207..0ad8800f6 100644
--- a/util/stream/line_input.cc
+++ b/util/stream/line_input.cc
@@ -14,7 +14,7 @@ void LineInput::Run(const ChainPosition &position) {
ReadCompressed reader(fd_);
// Holding area for beginning of line to be placed in next block.
std::vector<char> carry;
-
+
for (Link block(position); ; ++block) {
char *to = static_cast<char*>(block->Get());
char *begin = to;
@@ -39,7 +39,7 @@ void LineInput::Run(const ChainPosition &position) {
UTIL_THROW_IF(newline < begin, Exception, "Did not find a newline in " << position.GetChain().BlockSize() << " bytes of input of " << NameFromFD(fd_) << ". Is this a text file?");
if (*newline == '\n') break;
}
-
+
// Copy everything after the last newline to the carry.
carry.clear();
carry.resize(to - (newline + 1));
diff --git a/util/stream/multi_progress.cc b/util/stream/multi_progress.cc
index 7d6a6a73a..59750f516 100644
--- a/util/stream/multi_progress.cc
+++ b/util/stream/multi_progress.cc
@@ -31,9 +31,9 @@ MultiProgress::~MultiProgress() {
}
void MultiProgress::Activate() {
- active_ =
+ active_ =
#if !defined(_WIN32) && !defined(_WIN64)
- // Is stderr a terminal?
+ // Is stderr a terminal?
(isatty(2) == 1)
#else
true
diff --git a/util/stream/multi_progress.hh b/util/stream/multi_progress.hh
index 41d40075a..f9e6423e3 100644
--- a/util/stream/multi_progress.hh
+++ b/util/stream/multi_progress.hh
@@ -38,7 +38,7 @@ class MultiProgress {
boost::mutex mutex_;
- // \0 at the end.
+ // \0 at the end.
char display_[kWidth + 1];
std::size_t character_handout_;
@@ -49,10 +49,10 @@ class MultiProgress {
class WorkerProgress {
public:
- // Default contrutor must be initialized with operator= later.
+ // Default constructor must be initialized with operator= later.
WorkerProgress() : parent_(NULL) {}
- // Not threadsafe for the same worker by default.
+ // Not threadsafe for the same worker by default.
WorkerProgress &operator++() {
if (++current_ >= next_) {
parent_->Milestone(*this);
@@ -70,17 +70,17 @@ class WorkerProgress {
private:
friend class MultiProgress;
- WorkerProgress(uint64_t next, MultiProgress &parent, char character)
+ WorkerProgress(uint64_t next, MultiProgress &parent, char character)
: current_(0), next_(next), parent_(&parent), stone_(0), character_(character) {}
uint64_t current_, next_;
MultiProgress *parent_;
- // Previous milestone reached.
+ // Previous milestone reached.
unsigned char stone_;
- // Character to display in bar.
+ // Character to display in bar.
char character_;
};
diff --git a/util/stream/multi_stream.hh b/util/stream/multi_stream.hh
index 2772a7081..b1461f964 100644
--- a/util/stream/multi_stream.hh
+++ b/util/stream/multi_stream.hh
@@ -50,7 +50,7 @@ class Chains : public util::FixedArray<util::stream::Chain> {
}
Chains &operator>>(const util::stream::Recycler &recycler) {
- for (util::stream::Chain *i = begin(); i != end(); ++i)
+ for (util::stream::Chain *i = begin(); i != end(); ++i)
*i >> recycler;
return *this;
}
diff --git a/util/stream/sort.hh b/util/stream/sort.hh
index 9082cfdde..a1e0a8539 100644
--- a/util/stream/sort.hh
+++ b/util/stream/sort.hh
@@ -3,16 +3,16 @@
* Chain(config) >> Read(file) >> sorter.Unsorted();
* Stream stream;
* Chain chain(config) >> sorter.Sorted(internal_config, lazy_config) >> stream;
- *
- * Note that sorter must outlive any threads that use Unsorted or Sorted.
+ *
+ * Note that sorter must outlive any threads that use Unsorted or Sorted.
*
* Combiners take the form:
* bool operator()(void *into, const void *option, const Compare &compare) const
* which returns true iff a combination happened. The sorting algorithm
- * guarantees compare(into, option). But it does not guarantee
- * compare(option, into).
+ * guarantees compare(into, option). But it does not guarantee
+ * compare(option, into).
* Currently, combining is only done in merge steps, not during on-the-fly
- * sort. Use a hash table for that.
+ * sort. Use a hash table for that.
*/
#ifndef UTIL_STREAM_SORT_H
@@ -37,12 +37,12 @@ namespace util {
namespace stream {
struct NeverCombine {
- template <class Compare> bool operator()(const void *, const void *, const Compare &) const {
+ template <class Compare> bool operator()(const void *, const void *, const Compare &) const {
return false;
}
};
-// Manage the offsets of sorted blocks in a file.
+// Manage the offsets of sorted blocks in a file.
class Offsets {
public:
explicit Offsets(int fd) : log_(fd) {
@@ -150,7 +150,7 @@ template <class Compare> class MergeQueue {
}
private:
- // Priority queue contains these entries.
+ // Priority queue contains these entries.
class Entry {
public:
Entry() {}
@@ -195,7 +195,7 @@ template <class Compare> class MergeQueue {
uint64_t remaining_, offset_;
};
- // Wrapper comparison function for queue entries.
+ // Wrapper comparison function for queue entries.
class Greater : public std::binary_function<const Entry &, const Entry &, bool> {
public:
explicit Greater(const Compare &compare) : compare_(compare) {}
@@ -217,10 +217,10 @@ template <class Compare> class MergeQueue {
};
/* A worker object that merges. If the number of pieces to merge exceeds the
- * arity, it outputs multiple sorted blocks, recording to out_offsets.
+ * arity, it outputs multiple sorted blocks, recording to out_offsets.
* However, users will only every see a single sorted block out output because
* Sort::Sorted insures the arity is higher than the number of pieces before
- * returning this.
+ * returning this.
*/
template <class Compare, class Combine> class MergingReader {
public:
@@ -235,7 +235,7 @@ template <class Compare, class Combine> class MergingReader {
}
void Run(const ChainPosition &position, bool assert_one) {
- // Special case: nothing to read.
+ // Special case: nothing to read.
if (!in_offsets_->RemainingBlocks()) {
Link l(position);
l.Poison();
@@ -267,7 +267,7 @@ template <class Compare, class Combine> class MergingReader {
// Populate queue.
MergeQueue<Compare> queue(in_, per_buffer, entry_size, compare_);
- for (uint8_t *buf = static_cast<uint8_t*>(buffer.get());
+ for (uint8_t *buf = static_cast<uint8_t*>(buffer.get());
in_offsets_->RemainingBlocks() && (buf + std::min(per_buffer, in_offsets_->PeekSize()) <= buffer_end);) {
uint64_t offset = in_offsets_->TotalOffset();
uint64_t size = in_offsets_->NextSize();
@@ -285,7 +285,7 @@ template <class Compare, class Combine> class MergingReader {
}
uint64_t written = 0;
- // Merge including combiner support.
+ // Merge including combiner support.
memcpy(str.Get(), queue.Top(), entry_size);
for (queue.Pop(); !queue.Empty(); queue.Pop()) {
if (!combine_(str.Get(), queue.Top(), compare_)) {
@@ -300,9 +300,9 @@ template <class Compare, class Combine> class MergingReader {
str.Poison();
}
- private:
+ private:
void ReadSingle(uint64_t offset, const uint64_t size, const ChainPosition &position) {
- // Special case: only one to read.
+ // Special case: only one to read.
const uint64_t end = offset + size;
const uint64_t block_size = position.GetChain().BlockSize();
Link l(position);
@@ -315,7 +315,7 @@ template <class Compare, class Combine> class MergingReader {
(++l).Poison();
return;
}
-
+
Compare compare_;
Combine combine_;
@@ -326,17 +326,17 @@ template <class Compare, class Combine> class MergingReader {
private:
Offsets *out_offsets_;
-
+
std::size_t buffer_size_;
std::size_t total_memory_;
};
-// The lazy step owns the remaining files. This keeps track of them.
+// The lazy step owns the remaining files. This keeps track of them.
template <class Compare, class Combine> class OwningMergingReader : public MergingReader<Compare, Combine> {
private:
typedef MergingReader<Compare, Combine> P;
public:
- OwningMergingReader(int data, const Offsets &offsets, std::size_t buffer, std::size_t lazy, const Compare &compare, const Combine &combine)
+ OwningMergingReader(int data, const Offsets &offsets, std::size_t buffer, std::size_t lazy, const Compare &compare, const Combine &combine)
: P(data, NULL, NULL, buffer, lazy, compare, combine),
data_(data),
offsets_(offsets) {}
@@ -353,7 +353,7 @@ template <class Compare, class Combine> class OwningMergingReader : public Mergi
Offsets offsets_;
};
-// Don't use this directly. Worker that sorts blocks.
+// Don't use this directly. Worker that sorts blocks.
template <class Compare> class BlockSorter {
public:
BlockSorter(Offsets &offsets, const Compare &compare) :
@@ -362,7 +362,7 @@ template <class Compare> class BlockSorter {
void Run(const ChainPosition &position) {
const std::size_t entry_size = position.GetChain().EntrySize();
for (Link link(position); link; ++link) {
- // Record the size of each block in a separate file.
+ // Record the size of each block in a separate file.
offsets_->Append(link->ValidSize());
void *end = static_cast<uint8_t*>(link->Get()) + link->ValidSize();
#if defined(_WIN32) || defined(_WIN64)
@@ -399,7 +399,7 @@ template <class Compare, class Combine = NeverCombine> class Sort {
compare_(compare), combine_(combine),
entry_size_(in.EntrySize()) {
UTIL_THROW_IF(!entry_size_, BadSortConfig, "Sorting entries of size 0");
- // Make buffer_size a multiple of the entry_size.
+ // Make buffer_size a multiple of the entry_size.
config_.buffer_size -= config_.buffer_size % entry_size_;
UTIL_THROW_IF(!config_.buffer_size, BadSortConfig, "Sort buffer too small");
UTIL_THROW_IF(config_.total_memory < config_.buffer_size * 4, BadSortConfig, "Sorting memory " << config_.total_memory << " is too small for four buffers (two read and two write).");
@@ -429,7 +429,7 @@ template <class Compare, class Combine = NeverCombine> class Sort {
Offsets offsets2(offsets2_file.get());
Offsets *offsets_in = &offsets_, *offsets_out = &offsets2;
- // Double buffered writing.
+ // Double buffered writing.
ChainConfig chain_config;
chain_config.entry_size = entry_size_;
chain_config.block_count = 2;
@@ -472,7 +472,7 @@ template <class Compare, class Combine = NeverCombine> class Sort {
}
// Output to chain, using this amount of memory, maximum, for lazy merge
- // sort.
+ // sort.
void Output(Chain &out, std::size_t lazy_memory) {
Merge(lazy_memory);
out.SetProgressTarget(Size());
@@ -483,15 +483,15 @@ template <class Compare, class Combine = NeverCombine> class Sort {
/* If a pipeline step is reading sorted input and writing to a different
* sort order, then there's a trade-off between using RAM to read lazily
- * (avoiding copying the file) and using RAM to increase block size and,
+ * (avoiding copying the file) and using RAM to increase block size and,
* therefore, decrease the number of merge sort passes in the next
- * iteration.
- *
+ * iteration.
+ *
* Merge sort takes log_{arity}(pieces) passes. Thus, each time the chain
* block size is multiplied by arity, the number of output passes decreases
* by one. Up to a constant, then, log_{arity}(chain) is the number of
* passes saved. Chain simply divides the memory evenly over all blocks.
- *
+ *
* Lazy sort saves this many passes (up to a constant)
* log_{arity}((memory-lazy)/block_count) + 1
* Non-lazy sort saves this many passes (up to the same constant):
@@ -535,7 +535,7 @@ template <class Compare, class Combine = NeverCombine> class Sort {
const std::size_t entry_size_;
};
-// returns bytes to be read on demand.
+// returns bytes to be read on demand.
template <class Compare, class Combine> uint64_t BlockingSort(Chain &chain, const SortConfig &config, const Compare &compare = Compare(), const Combine &combine = NeverCombine()) {
Sort<Compare, Combine> sorter(chain, config, compare, combine);
chain.Wait(true);
diff --git a/util/stream/sort_test.cc b/util/stream/sort_test.cc
index fd7705cd9..fc97ffdbf 100644
--- a/util/stream/sort_test.cc
+++ b/util/stream/sort_test.cc
@@ -37,7 +37,7 @@ BOOST_AUTO_TEST_CASE(FromShuffled) {
shuffled.push_back(i);
}
std::random_shuffle(shuffled.begin(), shuffled.end());
-
+
ChainConfig config;
config.entry_size = 8;
config.total_memory = 800;
diff --git a/util/stream/stream.hh b/util/stream/stream.hh
index 0e37f5d51..ee1e9fa83 100644
--- a/util/stream/stream.hh
+++ b/util/stream/stream.hh
@@ -37,7 +37,7 @@ class Stream : boost::noncopyable {
++block_it_;
block_it_.Poison();
}
-
+
Stream &operator++() {
assert(*this);
assert(current_ < end_);
diff --git a/util/stream/timer.hh b/util/stream/timer.hh
index 06488a17e..9e9573d15 100644
--- a/util/stream/timer.hh
+++ b/util/stream/timer.hh
@@ -10,7 +10,7 @@
#define UTIL_TIMER(str) boost::timer::auto_cpu_timer timer(std::cerr, 1, (str))
#else
//#warning Using Boost older than 1.48. Timing information will not be available.*/
-#define UTIL_TIMER(str)
+#define UTIL_TIMER(str)
//#endif
#endif // UTIL_STREAM_TIMER_H
diff --git a/util/thread_pool.hh b/util/thread_pool.hh
index d385b1a74..dce987c40 100644
--- a/util/thread_pool.hh
+++ b/util/thread_pool.hh
@@ -48,7 +48,7 @@ template <class HandlerT> class Worker : boost::noncopyable {
PCQueue<Request> &in_;
boost::optional<Handler> handler_;
-
+
const Request poison_;
boost::thread thread_;
diff --git a/util/tokenize_piece.hh b/util/tokenize_piece.hh
index 8621705e8..9da5fa3c8 100644
--- a/util/tokenize_piece.hh
+++ b/util/tokenize_piece.hh
@@ -127,7 +127,7 @@ template <class Find, bool SkipEmpty = false> class TokenIter : public boost::it
} else {
after_ = StringPiece(found.data() + found.size(), after_.data() - found.data() + after_.size() - found.size());
}
- } while (SkipEmpty && current_.data() && current_.empty()); // Compiler should optimize this away if SkipEmpty is false.
+ } while (SkipEmpty && current_.data() && current_.empty()); // Compiler should optimize this away if SkipEmpty is false.
}
bool equal(const TokenIter<Find, SkipEmpty> &other) const {
diff --git a/util/usage.cc b/util/usage.cc
index bbb209306..f2b661014 100644
--- a/util/usage.cc
+++ b/util/usage.cc
@@ -12,7 +12,7 @@
#include <ctime>
#if defined(_WIN32) || defined(_WIN64)
// This code lifted from physmem.c in gnulib. See the copyright statement
-// below.
+// below.
# define WIN32_LEAN_AND_MEAN
# include <windows.h>
/* MEMORYSTATUSEX is missing from older windows headers, so define
@@ -256,14 +256,14 @@ template <class Num> uint64_t ParseNum(const std::string &arg) {
std::string throwaway;
UTIL_THROW_IF_ARG(stream >> throwaway, SizeParseError, (arg), "because there was more cruft " << throwaway << " after the number.");
- // Silly sort, using kilobytes as your default unit.
+ // Silly sort, using kilobytes as your default unit.
if (after.empty()) after = "K";
if (after == "%") {
uint64_t mem = GuessPhysicalMemory();
UTIL_THROW_IF_ARG(!mem, SizeParseError, (arg), "because % was specified but the physical memory size could not be determined.");
return static_cast<uint64_t>(static_cast<double>(value) * static_cast<double>(mem) / 100.0);
}
-
+
std::string units("bKMGTPEZY");
std::string::size_type index = units.find(after[0]);
UTIL_THROW_IF_ARG(index == std::string::npos, SizeParseError, (arg), "the allowed suffixes are " << units << "%.");